-
Notifications
You must be signed in to change notification settings - Fork 185
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
589 additions
and
196 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
299 changes: 299 additions & 0 deletions
299
testsuite/ext-tools/machine_learning/evaluate_cucumber_report.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,299 @@ | ||
import base64
import binascii
import json
import html
import os
import sys

import numpy as np
from joblib import load
from scipy.sparse import hstack
from sklearn.feature_extraction.text import TfidfVectorizer
|
||
# Mapping from the model's integer class predictions to human-readable
# triage labels.  NOTE(review): assumed to match the label encoding used
# at training time -- confirm against the training pipeline.
LABEL_MAPPING = {
    0: "new_issue",
    1: "under_debugging",
    2: "test_issue",
    3: "flaky",
    4: "bug_reported",
    5: "fixed",
    6: "workaround",
}

# Define row colors for each predicted label: inline CSS applied to the
# corresponding <tr> in the generated HTML report.
LABEL_COLORS = {
    'new_issue': 'background-color: #ffcccc;',
    'under_debugging': 'background-color: #ffcc99;',
    'test_issue': 'background-color: #ffff99;',
    'flaky': 'background-color: #cce5ff;',
    'bug_reported': 'background-color: #d4edda;',
    'fixed': 'background-color: #c3e6cb;',
    'workaround': 'background-color: #f8d7da;',
}
|
||
def decode_base64(data):
    """Decode base64-encoded data into a UTF-8 string.

    Args:
        data: Base64-encoded payload (str or bytes).

    Returns:
        The decoded UTF-8 string, or a human-readable error message when
        the payload is not valid base64/UTF-8.  Returning a message rather
        than raising preserves the best-effort behavior: one malformed log
        entry must not abort report generation.
    """
    try:
        return base64.b64decode(data).decode('utf-8')
    except (binascii.Error, ValueError, TypeError) as e:
        # Narrowed from a bare `except Exception` so unrelated programming
        # errors are no longer silently converted into report text.
        # (UnicodeDecodeError is a ValueError subclass and is covered.)
        return f"Error decoding base64 data: {e}"
|
||
|
||
def load_json(filepath):
    """Load a JSON dataset from *filepath* and return the parsed object."""
    with open(filepath, 'r') as dataset_file:
        raw_text = dataset_file.read()
    return json.loads(raw_text)
|
||
|
||
def preprocess_test_data(data, tfidf=None, model_type="random_forest"):
    """Preprocess test data for evaluation.

    Args:
        data: List of report entries; each entry must carry a 'description'
            dict with optional 'feature', 'scenario', 'error_message',
            'logs', 'age' and 'failedsince' keys.
        tfidf: Fitted TF-IDF vectorizer; required when model_type is
            'random_forest', ignored for 'bert'.
        model_type: Either 'random_forest' or 'bert'.

    Returns:
        For 'random_forest': (sparse feature matrix, metadata list).
        For 'bert': (list of combined text strings, metadata list).

    Raises:
        ValueError: If the vectorizer is invalid, or model_type is not one
            of the supported values (previously an unknown model_type
            silently returned None).
    """
    texts = []
    metadata = []
    for entry in data:
        description = entry['description']
        # Single space-joined text blob: feature + scenario + error + logs.
        combined_text = " ".join([
            description.get('feature', ''),
            description.get('scenario', ''),
            description.get('error_message', ''),
            ' '.join(description.get('logs', [])),
        ])
        texts.append(combined_text)
        metadata.append(description)

    if model_type == "bert":
        # BERT-style models tokenize raw text themselves.
        return texts, metadata

    if model_type == "random_forest":
        if not hasattr(tfidf, "transform"):
            raise ValueError("The provided TF-IDF vectorizer is invalid or incorrectly loaded.")
        # Convert to TF-IDF features
        X_text = tfidf.transform(texts)

        # Numeric features: age, failedsince, and their interaction term.
        X_numeric = np.array([
            [
                description.get('age', 0),
                description.get('failedsince', 0),
                description.get('age', 0) * description.get('failedsince', 0),
            ]
            for description in metadata
        ])

        # Combine text and numeric features into one sparse matrix.
        return hstack([X_text, X_numeric]), metadata

    # Fail fast instead of the implicit None return of the original code.
    raise ValueError(f"Unsupported model_type: {model_type!r}")
|
||
|
||
def evaluate_model(model, X, label_encoder):
    """Run *model* on feature matrix X and map the raw class predictions
    back to their original label names via *label_encoder*."""
    raw_predictions = model.predict(X)
    return label_encoder.inverse_transform(raw_predictions)
|
||
def predict_root_cause(meta):
    """Heuristically suggest a root cause from logs and the error message.

    Args:
        meta: Description dict; 'logs' (list of str) and 'error_message'
            are consulted when present.

    Returns:
        A human-readable root-cause string; a generic fallback message when
        no known pattern matches.
    """
    reason = ""
    if meta.get("logs"):
        # Lowercase once instead of once per keyword check.
        logs = " ".join(meta["logs"]).lower()
        if "timeout" in logs:
            reason += "Possible timeout issue. "
        if "connection" in logs:
            reason += "Possible network connection issue. "
        if "not found" in logs:
            reason += "Missing required resource or configuration. "

    if meta.get("error_message"):
        reason += f"Error: {meta['error_message']} "

    if not reason:  # If no specific reason, mention the possible cause
        reason = "No specific cause identified. May require further investigation."

    return reason
|
||
|
||
def save_results(metadata, predictions, output_path, html_output_path):
    """
    Save evaluation results as a JSON file and an HTML report with column
    filtering and root-cause prediction.

    Args:
        metadata: List of description dicts; each is mutated in place with
            'predicted_label', decoded 'logs', screenshot paths and (for
            failing labels) 'root_cause'.
        predictions: Iterable of integer class predictions parallel to
            *metadata*; mapped to names via LABEL_MAPPING.
        output_path: Destination path for the JSON results.
        html_output_path: Destination path for the HTML report; decoded
            screenshots are written to a 'screenshots' folder next to it.
    """
    results = []
    html_rows = []

    # Create the 'screenshots' folder if it doesn't exist
    screenshots_folder = os.path.join(os.path.dirname(html_output_path), 'screenshots')
    os.makedirs(screenshots_folder, exist_ok=True)

    for meta, pred in zip(metadata, predictions):
        human_label = LABEL_MAPPING.get(pred, "unknown")
        meta["predicted_label"] = human_label

        # Handle base64 encoded logs
        if 'logs' in meta and isinstance(meta['logs'], list):
            meta['logs'] = [decode_base64(log) if isinstance(log, str) else log for log in meta['logs']]

        # Convert screenshots to HTML-displayable images
        if "screenshots" in meta and meta["screenshots"]:
            image_paths = []
            # .get() avoids a KeyError when an entry has no scenario name
            # (the original indexed meta['scenario'] directly).
            scenario_slug = str(meta.get('scenario', 'unknown')).replace(' ', '_')
            for i, screenshot in enumerate(meta["screenshots"]):
                image_filename = f"screenshot_{scenario_slug}_{i}.png"
                image_path = os.path.join(screenshots_folder, image_filename)

                # Decode the base64 screenshot and save as an image
                with open(image_path, "wb") as img_file:
                    img_file.write(base64.b64decode(screenshot))
                image_paths.append(os.path.join('screenshots', image_filename))  # Store relative path

            meta["screenshots"] = image_paths

        # For failed or flaky tests, predict the root cause
        if human_label in ['flaky', 'test_issue', 'bug_reported']:
            meta["root_cause"] = predict_root_cause(meta)

        results.append(meta)

        # Generate HTML rows with conditional styling for predicted labels.
        # Every report-sourced value is escaped so error text / log content
        # cannot inject markup or script into the generated page.
        row_style = LABEL_COLORS.get(human_label, '')  # Default to no color if label not found
        screenshots_html = "<br>".join(
            f'<img src="{html.escape(img, quote=True)}" width="300">'
            for img in meta.get("screenshots", [])
        )
        html_row = f"""
        <tr style="{row_style}">
            <td>{html.escape(str(meta.get('feature', 'N/A')))}</td>
            <td>{html.escape(str(meta.get('scenario', 'N/A')))}</td>
            <td>{html.escape(str(meta.get('predicted_label', 'N/A')))}</td>
            <td>{html.escape(str(meta.get('error_message', 'N/A')))}</td>
            <td>{html.escape(str(meta.get('logs', '')))}</td>
            <td>
                {screenshots_html}
            </td>
            <td>{html.escape(str(meta.get('root_cause', 'N/A')))}</td>
        </tr>
        """
        html_rows.append(html_row)

    # Save results as JSON
    with open(output_path, 'w') as file:
        json.dump(results, file, indent=4)
    print(f"Results saved to {output_path}")

    # Save results as HTML with column-specific filtering and root cause analysis
    html_content = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Evaluation Results</title>
        <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.12.1/css/jquery.dataTables.min.css">
        <script type="text/javascript" charset="utf8" src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
        <script type="text/javascript" charset="utf8" src="https://cdn.datatables.net/1.12.1/js/jquery.dataTables.min.js"></script>
        <style>
            body {{
                font-family: Arial, sans-serif;
                margin: 20px;
            }}
            table {{
                width: 100%;
                border-collapse: collapse;
                margin-bottom: 20px;
            }}
            th, td {{
                padding: 10px;
                text-align: left;
                border: 1px solid #ddd;
            }}
            th {{
                background-color: #f2f2f2;
            }}
            td {{
                font-size: 14px;
            }}
            img {{
                max-width: 100%;
                height: auto;
            }}
            input {{
                padding: 5px;
                width: 100%;
            }}
            #resultsTable_wrapper {{
                padding-top: 20px;
            }}
        </style>
        <script>
            $(document).ready(function() {{
                var table = $('#resultsTable').DataTable({{
                    "paging": true,
                    "searching": true,
                    "ordering": false,
                    "info": true
                }});
                // Apply column-specific search functionality
                table.columns().every(function () {{
                    var column = this;
                    var input = $('<input type="text" placeholder="Search...">')
                        .appendTo($(column.header()).empty())
                        .on('keyup change', function () {{
                            column.search($(this).val()).draw();
                        }});
                }});
            }});
        </script>
    </head>
    <body>
        <h1>Evaluation Results</h1>
        <table id="resultsTable" class="display">
            <thead>
                <tr>
                    <th>Feature</th>
                    <th>Scenario</th>
                    <th>Predicted Label</th>
                    <th>Error Message</th>
                    <th>Logs</th>
                    <th>Screenshots</th>
                    <th>Root Cause</th>
                </tr>
            </thead>
            <tbody>
                {''.join(html_rows)}
            </tbody>
        </table>
    </body>
    </html>
    """
    with open(html_output_path, 'w') as file:
        file.write(html_content)
    print(f"HTML results saved to {html_output_path}")
|
||
|
||
def main(model_type, model_path, tfidf_path, label_encoder_path, test_data_path, json_output_path, html_output_path):
    """
    Main function to evaluate a given model on a test dataset.
    Args:
        model_type (str): Model type ('random_forest' or 'bert').
        model_path (str): Path to the trained model file.
        tfidf_path (str): Path to the TF-IDF vectorizer file.
        label_encoder_path (str): Path to the label encoder file.
        test_data_path (str): Path to the test dataset JSON file.
        json_output_path (str): Path to save the evaluation results as JSON.
        html_output_path (str): Path to save the evaluation results as HTML.
    """
    # Load the persisted artifacts produced at training time.
    model = load(model_path)
    tfidf = load(tfidf_path)
    # NOTE(review): loaded for parity with training, but the raw integer
    # predictions are consumed directly by save_results below.
    label_encoder = load(label_encoder_path)

    # Load and vectorize the test dataset.
    test_data = load_json(test_data_path)
    features, metadata = preprocess_test_data(test_data, tfidf, model_type)

    # Predict labels and persist both report formats.
    predicted = model.predict(features)
    save_results(metadata, predicted, json_output_path, html_output_path)
|
||
if __name__ == "__main__":
    # Script entry point: expects exactly 7 positional arguments.
    if len(sys.argv) != 8:
        print("Usage: python evaluate_cucumber_report.py <model_type> <model_path> <tfidf_path> <label_encoder_path> <test_data_path> <output_path> <html_output_path>")
        sys.exit(1)

    (model_type, model_path, tfidf_path, label_encoder_path,
     test_data_path, output_path, html_output_path) = sys.argv[1:8]
    model_type = model_type.lower()

    main(model_type, model_path, tfidf_path, label_encoder_path, test_data_path, output_path, html_output_path)
Oops, something went wrong.