Commit

Evaluate test report

srbarrios committed Nov 26, 2024
1 parent 970e062 commit e9f117e
Showing 8 changed files with 589 additions and 196 deletions.
4 changes: 2 additions & 2 deletions testsuite/Rakefile
@@ -187,12 +187,12 @@ namespace :utils do

desc 'Collect and tag flaky tests'
task :collect_and_tag_flaky_tests do
-  `ruby ext-tools/machine_learning/gh_issues_parser.rb --collect-and-tag --directory-path features`
+  `ruby ext-tools/machine_learning/gh_issues_parser.rb --collect_and_tag --directory_path features`
end

desc 'Generate dataset from GH issues'
task :generate_dataset_gh_issues do
-  `ruby ext-tools/machine_learning/gh_issues_parser.rb --generate-dataset --file-path gh_issues_dataset.json`
+  `ruby ext-tools/machine_learning/gh_issues_parser.rb --generate_dataset --output_path gh_issues_dataset.json`
end

desc 'Generate dataset from JSON Cucumber Test Report'
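(Both tasks live in the `:utils` namespace shown in the hunk header above, so they would presumably be invoked from the testsuite directory as `rake utils:collect_and_tag_flaky_tests` and `rake utils:generate_dataset_gh_issues`.)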
21 changes: 11 additions & 10 deletions testsuite/ext-tools/machine_learning/cucumber_report_history.rb
@@ -2,7 +2,6 @@
# Copyright (c) 2024 SUSE LLC.
# Licensed under the terms of the MIT license.

- require 'csv'
require 'json'
require 'net/http'
require 'optparse'
@@ -17,7 +16,7 @@
options[:server] = server
end

- opts.on('-o', '--output_path FILEPATH', 'Output file path (CSV format)') do |filepath|
+ opts.on('-o', '--output_path FILEPATH', 'Output file path (JSON format)') do |filepath|
options[:output_path] = filepath
end

@@ -44,26 +43,28 @@
response = Net::HTTP.get_response(uri)
if response.is_a?(Net::HTTPSuccess)
data = JSON.parse(response.body)
+ label_mapping = {
+   'PASSED' => 0,
+   'SKIPPED' => 1,
+   'FIXED' => 2,
+   'REGRESSION' => 3,
+   'FAILED' => 4
+ }
dataset =
data['data']['result'].map do |result|
metric = result['metric']
{
- label: metric['status'].downcase,
+ label: label_mapping[metric['status']],
description: {
+ jobname: metric['jobname'],
scenario: metric['case'],
feature: metric['suite'],
- jobname: metric['jobname'],
failedsince: metric['failedsince'].to_i,
age: result['value'][1].to_i
}
}
end
- CSV.open(options[:output_path], 'w') do |csv|
-   csv << dataset.first.keys
-   dataset.each do |entry|
-     csv << [entry[:label], entry[:description].to_json]
-   end
- end
+ File.write(options[:output_path], dataset.to_json)
else
puts "Failed to fetch data from Prometheus: #{response.code} #{response.message}"
end
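For reference, a single entry of the JSON dataset written by cucumber_report_history.rb would presumably look like the sketch below; the field names come from the diff above, the values are purely illustrative, and the numeric label follows the PASSED..FAILED mapping:

    {
      "label": 3,
      "description": {
        "jobname": "example-qe-testsuite",
        "scenario": "Bootstrap a Salt minion",
        "feature": "features/init_clients/sle_minion.feature",
        "failedsince": 12,
        "age": 3
      }
    }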
10 changes: 2 additions & 8 deletions testsuite/ext-tools/machine_learning/cucumber_report_parser.rb
@@ -1,7 +1,6 @@
# Copyright (c) 2024 SUSE LLC.
# Licensed under the terms of the MIT license.

- require 'csv'
require 'json'
require 'nokogiri'
require 'optparse'
@@ -79,7 +78,7 @@ def extract_dataset_from_json(json_report_path)
options[:report_path] = f
end

- opts.on('-o', '--output_path PATH', 'Path to the processed report file (CSV format)') do |f|
+ opts.on('-o', '--output_path PATH', 'Path to the processed report file (JSON format)') do |f|
options[:output_path] = f
end

@@ -98,9 +97,4 @@ def extract_dataset_from_json(json_report_path)
end

dataset = extract_dataset_from_json(options[:report_path])
- CSV.open(options[:output_path], 'w') do |csv|
-   csv << dataset.first.keys
-   dataset.each do |entry|
-     csv << [entry[:label], entry[:description].to_json]
-   end
- end
+ File.write(options[:output_path], dataset.to_json)
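The evaluator added below consumes this kind of JSON dataset: for each entry it reads description fields such as feature, scenario, error_message, logs (base64-encoded strings) and, optionally, screenshots, age and failedsince. A minimal entry, with purely illustrative values, might look like:

    {
      "label": "...",
      "description": {
        "feature": "features/secondary/example.feature",
        "scenario": "Install a package via Salt",
        "error_message": "Timed out waiting for the package to appear",
        "logs": ["<base64-encoded step output>"],
        "screenshots": []
      }
    }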
Empty file.
299 changes: 299 additions & 0 deletions testsuite/ext-tools/machine_learning/evaluate_cucumber_report.py
@@ -0,0 +1,299 @@
import sys
import json
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from joblib import load
from scipy.sparse import hstack
import os
import base64

LABEL_MAPPING = {
    0: "new_issue",
    1: "under_debugging",
    2: "test_issue",
    3: "flaky",
    4: "bug_reported",
    5: "fixed",
    6: "workaround",
}

# Define row colors for each predicted label
LABEL_COLORS = {
    'new_issue': 'background-color: #ffcccc;',
    'under_debugging': 'background-color: #ffcc99;',
    'test_issue': 'background-color: #ffff99;',
    'flaky': 'background-color: #cce5ff;',
    'bug_reported': 'background-color: #d4edda;',
    'fixed': 'background-color: #c3e6cb;',
    'workaround': 'background-color: #f8d7da;',
}

def decode_base64(data):
    """Decode base64-encoded data."""
    try:
        decoded_data = base64.b64decode(data).decode('utf-8')
        return decoded_data
    except Exception as e:
        return f"Error decoding base64 data: {e}"


def load_json(filepath):
    """Load JSON dataset from a file."""
    with open(filepath, 'r') as file:
        return json.load(file)


def preprocess_test_data(data, tfidf=None, model_type="random_forest"):
    """Preprocess test data for evaluation."""
    texts = []
    metadata = []
    for entry in data:
        description = entry['description']
        combined_text = f"{description.get('feature', '')} {description.get('scenario', '')} {description.get('error_message', '')} {' '.join(description.get('logs', []))}"
        texts.append(combined_text)
        metadata.append(description)

    if model_type == "random_forest":
        if not hasattr(tfidf, "transform"):
            raise ValueError("The provided TF-IDF vectorizer is invalid or incorrectly loaded.")
        # Convert to TF-IDF features
        X_text = tfidf.transform(texts)

        # Generate numeric features
        X_numeric = np.array([
            [
                description.get('age', 0),
                description.get('failedsince', 0),
                description.get('age', 0) * description.get('failedsince', 0),
            ]
            for description in metadata
        ])

        # Combine text and numeric features
        X = hstack([X_text, X_numeric])
        return X, metadata
    elif model_type == "bert":
        return texts, metadata


def evaluate_model(model, X, label_encoder):
    """Evaluate the model and predict labels."""
    predictions = model.predict(X)
    predicted_labels = label_encoder.inverse_transform(predictions)
    return predicted_labels

def predict_root_cause(meta):
    """Predict the root cause based on the available logs, screenshots, and error messages."""
    reason = ""
    if meta.get("logs"):
        logs = " ".join(meta["logs"])
        if "timeout" in logs.lower():
            reason += "Possible timeout issue. "
        if "connection" in logs.lower():
            reason += "Possible network connection issue. "
        if "not found" in logs.lower():
            reason += "Missing required resource or configuration. "

    if meta.get("error_message"):
        reason += f"Error: {meta['error_message']} "

    if not reason:  # If no specific reason, mention the possible cause
        reason = "No specific cause identified. May require further investigation."

    return reason


def save_results(metadata, predictions, output_path, html_output_path):
    """
    Save evaluation results as a JSON file and an HTML file with column filtering and root cause prediction.
    """
    results = []
    html_rows = []

    # Create the 'screenshots' folder if it doesn't exist
    screenshots_folder = os.path.join(os.path.dirname(html_output_path), 'screenshots')
    os.makedirs(screenshots_folder, exist_ok=True)

    for meta, pred in zip(metadata, predictions):
        human_label = LABEL_MAPPING.get(pred, "unknown")
        meta["predicted_label"] = human_label

        # Handle base64 encoded logs
        if 'logs' in meta and isinstance(meta['logs'], list):
            meta['logs'] = [decode_base64(log) if isinstance(log, str) else log for log in meta['logs']]

        # Convert screenshots to HTML-displayable images
        if "screenshots" in meta and meta["screenshots"]:
            image_paths = []
            for i, screenshot in enumerate(meta["screenshots"]):
                image_filename = f"screenshot_{meta['scenario'].replace(' ', '_')}_{i}.png"
                image_path = os.path.join(screenshots_folder, image_filename)

                # Decode the base64 screenshot and save as an image
                with open(image_path, "wb") as img_file:
                    img_file.write(base64.b64decode(screenshot))
                image_paths.append(os.path.join('screenshots', image_filename))  # Store relative path

            meta["screenshots"] = image_paths

        # For failed or flaky tests, predict the root cause
        if human_label in ['flaky', 'test_issue', 'bug_reported']:
            meta["root_cause"] = predict_root_cause(meta)

        results.append(meta)

        # Generate HTML rows with conditional styling for predicted labels
        row_style = LABEL_COLORS.get(human_label, '')  # Default to no color if label not found
        html_row = f"""
        <tr style="{row_style}">
            <td>{meta.get('feature', 'N/A')}</td>
            <td>{meta.get('scenario', 'N/A')}</td>
            <td>{meta.get('predicted_label', 'N/A')}</td>
            <td>{meta.get('error_message', 'N/A')}</td>
            <td>{meta.get('logs', '')}</td>
            <td>
                {"<br>".join([f'<img src="{img}" width="300">' for img in meta.get("screenshots", [])])}
            </td>
            <td>{meta.get('root_cause', 'N/A')}</td>
        </tr>
        """
        html_rows.append(html_row)

    # Save results as JSON
    with open(output_path, 'w') as file:
        json.dump(results, file, indent=4)
    print(f"Results saved to {output_path}")

    # Save results as HTML with column-specific filtering and root cause analysis
    html_content = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Evaluation Results</title>
        <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.12.1/css/jquery.dataTables.min.css">
        <script type="text/javascript" charset="utf8" src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
        <script type="text/javascript" charset="utf8" src="https://cdn.datatables.net/1.12.1/js/jquery.dataTables.min.js"></script>
        <style>
            body {{
                font-family: Arial, sans-serif;
                margin: 20px;
            }}
            table {{
                width: 100%;
                border-collapse: collapse;
                margin-bottom: 20px;
            }}
            th, td {{
                padding: 10px;
                text-align: left;
                border: 1px solid #ddd;
            }}
            th {{
                background-color: #f2f2f2;
            }}
            td {{
                font-size: 14px;
            }}
            img {{
                max-width: 100%;
                height: auto;
            }}
            input {{
                padding: 5px;
                width: 100%;
            }}
            #resultsTable_wrapper {{
                padding-top: 20px;
            }}
        </style>
        <script>
            $(document).ready(function() {{
                var table = $('#resultsTable').DataTable({{
                    "paging": true,
                    "searching": true,
                    "ordering": false,
                    "info": true
                }});
                // Apply column-specific search functionality
                table.columns().every(function () {{
                    var column = this;
                    var input = $('<input type="text" placeholder="Search...">')
                        .appendTo($(column.header()).empty())
                        .on('keyup change', function () {{
                            column.search($(this).val()).draw();
                        }});
                }});
            }});
        </script>
    </head>
    <body>
        <h1>Evaluation Results</h1>
        <table id="resultsTable" class="display">
            <thead>
                <tr>
                    <th>Feature</th>
                    <th>Scenario</th>
                    <th>Predicted Label</th>
                    <th>Error Message</th>
                    <th>Logs</th>
                    <th>Screenshots</th>
                    <th>Root Cause</th>
                </tr>
            </thead>
            <tbody>
                {''.join(html_rows)}
            </tbody>
        </table>
    </body>
    </html>
    """
    with open(html_output_path, 'w') as file:
        file.write(html_content)
    print(f"HTML results saved to {html_output_path}")


def main(model_type, model_path, tfidf_path, label_encoder_path, test_data_path, json_output_path, html_output_path):
    """
    Main function to evaluate a given model on a test dataset.
    Args:
        model_type (str): Model type ('random_forest' or 'bert').
        model_path (str): Path to the trained model file.
        tfidf_path (str): Path to the TF-IDF vectorizer file.
        label_encoder_path (str): Path to the label encoder file.
        test_data_path (str): Path to the test dataset JSON file.
        json_output_path (str): Path to save the evaluation results as JSON.
        html_output_path (str): Path to save the evaluation results as HTML.
    """
    # Load model, TF-IDF vectorizer, and label encoder
    model = load(model_path)
    tfidf = load(tfidf_path)
    label_encoder = load(label_encoder_path)

    # Load and preprocess test data
    test_data = load_json(test_data_path)
    X, metadata = preprocess_test_data(test_data, tfidf, model_type)

    # Predict labels
    predictions = model.predict(X)

    # Save results to JSON and HTML
    save_results(metadata, predictions, json_output_path, html_output_path)

if __name__ == "__main__":
    if len(sys.argv) != 8:
        print("Usage: python evaluate_cucumber_report.py <model_type> <model_path> <tfidf_path> <label_encoder_path> <test_data_path> <output_path> <html_output_path>")
        sys.exit(1)

    model_type = sys.argv[1].lower()
    model_path = sys.argv[2]
    tfidf_path = sys.argv[3]
    label_encoder_path = sys.argv[4]
    test_data_path = sys.argv[5]
    output_path = sys.argv[6]
    html_output_path = sys.argv[7]

    main(model_type, model_path, tfidf_path, label_encoder_path, test_data_path, output_path, html_output_path)
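As a usage sketch, the evaluator can be driven through its CLI or by calling main() directly; every path below is a hypothetical placeholder, since the model, TF-IDF vectorizer and label encoder artifacts come from a separate training step not shown in this commit:

    # Hypothetical invocation; none of these files are shipped by this commit.
    # CLI form, mirroring the script's usage string:
    #   python evaluate_cucumber_report.py random_forest random_forest_model.joblib \
    #       tfidf_vectorizer.joblib label_encoder.joblib cucumber_report_dataset.json \
    #       evaluation_results.json evaluation_results.html
    from evaluate_cucumber_report import main

    main(
        model_type="random_forest",
        model_path="random_forest_model.joblib",
        tfidf_path="tfidf_vectorizer.joblib",
        label_encoder_path="label_encoder.joblib",
        test_data_path="cucumber_report_dataset.json",
        json_output_path="evaluation_results.json",
        html_output_path="evaluation_results.html",
    )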