-
Notifications
You must be signed in to change notification settings - Fork 185
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
589 additions
and
196 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
299 changes: 299 additions & 0 deletions
299
testsuite/ext-tools/machine_learning/evaluate_cucumber_report.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,299 @@ | ||
import base64
import binascii
import json
import html
import os
import sys

import numpy as np
from joblib import load
from scipy.sparse import hstack
from sklearn.feature_extraction.text import TfidfVectorizer
|
||
# Mapping from the model's integer class predictions to human-readable
# triage labels.  NOTE(review): assumed to match the label encoding used
# at training time -- confirm against the training pipeline.
LABEL_MAPPING = {
    0: "new_issue",
    1: "under_debugging",
    2: "test_issue",
    3: "flaky",
    4: "bug_reported",
    5: "fixed",
    6: "workaround",
}

# Define row colors for each predicted label: inline CSS applied to the
# corresponding <tr> in the generated HTML report.
LABEL_COLORS = {
    'new_issue': 'background-color: #ffcccc;',
    'under_debugging': 'background-color: #ffcc99;',
    'test_issue': 'background-color: #ffff99;',
    'flaky': 'background-color: #cce5ff;',
    'bug_reported': 'background-color: #d4edda;',
    'fixed': 'background-color: #c3e6cb;',
    'workaround': 'background-color: #f8d7da;',
}
|
||
def decode_base64(data):
    """Decode base64-encoded data into a UTF-8 string.

    Args:
        data: Base64-encoded payload (str or bytes).

    Returns:
        The decoded UTF-8 string, or a human-readable error message when
        the payload is not valid base64/UTF-8.  Returning a message rather
        than raising preserves the best-effort behavior: one malformed log
        entry must not abort report generation.
    """
    try:
        return base64.b64decode(data).decode('utf-8')
    except (binascii.Error, ValueError, TypeError) as e:
        # Narrowed from a bare `except Exception` so unrelated programming
        # errors are no longer silently converted into report text.
        # (UnicodeDecodeError is a ValueError subclass and is covered.)
        return f"Error decoding base64 data: {e}"
|
||
|
||
def load_json(filepath):
    """Load a JSON dataset from *filepath* and return the parsed object."""
    with open(filepath, 'r') as dataset_file:
        raw_text = dataset_file.read()
    return json.loads(raw_text)
|
||
|
||
def preprocess_test_data(data, tfidf=None, model_type="random_forest"):
    """Preprocess test data for evaluation.

    Args:
        data: List of report entries; each entry must carry a 'description'
            dict with optional 'feature', 'scenario', 'error_message',
            'logs', 'age' and 'failedsince' keys.
        tfidf: Fitted TF-IDF vectorizer; required when model_type is
            'random_forest', ignored for 'bert'.
        model_type: Either 'random_forest' or 'bert'.

    Returns:
        For 'random_forest': (sparse feature matrix, metadata list).
        For 'bert': (list of combined text strings, metadata list).

    Raises:
        ValueError: If the vectorizer is invalid, or model_type is not one
            of the supported values (previously an unknown model_type
            silently returned None).
    """
    texts = []
    metadata = []
    for entry in data:
        description = entry['description']
        # Single space-joined text blob: feature + scenario + error + logs.
        combined_text = " ".join([
            description.get('feature', ''),
            description.get('scenario', ''),
            description.get('error_message', ''),
            ' '.join(description.get('logs', [])),
        ])
        texts.append(combined_text)
        metadata.append(description)

    if model_type == "bert":
        # BERT-style models tokenize raw text themselves.
        return texts, metadata

    if model_type == "random_forest":
        if not hasattr(tfidf, "transform"):
            raise ValueError("The provided TF-IDF vectorizer is invalid or incorrectly loaded.")
        # Convert to TF-IDF features
        X_text = tfidf.transform(texts)

        # Numeric features: age, failedsince, and their interaction term.
        X_numeric = np.array([
            [
                description.get('age', 0),
                description.get('failedsince', 0),
                description.get('age', 0) * description.get('failedsince', 0),
            ]
            for description in metadata
        ])

        # Combine text and numeric features into one sparse matrix.
        return hstack([X_text, X_numeric]), metadata

    # Fail fast instead of the implicit None return of the original code.
    raise ValueError(f"Unsupported model_type: {model_type!r}")
|
||
|
||
def evaluate_model(model, X, label_encoder):
    """Run *model* on feature matrix X and map the raw class predictions
    back to their original label names via *label_encoder*."""
    raw_predictions = model.predict(X)
    return label_encoder.inverse_transform(raw_predictions)
|
||
def predict_root_cause(meta):
    """Heuristically suggest a root cause from logs and the error message.

    Args:
        meta: Description dict; 'logs' (list of str) and 'error_message'
            are consulted when present.

    Returns:
        A human-readable root-cause string; a generic fallback message when
        no known pattern matches.
    """
    reason = ""
    if meta.get("logs"):
        # Lowercase once instead of once per keyword check.
        logs = " ".join(meta["logs"]).lower()
        if "timeout" in logs:
            reason += "Possible timeout issue. "
        if "connection" in logs:
            reason += "Possible network connection issue. "
        if "not found" in logs:
            reason += "Missing required resource or configuration. "

    if meta.get("error_message"):
        reason += f"Error: {meta['error_message']} "

    if not reason:  # If no specific reason, mention the possible cause
        reason = "No specific cause identified. May require further investigation."

    return reason
|
||
|
||
def save_results(metadata, predictions, output_path, html_output_path):
    """
    Save evaluation results as a JSON file and an HTML report with column
    filtering and root-cause prediction.

    Args:
        metadata: List of description dicts; each is mutated in place with
            'predicted_label', decoded 'logs', screenshot paths and (for
            failing labels) 'root_cause'.
        predictions: Iterable of integer class predictions parallel to
            *metadata*; mapped to names via LABEL_MAPPING.
        output_path: Destination path for the JSON results.
        html_output_path: Destination path for the HTML report; decoded
            screenshots are written to a 'screenshots' folder next to it.
    """
    results = []
    html_rows = []

    # Create the 'screenshots' folder if it doesn't exist
    screenshots_folder = os.path.join(os.path.dirname(html_output_path), 'screenshots')
    os.makedirs(screenshots_folder, exist_ok=True)

    for meta, pred in zip(metadata, predictions):
        human_label = LABEL_MAPPING.get(pred, "unknown")
        meta["predicted_label"] = human_label

        # Handle base64 encoded logs
        if 'logs' in meta and isinstance(meta['logs'], list):
            meta['logs'] = [decode_base64(log) if isinstance(log, str) else log for log in meta['logs']]

        # Convert screenshots to HTML-displayable images
        if "screenshots" in meta and meta["screenshots"]:
            image_paths = []
            # .get() avoids a KeyError when an entry has no scenario name
            # (the original indexed meta['scenario'] directly).
            scenario_slug = str(meta.get('scenario', 'unknown')).replace(' ', '_')
            for i, screenshot in enumerate(meta["screenshots"]):
                image_filename = f"screenshot_{scenario_slug}_{i}.png"
                image_path = os.path.join(screenshots_folder, image_filename)

                # Decode the base64 screenshot and save as an image
                with open(image_path, "wb") as img_file:
                    img_file.write(base64.b64decode(screenshot))
                image_paths.append(os.path.join('screenshots', image_filename))  # Store relative path

            meta["screenshots"] = image_paths

        # For failed or flaky tests, predict the root cause
        if human_label in ['flaky', 'test_issue', 'bug_reported']:
            meta["root_cause"] = predict_root_cause(meta)

        results.append(meta)

        # Generate HTML rows with conditional styling for predicted labels.
        # Every report-sourced value is escaped so error text / log content
        # cannot inject markup or script into the generated page.
        row_style = LABEL_COLORS.get(human_label, '')  # Default to no color if label not found
        screenshots_html = "<br>".join(
            f'<img src="{html.escape(img, quote=True)}" width="300">'
            for img in meta.get("screenshots", [])
        )
        html_row = f"""
        <tr style="{row_style}">
            <td>{html.escape(str(meta.get('feature', 'N/A')))}</td>
            <td>{html.escape(str(meta.get('scenario', 'N/A')))}</td>
            <td>{html.escape(str(meta.get('predicted_label', 'N/A')))}</td>
            <td>{html.escape(str(meta.get('error_message', 'N/A')))}</td>
            <td>{html.escape(str(meta.get('logs', '')))}</td>
            <td>
                {screenshots_html}
            </td>
            <td>{html.escape(str(meta.get('root_cause', 'N/A')))}</td>
        </tr>
        """
        html_rows.append(html_row)

    # Save results as JSON
    with open(output_path, 'w') as file:
        json.dump(results, file, indent=4)
    print(f"Results saved to {output_path}")

    # Save results as HTML with column-specific filtering and root cause analysis
    html_content = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Evaluation Results</title>
        <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.12.1/css/jquery.dataTables.min.css">
        <script type="text/javascript" charset="utf8" src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
        <script type="text/javascript" charset="utf8" src="https://cdn.datatables.net/1.12.1/js/jquery.dataTables.min.js"></script>
        <style>
            body {{
                font-family: Arial, sans-serif;
                margin: 20px;
            }}
            table {{
                width: 100%;
                border-collapse: collapse;
                margin-bottom: 20px;
            }}
            th, td {{
                padding: 10px;
                text-align: left;
                border: 1px solid #ddd;
            }}
            th {{
                background-color: #f2f2f2;
            }}
            td {{
                font-size: 14px;
            }}
            img {{
                max-width: 100%;
                height: auto;
            }}
            input {{
                padding: 5px;
                width: 100%;
            }}
            #resultsTable_wrapper {{
                padding-top: 20px;
            }}
        </style>
        <script>
            $(document).ready(function() {{
                var table = $('#resultsTable').DataTable({{
                    "paging": true,
                    "searching": true,
                    "ordering": false,
                    "info": true
                }});
                // Apply column-specific search functionality
                table.columns().every(function () {{
                    var column = this;
                    var input = $('<input type="text" placeholder="Search...">')
                        .appendTo($(column.header()).empty())
                        .on('keyup change', function () {{
                            column.search($(this).val()).draw();
                        }});
                }});
            }});
        </script>
    </head>
    <body>
        <h1>Evaluation Results</h1>
        <table id="resultsTable" class="display">
            <thead>
                <tr>
                    <th>Feature</th>
                    <th>Scenario</th>
                    <th>Predicted Label</th>
                    <th>Error Message</th>
                    <th>Logs</th>
                    <th>Screenshots</th>
                    <th>Root Cause</th>
                </tr>
            </thead>
            <tbody>
                {''.join(html_rows)}
            </tbody>
        </table>
    </body>
    </html>
    """
    with open(html_output_path, 'w') as file:
        file.write(html_content)
    print(f"HTML results saved to {html_output_path}")
|
||
|
||
def main(model_type, model_path, tfidf_path, label_encoder_path, test_data_path, json_output_path, html_output_path):
    """
    Main function to evaluate a given model on a test dataset.
    Args:
        model_type (str): Model type ('random_forest' or 'bert').
        model_path (str): Path to the trained model file.
        tfidf_path (str): Path to the TF-IDF vectorizer file.
        label_encoder_path (str): Path to the label encoder file.
        test_data_path (str): Path to the test dataset JSON file.
        json_output_path (str): Path to save the evaluation results as JSON.
        html_output_path (str): Path to save the evaluation results as HTML.
    """
    # Load the persisted artifacts produced at training time.
    model = load(model_path)
    tfidf = load(tfidf_path)
    # NOTE(review): loaded for parity with training, but the raw integer
    # predictions are consumed directly by save_results below.
    label_encoder = load(label_encoder_path)

    # Load and vectorize the test dataset.
    test_data = load_json(test_data_path)
    features, metadata = preprocess_test_data(test_data, tfidf, model_type)

    # Predict labels and persist both report formats.
    predicted = model.predict(features)
    save_results(metadata, predicted, json_output_path, html_output_path)
|
||
if __name__ == "__main__":
    # Script entry point: expects exactly 7 positional arguments.
    if len(sys.argv) != 8:
        print("Usage: python evaluate_cucumber_report.py <model_type> <model_path> <tfidf_path> <label_encoder_path> <test_data_path> <output_path> <html_output_path>")
        sys.exit(1)

    (model_type, model_path, tfidf_path, label_encoder_path,
     test_data_path, output_path, html_output_path) = sys.argv[1:8]
    model_type = model_type.lower()

    main(model_type, model_path, tfidf_path, label_encoder_path, test_data_path, output_path, html_output_path)
Oops, something went wrong.