Added code to generate a classification report #9

Merged 5 commits on May 16, 2024

Changes from all commits
44 changes: 44 additions & 0 deletions README.md
@@ -121,6 +121,50 @@ By default method='wilson', the wilson interval, which behaves better for smaller

method can be one of ['wilson', 'normal', 'agresti_coull', 'beta', 'jeffreys', 'binom_test'], or one of the bootstrap methods.

## Get a Classification Report
The `classification_report_with_ci` function (defined in [classification_report.py](confidenceinterval%2Fclassification_report.py)) builds a report showing the main classification metrics and their confidence intervals, returned as a pandas DataFrame.
Each class is first treated as a binary (one-vs-rest) classification problem: by default the Wilson interval is used for precision and recall, and the Takahashi binary method for the F1 score. The
micro and macro multi-class metrics are then calculated using the Takahashi methods.

```python
from confidenceinterval import classification_report_with_ci

y_true = [0, 1, 2, 2, 2, 1, 1, 1, 0, 2, 2, 1, 0, 2, 2, 1, 2, 2, 1, 1]
y_pred = [0, 1, 0, 0, 2, 1, 1, 1, 0, 2, 2, 1, 0, 1, 2, 1, 2, 2, 1, 1]

classification_report_with_ci(y_true, y_pred)

     Class  Precision  Recall  F1-Score    Precision CI       Recall CI     F1-Score CI  Support
0  Class 0      0.600   1.000     0.750  (0.231, 0.882)    (0.439, 1.0)  (0.408, 1.092)        3
1  Class 1      0.889   1.000     0.941   (0.565, 0.98)    (0.676, 1.0)  (0.796, 1.086)        8
2  Class 2      1.000   0.667     0.800     (0.61, 1.0)  (0.354, 0.879)  (0.562, 1.038)        9
3    micro      0.850   0.850     0.850  (0.694, 1.006)  (0.694, 1.006)  (0.694, 1.006)       20
4    macro      0.830   0.889     0.830  (0.702, 0.958)  (0.775, 1.002)  (0.548, 1.113)       20
```
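
Under the hood, each per-class row is computed one-vs-rest: the labels are binarised for that class and the binary metrics are evaluated on them. As a rough sketch of what the `Class 2` row above corresponds to (using the same `precision_score` import that `classification_report.py` uses, and reusing `y_true`/`y_pred` from the snippet above):

```python
from confidenceinterval.takahashi_methods import precision_score

# One-vs-rest view of class 2: 1 where the label is 2, else 0.
y_true_c2 = [1 if y == 2 else 0 for y in y_true]
y_pred_c2 = [1 if y == 2 else 0 for y in y_pred]

p, p_ci = precision_score(y_true_c2, y_pred_c2, average='binary', method='wilson')
print(p, p_ci)  # roughly 1.0, (0.61, 1.0), matching the Class 2 row above
```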
You can also provide a custom mapping for the class names, as well as modify the binary CI method and rounding.
```python
from confidenceinterval import classification_report_with_ci

y_true = [0, 1, 2, 2, 2, 1, 1, 1, 0, 2, 2, 1, 0, 2, 2, 1, 2, 2, 1, 1]
y_pred = [0, 1, 0, 0, 2, 1, 1, 1, 0, 2, 2, 1, 0, 1, 2, 1, 2, 2, 1, 1]

numerical_to_label = {
    0: "Cherries",
    1: "Olives",
    2: "Tangerines"
}

classification_report_with_ci(y_true, y_pred, round_ndigits=2, numerical_to_label_map=numerical_to_label, binary_method='wilson')

        Class  Precision  Recall  F1-Score  Precision CI     Recall CI   F1-Score CI  Support
0    Cherries       0.60    1.00      0.75  (0.23, 0.88)   (0.44, 1.0)  (0.41, 1.09)        3
1      Olives       0.89    1.00      0.94  (0.57, 0.98)   (0.68, 1.0)   (0.8, 1.09)        8
2  Tangerines       1.00    0.67      0.80   (0.61, 1.0)  (0.35, 0.88)  (0.56, 1.04)        9
3       micro       0.85    0.85      0.85  (0.69, 1.01)  (0.69, 1.01)  (0.69, 1.01)       20
4       macro       0.83    0.89      0.83   (0.7, 0.96)   (0.78, 1.0)  (0.55, 1.11)       20
```
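
Since `classification_report_with_ci` returns a `pandas.DataFrame` (see `classification_report.py` below) rather than a plain string, the report can be post-processed like any ordinary DataFrame. A small sketch, reusing `y_true` and `y_pred` from above; the output file name is just an example:

```python
report = classification_report_with_ci(y_true, y_pred, round_ndigits=2)

# Save the report, or pull out a single cell programmatically.
report.to_csv("classification_report.csv", index=False)

macro_f1_ci = report.loc[report["Class"] == "macro", "F1-Score CI"].iloc[0]
print(macro_f1_ci)  # e.g. (0.55, 1.11)
```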


## Get a confidence interval for any custom metric with Bootstrapping
With the bootstrap_ci method, you can get the CI for any metric function that takes y_true and y_pred as arguments.
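
For example, here is a minimal sketch of wrapping an arbitrary scikit-learn metric. The exact keyword names and the `'bootstrap_bca'` method name are assumptions; check `confidenceinterval/bootstrap.py` for the supported arguments:

```python
import numpy as np
from sklearn.metrics import balanced_accuracy_score
from confidenceinterval.bootstrap import bootstrap_ci

y_true = [0, 1, 2, 2, 2, 1, 1, 1, 0, 2, 2, 1, 0, 2, 2, 1, 2, 2, 1, 1]
y_pred = [0, 1, 0, 0, 2, 1, 1, 1, 0, 2, 2, 1, 0, 1, 2, 1, 2, 2, 1, 1]

# Any callable taking (y_true, y_pred) can be passed as the metric.
score, ci = bootstrap_ci(y_true=y_true,
                         y_pred=y_pred,
                         metric=balanced_accuracy_score,
                         confidence_level=0.95,
                         n_resamples=5000,
                         method='bootstrap_bca',  # assumed name of the BCa resampling method
                         random_state=np.random.default_rng(42))
print(score, ci)
```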

2 changes: 2 additions & 0 deletions confidenceinterval/__init__.py
@@ -11,3 +11,5 @@
recall_score, \
f1_score
from confidenceinterval.auc import roc_auc_score

from confidenceinterval.classification_report import classification_report_with_ci
116 changes: 116 additions & 0 deletions confidenceinterval/classification_report.py
@@ -0,0 +1,116 @@
""" Classification report similar to https://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html
"""

from typing import List, Dict, Optional

import pandas as pd
import numpy as np
from confidenceinterval.takahashi_methods import precision_score, recall_score, f1_score

def round_tuple(t, decimals=3):
    return tuple(round(num, decimals) for num in t)


def classification_report_with_ci(y_true: List[int], y_pred: List[int],
                                  binary_method: str = 'wilson',
                                  round_ndigits: int = 3,
                                  numerical_to_label_map: Optional[Dict[int, str]] = None,
                                  confidence_level: float = 0.95) -> pd.DataFrame:
    """Build a classification report with per-class and micro/macro averaged
    precision, recall and F1 scores, together with their confidence intervals.

    Parameters
    ----------
    y_true : List[int]
        The ground truth labels.
    y_pred : List[int]
        The predicted categories.
    binary_method: str = 'wilson'
        The method to calculate the CI for binary proportions.
    round_ndigits: int = 3
        Number of digits to return after the decimal point.
    numerical_to_label_map: Optional[Dict[int, str]]
        Mapping from class indices to descriptive names.
    confidence_level: float, optional
        The confidence level, by default 0.95

    Returns
    -------
    pd.DataFrame
        A DataFrame containing precision, recall, F1-score, and their confidence intervals for each class,
        as well as micro and macro averages.
    """

    # Unique classes in the dataset
    classes = np.unique(y_true)

    # Validate that all unique classes are covered in the numerical_to_label_map if provided
    if numerical_to_label_map is not None:
        missing_labels = [cls for cls in classes if cls not in numerical_to_label_map]
        if missing_labels:
            raise ValueError(f'Missing labels for classes: {missing_labels}')

    data = []  # List to store row dictionaries

    # Calculate precision, recall, f1 for each class treated as binary (one-vs-rest)
    for class_ in classes:
        y_true_binary = [1 if y == class_ else 0 for y in y_true]
        y_pred_binary = [1 if y == class_ else 0 for y in y_pred]

        # Calculate metrics
        precision, precision_ci = precision_score(y_true_binary, y_pred_binary, average='binary', method=binary_method)
        recall, recall_ci = recall_score(y_true_binary, y_pred_binary, average='binary', method=binary_method)
        binary_f1, binary_f1_ci = f1_score(y_true_binary, y_pred_binary, confidence_level=confidence_level, average='binary')

        class_name = numerical_to_label_map[class_] if (
            numerical_to_label_map and class_ in numerical_to_label_map) else f'Class {class_}'
        support = sum(y_true_binary)

        # Append the per-class metrics as a new row
        data.append({
            'Class': class_name,
            'Precision': round(precision, round_ndigits),
            'Recall': round(recall, round_ndigits),
            'F1-Score': round(binary_f1, round_ndigits),
            'Precision CI': round_tuple(precision_ci, round_ndigits),
            'Recall CI': round_tuple(recall_ci, round_ndigits),
            'F1-Score CI': round_tuple(binary_f1_ci, round_ndigits),
            'Support': support
        })

    precision_micro, p_ci_micro = precision_score(y_true, y_pred, average='micro')
    precision_macro, p_ci_macro = precision_score(y_true, y_pred, average='macro')

    recall_micro, r_ci_micro = recall_score(y_true, y_pred, average='micro')
    recall_macro, r_ci_macro = recall_score(y_true, y_pred, average='macro')

    f1_micro, f1_ci_micro = f1_score(y_true, y_pred, average='micro')
    f1_macro, f1_ci_macro = f1_score(y_true, y_pred, average='macro')

    data.append({
        'Class': 'micro',
        'Precision': round(precision_micro, round_ndigits),
        'Recall': round(recall_micro, round_ndigits),
        'F1-Score': round(f1_micro, round_ndigits),
        'Precision CI': round_tuple(p_ci_micro, round_ndigits),
        'Recall CI': round_tuple(r_ci_micro, round_ndigits),
        'F1-Score CI': round_tuple(f1_ci_micro, round_ndigits),
        'Support': len(y_true)
    })

    data.append({
        'Class': 'macro',
        'Precision': round(precision_macro, round_ndigits),
        'Recall': round(recall_macro, round_ndigits),
        'F1-Score': round(f1_macro, round_ndigits),
        'Precision CI': round_tuple(p_ci_macro, round_ndigits),
        'Recall CI': round_tuple(r_ci_macro, round_ndigits),
        'F1-Score CI': round_tuple(f1_ci_macro, round_ndigits),
        'Support': len(y_true)
    })

    df = pd.DataFrame(data)

    return df
10 changes: 6 additions & 4 deletions requirements.txt
@@ -1,4 +1,6 @@
statsmodels
scikit-learn
scipy
numpy
statsmodels~=0.14.2
scikit-learn~=1.4.2
scipy~=1.13.0
numpy~=1.26.4
setuptools~=68.2.0
pandas~=2.2.2