-
Notifications
You must be signed in to change notification settings - Fork 3
/
utils.py
40 lines (26 loc) · 1.26 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from math import log, exp
import numpy as np
from sklearn.metrics import roc_auc_score
def cos_sim_to_prob(sim):
    '''
    Linearly rescales a cosine similarity onto the probability scale.
    Parameters:
        sim: similarity value; for inputs in [-1, 1] the result lies in [0, 1]
    Returns:
        (sim + 1) / 2; no clipping is applied to out-of-range inputs
    '''
    shifted = sim + 1  # move the lower bound from -1 up to 0
    return shifted / 2  # shrink the width-2 interval down to width 1
def log_prob_to_prob(log_prob):
    '''
    Converts a natural-log probability back into a plain probability.
    Parameters:
        log_prob: scalar log-probability (passed to math.exp)
    Returns:
        exp(log_prob) as a float
    '''
    prob = exp(log_prob)
    return prob
def prob_to_log_prob(prob):
    '''
    Converts a probability into its natural-log form.
    Parameters:
        prob: scalar probability (passed to math.log); must be strictly
            positive, since math.log raises ValueError for 0 or negative input
    Returns:
        log(prob) as a float
    '''
    log_prob = log(prob)
    return log_prob
def calculate_auroc(all_disease_probs, gt_diseases):
    '''
    Calculates the AUROC (Area Under the Receiver Operating Characteristic curve) for multiple diseases.
    Parameters:
        all_disease_probs (numpy array): predicted per-disease scores/probabilities of shape (N_samples, N_diseases)
        gt_diseases (numpy array): ground truth disease labels, a multi-hot array of shape (N_samples, N_diseases)
    Returns:
        overall_auroc (float): the macro-averaged AUROC, i.e. the unweighted mean of the per-disease scores
        per_disease_auroc (numpy array): an array of shape (N_diseases,) containing the AUROC score for each disease
    Raises:
        ValueError: if the two inputs are not 2-D arrays of identical shape.
    Note:
        A disease column whose ground truth contains a single class is handled
        by roc_auc_score itself (presumably an error or NaN depending on the
        installed scikit-learn version - confirm against the pinned version).
    '''
    all_disease_probs = np.asarray(all_disease_probs)
    gt_diseases = np.asarray(gt_diseases)

    # Validate up front: the column loop below would otherwise silently ignore
    # surplus prediction columns.
    if gt_diseases.ndim != 2 or all_disease_probs.shape != gt_diseases.shape:
        raise ValueError(
            'all_disease_probs and gt_diseases must be 2-D arrays of the same shape, '
            f'got {all_disease_probs.shape} and {gt_diseases.shape}'
        )

    num_diseases = gt_diseases.shape[1]
    per_disease_auroc = np.zeros((num_diseases,))
    for i in range(num_diseases):
        # Compute the AUROC score for each disease
        per_disease_auroc[i] = roc_auc_score(gt_diseases[:, i], all_disease_probs[:, i])

    # Macro averaging is the unweighted mean of the per-label scores, so reuse
    # the values computed above instead of asking sklearn to recompute every
    # per-disease AUROC a second time.
    overall_auroc = float(per_disease_auroc.mean())
    return overall_auroc, per_disease_auroc