Improvements to AucPerfMetric #643

Merged (5 commits, Aug 11, 2020)
27 changes: 24 additions & 3 deletions python/test/perf_metric_test.py
@@ -97,11 +97,32 @@ def test_auc_perf_metric(self):
predictions = [1, 2, 3, 4]
metric = AucPerfMetric(groundtruths, predictions)
result = metric.evaluate()
self.assertAlmostEqual(result['score'], 0.9375, places=6)
self.assertAlmostEqual(result['score'], 0.9999999999999999, places=6)
self.assertAlmostEqual(result['AUC_BW'], 0.9999999999999999, places=6)
self.assertAlmostEqual(result['AUC_DS'], 0.9375, places=6)
self.assertAlmostEqual(result['CC_0'], 1.0, places=6)
self.assertAlmostEqual(result['THR'], 3.0, places=6)
self.assertAlmostEqual(result['THR'], 1.0, places=6)

@unittest.skipIf(sys.version_info < (3,), reason="For py3 only: py2 uses a different random seed.")
def test_auc_perf_multiple_metrics(self):
np.random.seed(1)
groundtruths = np.random.normal(0, 1.0, [4, 10]) + np.tile(np.array([1, 2, 3, 4]), [10, 1]).T
predictions = [[1, 2, 3, 4], [3, 1, 2, 4]]
metric = AucPerfMetric(groundtruths, predictions)
result = metric.evaluate()
self.assertAlmostEqual(result['score'][0], 0.9999999999999999, places=6)
self.assertAlmostEqual(result['AUC_BW'][0], 0.9999999999999999, places=6)
self.assertAlmostEqual(result['AUC_DS'][0], 0.9375, places=6)
self.assertAlmostEqual(result['CC_0'][0], 1.0, places=6)
self.assertAlmostEqual(result['THR'][0], 1.0, places=6)
self.assertAlmostEqual(result['score'][1], 0.8125, places=6)
self.assertAlmostEqual(result['AUC_BW'][1], 0.8125, places=6)
self.assertAlmostEqual(result['AUC_DS'][1], 0.6250, places=6)
self.assertAlmostEqual(result['CC_0'][1], 0.75, places=6)
self.assertAlmostEqual(result['THR'][1], 2, places=6)
self.assertAlmostEqual(result['pDS_DL'][0, 1], 0.02746864, places=6)
self.assertAlmostEqual(result['pBW_DL'][0, 1], 0.06136883, places=6)
self.assertAlmostEqual(result['pCC0_b'][0, 1], 0.03250944, places=6)

def test_auc_metrics_performance(self):
mat_filepath = VmafConfig.test_resource_path('data_Toyama.mat')
@@ -110,7 +131,7 @@ def test_auc_metrics_performance(self):
self.assertAlmostEqual(np.float(np.mean(results['AUC_DS'])), 0.69767003960902052, places=6)
self.assertAlmostEqual(np.float(np.mean(results['AUC_BW'])), 0.94454700301894534, places=6)
self.assertAlmostEqual(np.float(np.mean(results['CC_0'])), 0.88105386206276415, places=6)
self.assertAlmostEqual(np.float(np.mean(results['THR'])), 6.2392849606450556, places=6)
self.assertAlmostEqual(np.float(np.mean(results['THR'])), 3.899105581509778, places=6)

def test_respow_perf_metric(self):
np.random.seed(0)
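The updated tests above exercise the new multi-metric interface. As a rough usage sketch (not part of this diff; the constructor and evaluate() calls mirror the tests, and the import path is assumed to follow the file location python/vmaf/core/perf_metric.py):

import numpy as np
from vmaf.core.perf_metric import AucPerfMetric

np.random.seed(1)
groundtruths = np.random.normal(0, 1.0, [4, 10]) + np.tile(np.array([1, 2, 3, 4]), [10, 1]).T

# single objective metric: result values are scalars
result = AucPerfMetric(groundtruths, [1, 2, 3, 4]).evaluate()
print(result['score'], result['THR'])

# two objective metrics at once: result values are indexed by metric
results = AucPerfMetric(groundtruths, [[1, 2, 3, 4], [3, 1, 2, 4]]).evaluate()
print(results['AUC_BW'][0], results['AUC_BW'][1])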
92 changes: 61 additions & 31 deletions python/vmaf/core/perf_metric.py
@@ -173,16 +173,16 @@ def _metrics_performance(objScoDif, signif):
# end
pDS_DL = np.ones([M, M])
for i in range(1, M):
for j in range(i+1, M+1):
for j in range(i + 1, M + 1):
# http://stackoverflow.com/questions/4257394/slicing-of-a-numpy-2d-array-or-how-do-i-extract-an-mxm-submatrix-from-an-nxn-ar
pDS_DL[i-1, j-1] = calpvalue(AUC_DS[[i-1, j-1]], C[[[i-1],[j-1]],[i-1, j-1]])
pDS_DL[j-1, i-1] = pDS_DL[i-1, j-1]
pDS_DL[i - 1, j - 1] = calpvalue(AUC_DS[[i - 1, j - 1]], C[[[i - 1], [j - 1]], [i - 1, j - 1]])
pDS_DL[j - 1, i - 1] = pDS_DL[i - 1, j - 1]

# [pDS_HM,CI_DS] = significanceHM(S, D, AUC_DS);
pDS_HM, CI_DS = significanceHM(S, D, AUC_DS)
## [pDS_HM,CI_DS] = significanceHM(S, D, AUC_DS);
# pDS_HM, CI_DS = significanceHM(S, D, AUC_DS)

# THR = prctile(D',95);
THR = np.percentile(D, 95, axis=1)
THR = np.percentile(S, 95, axis=1)

# %%%%%%%%%%%%%%%%%%%%%%% Better / Worse %%%%%%%%%%%%%%%%%%%%%%%%%%%

@@ -213,7 +213,7 @@ def _metrics_performance(objScoDif, signif):
L = B.shape[1] + W.shape[1]
CC_0 = np.zeros(M)
for m in range(M):
CC_0[m] = float(np.sum(B[m,:] > 0) + np.sum(W[m,:] < 0)) / L
CC_0[m] = float(np.sum(B[m, :] > 0) + np.sum(W[m, :] < 0)) / L

# % significance calculation

@@ -236,18 +236,18 @@ def _metrics_performance(objScoDif, signif):
pCC0_b = np.ones([M, M])
# pCC0_F = np.ones([M, M])
for i in range(1, M):
for j in range(i+1, M+1):
pBW_DL[i-1, j-1] = calpvalue(AUC_BW[[i-1, j-1]], C[[[i-1],[j-1]],[i-1, j-1]])
pBW_DL[j-1, i-1] = pBW_DL[i-1, j-1]
for j in range(i + 1, M + 1):
pBW_DL[i - 1, j - 1] = calpvalue(AUC_BW[[i - 1, j - 1]], C[[[i - 1], [j - 1]], [i - 1, j - 1]])
pBW_DL[j - 1, i - 1] = pBW_DL[i - 1, j - 1]

pCC0_b[i-1, j-1] = significanceBinomial(CC_0[i-1], CC_0[j-1], L)
pCC0_b[j-1, i-1] = pCC0_b[i-1, j-1]
pCC0_b[i - 1, j - 1] = significanceBinomial(CC_0[i - 1], CC_0[j - 1], L)
pCC0_b[j - 1, i - 1] = pCC0_b[i - 1, j - 1]

# pCC0_F[i-1, j-1] = fexact(CC_0[i-1]*L, 2*L, CC_0[i-1]*L + CC_0[j-1]*L, L, 'tail', 'b') / 2.0
# pCC0_F[j-1, i-1] = pCC0_F[i-1,j]

# [pBW_HM,CI_BW] = significanceHM(B, W, AUC_BW);
pBW_HM,CI_BW = significanceHM(B, W, AUC_BW)
# # [pBW_HM,CI_BW] = significanceHM(B, W, AUC_BW);
# pBW_HM, CI_BW = significanceHM(B, W, AUC_BW)

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

@@ -266,10 +266,10 @@ def _metrics_performance(objScoDif, signif):
result = {
'AUC_DS': AUC_DS,
'pDS_DL': pDS_DL,
'pDS_HM': pDS_HM,
# 'pDS_HM': pDS_HM,
'AUC_BW': AUC_BW,
'pBW_DL': pBW_DL,
'pBW_HM': pBW_HM,
# 'pBW_HM': pBW_HM,
'CC_0': CC_0,
'pCC0_b': pCC0_b,
# 'pCC0_F': pCC0_F,
@@ -304,7 +304,7 @@ def _signif(a, b):
n_b = len(b)
var_a = np.var(a, ddof=1)
var_b = np.var(b, ddof=1)
den = var_a/n_a + var_b/n_b
den = var_a / n_a + var_b / n_b
if den == 0.0:
den = 1e-8
z = (mos_a - mos_b) / np.sqrt(den)
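For context (an editorial sketch, not part of the diff): z above is the standard two-sample statistic on the mean opinion scores of two stimuli, z = (mean_a - mean_b) / sqrt(var_a/n_a + var_b/n_b). A self-contained version of the same computation, with a hypothetical two-sided cutoff of 1.96 for the 0.05 level and a +1/0/-1 return convention chosen only for illustration:

import numpy as np

def signif_sketch(a, b, z_cutoff=1.96):
    a, b = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
    den = a.var(ddof=1) / len(a) + b.var(ddof=1) / len(b)
    if den == 0.0:
        den = 1e-8  # avoid division by zero, as in the code above
    z = (a.mean() - b.mean()) / np.sqrt(den)
    if z > z_cutoff:
        return 1   # a significantly better than b
    if z < -z_cutoff:
        return -1  # a significantly worse than b
    return 0       # no significant difference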
@@ -317,19 +317,41 @@ def _signif(a, b):

# generate pairs
N = len(groundtruths)
objscodif_mtx = np.zeros([N, N])
signif_mtx = np.zeros([N, N])
i = 0
for groundtruth, prediction in zip(groundtruths, predictions):
for groundtruth in groundtruths:
j = 0
for groundtruth2, prediction2 in zip(groundtruths, predictions):
objscodif = prediction - prediction2
for groundtruth2 in groundtruths:
signif = _signif(groundtruth, groundtruth2)
objscodif_mtx[i, j] = objscodif
signif_mtx[i, j] = signif
j += 1
i += 1

if isinstance(predictions[0], list):
M = len(predictions)
else:
M = 1

objscodif_all = np.zeros([M, N * N])
for metric_idx in range(M):
objscodif_mtx = np.zeros([N, N])

if M > 1:
metric_predictions = predictions[metric_idx]
else:
metric_predictions = predictions

i = 0
for prediction in metric_predictions:
j = 0
for prediction2 in metric_predictions:
objscodif = prediction - prediction2
objscodif_mtx[i, j] = objscodif
j += 1
i += 1

objscodif_all[metric_idx, :] = objscodif_mtx.reshape(1, N * N)

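An editorial aside (not part of the diff): the loop above builds, for each metric, an N x N matrix whose (i, j) entry is prediction i minus prediction j, then flattens it into one row of objscodif_all. The same matrix can be produced with NumPy broadcasting; a minimal standalone sketch:

import numpy as np

def pairwise_differences(metric_predictions):
    # entry (i, j) equals metric_predictions[i] - metric_predictions[j]
    p = np.asarray(metric_predictions, dtype=float)
    return p[:, None] - p[None, :]

# e.g. pairwise_differences([1, 2, 3, 4]).reshape(1, -1) matches the row
# the loop writes into objscodif_all for that metric.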
# import matplotlib.pyplot as plt
# plt.figure()
# plt.imshow(objscodif_mtx, interpolation='nearest')
@@ -341,18 +341,26 @@ def _signif(a, b):
# plt.colorbar()
# DisplayConfig.show()

results = cls._metrics_performance(objscodif_mtx.reshape(1, N*N), signif_mtx.reshape(1, N*N))

# _metrics_performance allows processing multiple objective quality
# metrics together. Here we just process one:
result = {}
for key in results:
result[key] = results[key][0]
results = cls._metrics_performance(objscodif_all, signif_mtx.reshape(1, N * N))
results['score'] = results['AUC_BW']

result['score'] = result['AUC_DS']
if isinstance(predictions[0], list):
return results
else:
result = {}
for key in results:
result[key] = results[key][0]
return result

return result
def _assert_args(self):
if isinstance(self.predictions[0], list):
for metric in self.predictions:
assert len(self.groundtruths) == len(metric), 'The lengths of groundtruth labels and predictions do not match.'
for score in metric:
assert isinstance(score, float) or isinstance(score, int), 'Predictions need to be a list of lists of numbers.'

else:
assert len(self.groundtruths) == len(self.predictions), 'The lengths of groundtruth labels and predictions do not match.'

class ResolvingPowerPerfMetric(RawScorePerfMetric):
"""