Skip to content

Commit a72453d

Browse files
committed
Added metrics
1 parent 374029c commit a72453d

File tree

1 file changed

+142
-8
lines changed

1 file changed

+142
-8
lines changed

pycalib/metrics.py

Lines changed: 142 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,20 @@ def brier_score(y_true, y_pred):
108108

109109

110110
def conf_ECE(y_true, probs, bins=15):
    r"""Confidence Expected Calibration Error

    Calculate ECE score based on model maximum output probabilities and
    true labels

    .. math::

        \text{confidence-ECE}  = \sum_{i=1}^M \frac{|B_{i}|}{N} |
            \text{accuracy}(B_{i}) - \bar{p}(B_{i})|

    In which $p$ are the maximum predicted probabilities.

    Parameters
    ----------
    y_true:
        - a list containing the actual class labels
        - ndarray shape (n_samples) with a list containing actual class
          labels

    probs:
        model output probabilities from which the per-sample maximum is
        taken (passed straight through to ``ECE``)

    bins: (int)
        - into how many bins are probabilities divided (default = 15)

    Returns
    -------
    ece : float
        expected calibration error

    Examples
    --------
    >>> from pycalib.metrics import conf_ECE
    >>> Y = np.array([[1, 0], [0, 1]]).T
    >>> P = np.array([[0.9, 0.1], [0.1, 0.9]]).T
    >>> print(round(conf_ECE(Y, P, bins=2), 8))
    0.1
    >>> Y = np.array([[1, 1, 1, 0, 0, 0], [0, 0, 0, 1, 1, 1]]).T
    >>> P = np.array([[.9, .8, .7, .3, .2, .1], [.1, .2, .3, .7, .8, .9]]).T
    >>> print(round(conf_ECE(Y, P, bins=2), 8))
    0.2
    """
    # Delegate to the generic ECE with the "confidence" configuration:
    # no normalization, top-probability only (ece_full=False).
    score = ECE(y_true, probs, normalize=False, bins=bins, ece_full=False)
    return score
134155

@@ -432,6 +453,64 @@ def conf_MCE(y_true, probs, bins=15):
432453
return MCE(y_true, probs, normalize=False, bins=bins, mce_full=False)
433454

434455

456+
def binary_MCE(y_true, probs, power=1, bins=15):
    r"""Binary Maximum Calibration Error

    .. math::

        \text{binary-MCE}  = \max_{i \in \{1, ..., M\}} |\bar{y}(B_{i}) - \bar{p}(B_{i})|

    Parameters
    ----------
    y_true : indicator vector (n_samples, )
        True labels.

    probs : matrix (n_samples, )
        Predicted probabilities for positive class.

    power : int, optional (default=1)
        Exponent applied to each bin's absolute deviation before taking
        the maximum.

    bins : int, optional (default=15)
        Number of equal-width probability bins.

    Returns
    -------
    score : float

    Examples
    --------
    >>> from pycalib.metrics import binary_MCE
    >>> Y = np.array([0, 1])
    >>> P = np.array([0.1, 0.6])
    >>> print(round(binary_MCE(Y, P, bins=2), 8))
    0.4
    >>> Y = np.array([0, 0, 0, 1, 1, 1])
    >>> P = np.array([.1, .2, .3, .6, .7, .8])
    >>> print(round(binary_MCE(Y, P, bins=2), 8))
    0.3
    >>> Y = np.array([0, 0, 0, 1, 1, 1])
    >>> P = np.array([.1, .2, .3, .3, .2, .1])
    >>> print(round(binary_MCE(Y, P, bins=1), 8))
    0.3
    >>> Y = np.array([0, 0, 0, 1, 1, 1])
    >>> P = np.array([.1, .2, .3, .9, .9, .9])
    >>> print(round(binary_MCE(Y, P, bins=2), 8))
    0.2
    >>> Y = np.array([0, 0, 0, 1, 1, 1])
    >>> P = np.array([.1, .1, .1, .6, .6, .6])
    >>> print(round(binary_MCE(Y, P, bins=2), 8))
    0.4
    """
    # Assign every probability to an equal-width bin; the +1e-8 on the upper
    # edge keeps a probability of exactly 1.0 inside the last bin.
    idx = np.digitize(probs, np.linspace(0, 1 + 1e-8, bins + 1)) - 1

    def bin_func(y, p, idx):
        # Per-bin deviation between mean predicted probability and the
        # empirical positive rate, raised to `power`.
        return (np.abs(np.mean(p[idx]) - np.mean(y[idx])) ** power)

    # Only bins that actually contain samples contribute to the maximum
    # (np.unique(idx) skips empty bins, which would produce NaN means).
    return max(bin_func(y_true, probs, idx == i) for i in np.unique(idx))
512+
513+
435514
def binary_ECE(y_true, probs, power=1, bins=15):
436515
r"""Binary Expected Calibration Error
437516
@@ -463,6 +542,10 @@ def binary_ECE(y_true, probs, power=1, bins=15):
463542
>>> P = np.array([.1, .2, .3, .7, .8, .9])
464543
>>> print(round(binary_ECE(Y, P, bins=2), 8))
465544
0.2
545+
>>> Y = np.array([0, 0, 0, 1, 1, 1])
546+
>>> P = np.array([.4, .4, .4, .6, .6, .6])
547+
>>> print(round(binary_ECE(Y, P, bins=2), 8))
548+
0.4
466549
"""
467550
idx = np.digitize(probs, np.linspace(0, 1 + 1e-8, bins + 1)) - 1
468551

@@ -506,12 +589,12 @@ def classwise_ECE(y_true, probs, power=1, bins=15):
506589
Examples
507590
--------
508591
>>> from pycalib.metrics import classwise_ECE
509-
>>> Y = np.array([[1, 0], [0, 1]])
510-
>>> P = np.array([[0.9, 0.1], [0.1, 0.9]])
592+
>>> Y = np.array([[1, 0], [0, 1]]).T
593+
>>> P = np.array([[0.9, 0.1], [0.1, 0.9]]).T
511594
>>> print(round(classwise_ECE(Y, P, bins=2), 8))
512595
0.1
513-
>>> Y = np.array([[1, 1, 1, 0, 0, 0], [0, 0, 0, 1, 1, 1]])
514-
>>> P = np.array([[.9, .8, .7, .3, .2, .1], [.1, .2, .3, .7, .8, .9]])
596+
>>> Y = np.array([[1, 1, 1, 0, 0, 0], [0, 0, 0, 1, 1, 1]]).T
597+
>>> P = np.array([[.9, .8, .7, .3, .2, .1], [.1, .2, .3, .7, .8, .9]]).T
515598
>>> print(round(classwise_ECE(Y, P, bins=2), 8))
516599
0.2
517600
"""
@@ -531,6 +614,57 @@ def classwise_ECE(y_true, probs, power=1, bins=15):
531614
)
532615

533616

617+
def classwise_MCE(y_true, probs, bins=15):
    r"""Classwise Maximum Calibration Error

    .. math::

        \text{class-$j$-MCE}  = \max_{i \in {1, ..., M}}
            |\bar{y}_j(B_{i,j}) - \bar{p}_j(B_{i,j})|,

        \text{classwise-MCE}  = \max_{j \in {1, ..., K}} \text{class-$j$-MCE}

    Parameters
    ----------
    y_true : label indicator matrix (n_samples, n_classes)
        True labels.
        # TODO Add option to pass array with shape (n_samples, )

    probs : matrix (n_samples, n_classes)
        Predicted probabilities.

    bins : int, optional (default=15)
        Number of equal-width probability bins used per class.

    Returns
    -------
    score : float

    Examples
    --------
    >>> from pycalib.metrics import classwise_MCE
    >>> Y = np.array([[1, 0], [0, 1]]).T
    >>> P = np.array([[0.8, 0.1], [0.2, 0.9]]).T
    >>> print(round(classwise_MCE(Y, P, bins=2), 8))
    0.2
    >>> Y = np.array([[1, 1, 1, 0, 0, 0], [0, 0, 0, 1, 1, 1]]).T
    >>> P = np.array([[.8, .7, .6, .1, .1, .1], [.2, .3, .4, .9, .9, .9]]).T
    >>> print(round(classwise_MCE(Y, P, bins=2), 8))
    0.3
    """
    probs = np.array(probs)
    # Convert y_true to an ndarray *before* touching .shape: accessing
    # y_true.shape on a plain Python list raised AttributeError instead of
    # falling through to the label_binarize branch.
    y_true = np.array(y_true)
    if not np.array_equal(probs.shape, y_true.shape):
        # Labels arrived as class indices; expand to a one-hot indicator
        # matrix matching probs.
        y_true = label_binarize(y_true, classes=range(probs.shape[1]))

    n_classes = probs.shape[1]

    # The classwise MCE is the worst per-class binary MCE.
    return np.max(
        [
            binary_MCE(
                y_true[:, c].astype(float), probs[:, c], bins=bins
            ) for c in range(n_classes)
        ]
    )
666+
667+
534668
def simplex_binning(y_true, probs, bins=15):
535669

536670
probs = np.array(probs)

0 commit comments

Comments
 (0)