← retour aux snippets

sklearn: calibration curve

Évaluer la calibration des probabilités.

objectif

Évaluer la calibration des probabilités prédites par un classifieur à l'aide d'une courbe de calibration (diagramme de fiabilité).

code minimal

from sklearn.calibration import calibration_curve
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Load the breast-cancer dataset directly as a (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)

# Stratified split with a fixed seed so the snippet is reproducible.
Xtr, Xte, ytr, yte = train_test_split(X, y, stratify=y, random_state=42)

# Fit the classifier on the training part, then keep column 1 of
# predict_proba for the held-out part.
clf = LogisticRegression(max_iter=1000)
clf.fit(Xtr, ytr)
proba = clf.predict_proba(Xte)[:, 1]

# Bin the predicted probabilities (10 bins requested) and unpack the two
# arrays returned by calibration_curve.
prob_true, prob_pred = calibration_curve(yte, proba, n_bins=10)

# Sanity check: both arrays should have the same number of entries.
print(len(prob_pred) == len(prob_true))

utilisation

# Element-wise absolute gap between the two calibration-curve arrays.
# NOTE(review): if both arrays hold values in [0, 1], this bound is always
# satisfied -- confirm whether a tighter tolerance was intended.
gaps = abs(prob_true - prob_pred)
print(max(gaps) <= 1.0)

variante(s) utile(s)

from sklearn.calibration import CalibratedClassifierCV
# Build a calibrator with default settings and check that it exposes `fit`.
calibrator = CalibratedClassifierCV()
print(hasattr(calibrator, "fit"))

notes

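  • La calibration isotonique (method="isotonic") est souvent meilleure que la calibration sigmoïde, mais elle demande plus de données : avec peu d'exemples, elle a tendance à sur-apprendre.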