objectif
Tracer les courbes ROC et précision-rappel (PR) ; choisir des métriques adaptées aux classes rares.
code minimal
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score
# Load the dataset as a (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)

# Stratified train/test split with a fixed seed so the run is reproducible.
Xtr, Xte, ytr, yte = train_test_split(X, y, stratify=y, random_state=42)

# Build the classifier, then fit it on the training portion.
clf = LogisticRegression(max_iter=1000)
clf.fit(Xtr, ytr)

# Keep only the second column of predict_proba (scores for class index 1).
proba = clf.predict_proba(Xte)[:, 1]

# Print, for ROC AUC then average precision, whether the score rounded to
# three decimals exceeds 0.5 (booleans rather than the raw values).
print(
    *(
        round(metric(yte, proba), 3) > 0.5
        for metric in (roc_auc_score, average_precision_score)
    )
)
utilisation
from sklearn.metrics import roc_curve, precision_recall_curve
# ROC curve points; the third returned array (thresholds) is discarded.
fpr, tpr, _ = roc_curve(yte, proba)

# Precision-recall curve points; thresholds are discarded here as well.
prec, rec, _ = precision_recall_curve(yte, proba)

# Report how many points each curve contains.
print(f"{len(fpr)} {len(prec)}")
variante(s) utile(s)
from sklearn.metrics import RocCurveDisplay, PrecisionRecallDisplay
# Build the ROC display straight from the true labels and predicted scores;
# the returned display object is not kept.
RocCurveDisplay.from_predictions(yte, proba)

# Same for the precision-recall display.
PrecisionRecallDisplay.from_predictions(yte, proba)

# Signal that both display calls completed without raising.
print("displays ok")
notes
- L'aire sous la courbe PR (estimée ici par l'average precision) est plus informative que l'AUC ROC lorsque la classe positive est rare.