← retour aux snippets

sklearn: permutation importance

Mesurer l'importance des features par permutation.

objectif

Mesurer l'importance des features par permutation.

code minimal

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
# Load the iris data as a feature matrix X and a label vector y.
X, y = load_iris(return_X_y=True)
# Hold out a stratified validation split: importances measured on the data the
# model was fit on can credit features it merely overfit, so score on unseen rows.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=0
)
# Fit the forest on the training part only.
rf = RandomForestClassifier(random_state=0).fit(X_train, y_train)
# Shuffle each feature column 5 times on the validation part and record the score drop.
r = permutation_importance(rf, X_val, y_val, n_repeats=5, random_state=0)
# Sanity check: there is one mean importance per feature column.
print(r.importances_mean.shape[0] == X.shape[1])

utilisation

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Chain feature standardization and a logistic regression into one estimator.
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=1000),
)
# Fit the whole pipeline on X, y (defined by the earlier snippet).
pipe.fit(X, y)
# Sanity check: the score on the same data does not exceed 1.0.
print(pipe.score(X, y) <= 1.0)

variante(s) utile(s)

from sklearn.feature_selection import SelectFromModel
# Build a SelectFromModel around the forest `rf` from the earlier snippet
# and check that the resulting object exposes a `fit` method.
selector = SelectFromModel(rf)
print(hasattr(selector, "fit"))

notes

  • Évaluer l'importance sur un jeu de validation, et non sur le jeu d'entraînement.