objectif
Mesurer l'importance des features par permutation.
code minimal
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split

# Load the iris data as plain (features, labels) arrays.
X, y = load_iris(return_X_y=True)
# Hold out a stratified validation split: importance measured on the data the
# model was fitted on can credit features the model merely overfitted to.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=0
)
rf = RandomForestClassifier(random_state=0).fit(X_train, y_train)
# Permute each feature 5 times on the held-out split only.
r = permutation_importance(rf, X_val, y_val, n_repeats=5, random_state=0)
# Sanity check: one mean importance value per input feature.
print(r.importances_mean.shape[0] == X.shape[1])
utilisation
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Build the scaler + logistic-regression pipeline, then fit it on X, y.
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=1000),
)
pipe.fit(X, y)
# Score the fitted pipeline on the same X, y and check the upper bound.
pipe_score = pipe.score(X, y)
print(pipe_score <= 1.0)
variante(s) utile(s)
from sklearn.feature_selection import SelectFromModel

# Wrap the already-fitted forest in a selector and confirm it exposes `fit`.
selector = SelectFromModel(rf)
print(hasattr(selector, "fit"))
notes
- Évaluer l'importance par permutation sur un jeu de validation, pas sur le jeu d'entraînement.