objectif
Mesurer l’importance des features par permutation avec sklearn.inspection.
code minimal
# NOTE(review): the original `from sklearn.datasets import load_boston` was
# removed — `load_boston` was deleted from scikit-learn in 1.2, so that import
# breaks on modern installs, and it was unused anyway (the example builds
# synthetic data below, as its own comment said).
from sklearn.model_selection import train_test_split
from sklearn.inspection import permutation_importance
from catboost import CatBoostRegressor
import numpy as np

# Synthetic data for compatibility: 200 samples, 4 features, linear target
# with known weights — the 4th feature has weight 0.0, i.e. it is irrelevant
# by construction, which the importance methods should reflect.
rng = np.random.RandomState(0)
X = rng.randn(200, 4)
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + rng.randn(200) * 0.1
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=0)

# Small CatBoost regressor; fixed seed keeps the example deterministic.
model = CatBoostRegressor(
    iterations=400, learning_rate=0.05, depth=6, verbose=False, random_seed=0
).fit(X_train, y_train)

# Permutation importance: shuffle each feature of the validation set and
# measure the score degradation, averaged over n_repeats shuffles.
r = permutation_importance(model, X_val, y_val, n_repeats=5, random_state=0)
print(r.importances_mean.shape[0] == X.shape[1])  # one importance per feature
utilisation
# Native CatBoost importance (PredictionValuesChange) computed on the
# validation pool; returns one value per feature.
from catboost import Pool

val_pool = Pool(X_val, y_val)
imp_native = model.get_feature_importance(val_pool, type="PredictionValuesChange")
print(X.shape[1] == len(imp_native))
variante(s) utile(s)
# Quick ranking view: feature indices sorted by decreasing mean importance
# (negate so that argsort's ascending order yields a descending ranking).
neg_mean_importance = -r.importances_mean
order = np.argsort(neg_mean_importance)
print(order.shape[0] == X.shape[1])
notes
- La permutation mesure la dégradation de score quand on mélange une feature; coûteux mais fiable.