sklearn StackingClassifier/Regressor
objectif
Expliquer et montrer comment empiler plusieurs modèles avec Stacking pour de meilleures performances.
code minimal
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
# Build a stacked ensemble: out-of-fold predictions of the base learners
# become the input features of a logistic-regression meta-model.
X, y = make_classification(n_samples=2000, n_features=30, random_state=0)
# Hold out a test set: scoring on the same data the model was fitted on
# (the original `stack.score(X, y)`) reports an optimistically biased accuracy.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0, stratify=y
)
estimators = [
    ("rf", RandomForestClassifier(n_estimators=200, random_state=0)),
    # LinearSVC is scale-sensitive: give it its own scaler inside a pipeline
    # so the scaling is refitted per CV fold (no leakage into meta-features).
    ("svc", make_pipeline(StandardScaler(), LinearSVC(dual=False))),
]
final_est = LogisticRegression(max_iter=1000)
# cv=5: meta-features come from out-of-fold predictions of each base learner;
# stack_method="auto" picks predict_proba or decision_function per estimator.
stack = StackingClassifier(
    estimators=estimators, final_estimator=final_est, cv=5, stack_method="auto"
)
stack.fit(X_train, y_train)
# Generalization estimate on data never seen during fitting.
print("test accuracy:", stack.score(X_test, y_test))
utilisation
# Probability predictions are exposed only when the final estimator supports
# predict_proba (LogisticRegression does); probe for the attribute safely.
has_proba = getattr(stack, "predict_proba", None) is not None
print("proba:", bool(has_proba))
variante(s) utile(s)
# version régression (ne pas oublier d'importer le méta-modèle, ex. Ridge)
# from sklearn.ensemble import StackingRegressor
# from sklearn.linear_model import Ridge
# stackr = StackingRegressor(estimators=[...], final_estimator=Ridge())
notes
- Utiliser des folds pour générer les méta-features (paramètre cv de StackingClassifier).
- Attention au leakage: encapsuler prétraitements dans chaque base learner.