objectif
Optimiser LGBMClassifier avec Optuna et early stopping.
code minimal
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from lightgbm import LGBMClassifier
from sklearn.datasets import load_breast_cancer
# Toy binary-classification dataset; stratified 80/20 split gives a
# held-out validation fold (Xva, yva) used for early stopping and scoring.
X, y = load_breast_cancer(return_X_y=True)
Xtr, Xva, ytr, yva = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y)
def objective(trial):
    """Optuna objective: fit an LGBMClassifier with early stopping and
    return the validation ROC-AUC (maximized by the study).

    Side effect: stores the early-stopped tree count in the trial's
    user attrs under "best_iteration" for the final refit.
    """
    # Local import so this block stays self-contained; the callback is the
    # supported way to enable early stopping in LightGBM 4.x.
    from lightgbm import early_stopping

    clf = LGBMClassifier(
        n_estimators=5000,  # generous upper bound; early stopping picks the real count
        learning_rate=trial.suggest_float("learning_rate", 0.01, 0.2, log=True),
        num_leaves=trial.suggest_int("num_leaves", 16, 64),
        min_child_samples=trial.suggest_int("min_child_samples", 5, 50),
        subsample=trial.suggest_float("subsample", 0.6, 1.0),
        colsample_bytree=trial.suggest_float("colsample_bytree", 0.6, 1.0),
        reg_lambda=trial.suggest_float("reg_lambda", 1e-3, 10.0, log=True),
        random_state=0,
    )
    # Bug fix: the original passed callbacks=[] — no early stopping at all,
    # so all 5000 trees were trained and best_iteration_ was never set
    # (the getattr fallback silently recorded 5000). It also passed
    # verbose=False, a fit() keyword removed in LightGBM 4.0 (TypeError);
    # verbosity is now controlled via the callback.
    clf.fit(
        Xtr,
        ytr,
        eval_set=[(Xva, yva)],
        eval_metric="auc",
        callbacks=[early_stopping(stopping_rounds=50, verbose=False)],
    )
    auc = roc_auc_score(yva, clf.predict_proba(Xva)[:, 1])
    # best_iteration_ is set by the early-stopping callback; fall back to
    # n_estimators in the (unlikely) case stopping never triggered.
    best_iter = getattr(clf, "best_iteration_", None) or clf.n_estimators
    trial.set_user_attr("best_iteration", int(best_iter))
    return auc
# Maximize validation AUC over 20 sampled hyperparameter configurations.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)
print(len(study.trials) >= 1)  # sanity check: at least one trial ran
utilisation
# Final retraining: refit on the full dataset using the best trial's
# hyperparameters and the early-stopped tree count recorded by the objective.
best_n = study.best_trial.user_attrs["best_iteration"]
params = study.best_trial.params  # contains only the suggest_* keys, not n_estimators
final = LGBMClassifier(n_estimators=best_n, random_state=0, **params).fit(X, y)
print(hasattr(final, "predict_proba"))  # sanity check: model is fitted and usable
variante(s) utile(s)
# Parameter importances: dict mapping each tuned hyperparameter name to an
# importance score for the study's objective.
from optuna.importance import get_param_importances
imp = get_param_importances(study)
print(isinstance(imp, dict))
notes
- LGBMClassifier n'expose best_iteration_ qu'après un fit avec eval_set ET un callback early_stopping (eval_set seul ne suffit pas); utilisez-le pour fixer n_estimators final.