← retour aux snippets

CatBoost + Optuna : tuning de bout en bout

Optimiser CatBoostClassifier via Optuna avec early stopping.

python tuning #catboost #optuna #tuning

objectif

Optimiser CatBoostClassifier via Optuna avec early stopping.

code minimal

import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from catboost import CatBoostClassifier
from sklearn.datasets import load_breast_cancer

# Toy binary-classification dataset; stratified 80/20 hold-out split
# (fixed seed so trials are comparable across runs).
X, y = load_breast_cancer(return_X_y=True)
Xtr, Xva, ytr, yva = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=0
)

def objective(trial):
    """Optuna objective: fit CatBoost with early stopping, return validation AUC.

    Side effect: stores the best tree count in the trial's user attrs so the
    final refit can reuse it.
    """
    # Search space; suggestion order matches the original script.
    params = {
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2, log=True),
        "depth": trial.suggest_int("depth", 4, 8),
        "l2_leaf_reg": trial.suggest_float("l2_leaf_reg", 1e-3, 10.0, log=True),
    }
    model = CatBoostClassifier(
        iterations=2000,
        random_seed=0,
        loss_function="Logloss",
        verbose=False,
        **params,
    )
    # use_best_model=True truncates the model at the best eval_set iteration.
    model.fit(Xtr, ytr, eval_set=(Xva, yva), use_best_model=True, verbose=False)
    # Record the retained tree count (fallback 100 if the attribute is absent).
    trial.set_user_attr("best_iteration", int(getattr(model, "tree_count_", 100)))
    return roc_auc_score(yva, model.predict_proba(Xva)[:, 1])

# Maximize validation AUC over 20 trials.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)
best_auc = study.best_value
print(best_auc <= 1.0)

utilisation

# Refit the final model on the full dataset with the tuned parameters,
# capped at the tree count found by early stopping.
best_n = study.best_trial.user_attrs["best_iteration"]
params = study.best_trial.params
final = CatBoostClassifier(
    iterations=best_n,
    loss_function="Logloss",
    random_seed=0,
    verbose=False,
    **params,
)
final.fit(X, y)  # CatBoost fit() returns self, so `final` is the fitted model
print(hasattr(final, "predict_proba"))

variante(s) utile(s)

# Export the trial history as a pandas DataFrame for inspection.
df = study.trials_dataframe()
cols = df.columns
print("value" in cols)

notes

  • CatBoost gère bien les features catégorielles; pour DataFrame avec strings, passez cat_features.