← retour aux snippets

sklearn: OneHotEncoder + ColumnTransformer

Encoder les variables catégorielles et standardiser les variables numériques, colonne par colonne, au sein d'un même pipeline.

objectif

Encoder les variables catégorielles et standardiser les variables numériques, colonne par colonne, au sein d'un même pipeline.

code minimal

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import numpy as np

# Toy dataset: column 0 holds a category label, column 1 a numeric value.
# dtype=object keeps the strings and the floats side by side in one array.
X = np.array(
    [
        ["red", 1.0],
        ["blue", 2.0],
    ],
    dtype=object,
)

# Route each column to its own preprocessor: one-hot encoding for column 0,
# standard scaling for column 1.
pre = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), [0]),
        ("num", StandardScaler(), [1]),
    ]
)

# Chain the preprocessing step and the classifier, then fit on the two rows.
pipe = Pipeline(
    steps=[
        ("pre", pre),
        ("clf", LogisticRegression(max_iter=1000)),
    ]
)
pipe.fit(X, [1, 0])

# Sanity check: the pipeline exposes a `predict` method.
print(hasattr(pipe, "predict"))

utilisation

# Look up the preprocessing step by name and confirm that it was configured
# with at least one (name, transformer, columns) entry.
pre_step = pipe.named_steps["pre"]
print(len(pre_step.transformers) > 0)

variante(s) utile(s)

from sklearn.compose import make_column_selector

# Build a selector configured to pick columns by dtype (here: object) and
# check that the resulting selector can be called.
selector = make_column_selector(dtype_include=object)
print(callable(selector))

notes

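  • handle_unknown='ignore' : une catégorie absente à l'entraînement est encodée par un vecteur de zéros au lieu de lever une exception, ce qui évite les erreurs au déploiement.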