objectif
Encoder les variables catégorielles et standardiser les variables numériques, colonne par colonne, au sein d'un même pipeline.
code minimal
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import numpy as np
# Toy dataset: column 0 holds a categorical label, column 1 a numeric value.
# dtype=object lets the strings and the floats share a single array.
X = np.array(
    [
        ["red", 1.0],
        ["blue", 2.0],
    ],
    dtype=object,
)

# Route each column to its own transformer: one-hot encode column 0 and
# standardize column 1.
pre = ColumnTransformer(
    [
        ("cat", OneHotEncoder(handle_unknown="ignore"), [0]),
        ("num", StandardScaler(), [1]),
    ]
)

# Chain the preprocessing step with the classifier, then fit on the two
# labelled rows.
pipe = Pipeline(
    [
        ("pre", pre),
        ("clf", LogisticRegression(max_iter=1000)),
    ]
)
pipe.fit(X, [1, 0])

# Sanity check: the pipeline exposes `predict`.
print(hasattr(pipe, "predict"))
utilisation
# Look up the preprocessing step by name and confirm that it was configured
# with at least one (name, transformer, columns) entry.
print(len(pipe.named_steps["pre"].transformers) > 0)
variante(s) utile(s)
from sklearn.compose import make_column_selector

# make_column_selector returns a callable; here it is configured with
# dtype_include=object, so the columns need not be listed by hand.
column_selector = make_column_selector(dtype_include=object)
print(callable(column_selector))
notes
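Utiliser handle_unknown='ignore' pour éviter les erreurs au déploiement : une catégorie absente des données d'entraînement est alors encodée par des zéros au lieu de lever une exception.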