objectif
Prétraiter séparément des colonnes numériques et catégorielles, puis entraîner un modèle dans un même pipeline.
code minimal
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
import numpy as np
# Toy dataset: column 0 is numeric, column 1 is a categorical string.
# dtype=object stops NumPy from coercing the whole array to strings.
X = np.array([[25, "FR"], [40, "DE"], [30, "FR"]], dtype=object)
y = np.array([0, 1, 0])

# Route each column to its own transformer: scale column 0, one-hot encode
# column 1 (handle_unknown="ignore" tolerates categories unseen during fit).
numeric_step = ("num", StandardScaler(), [0])
categorical_step = ("cat", OneHotEncoder(handle_unknown="ignore"), [1])
ct = ColumnTransformer([numeric_step, categorical_step])

# Chain preprocessing and classifier, then fit the whole pipeline at once.
pipe = Pipeline([("prep", ct), ("clf", LogisticRegression(max_iter=1000))])
pipe.fit(X, y)
print(len(pipe.named_steps))
utilisation
# Predict the class of one new row laid out like the training data
# (numeric value first, category second) and print it as a plain list.
new_samples = [[35, "FR"]]
predicted = pipe.predict(new_samples)
print(predicted.tolist())
variante(s) utile(s)
from sklearn.impute import SimpleImputer
# Variant: the numeric column gets its own sub-pipeline that imputes missing
# values (SimpleImputer with its default settings) before scaling.
numeric_pipeline = Pipeline([("imp", SimpleImputer()), ("sc", StandardScaler())])
ct2 = ColumnTransformer([("num", numeric_pipeline, [0])])
print(hasattr(ct2, "transform"))
notes
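- ColumnTransformer accepte aussi bien les tableaux NumPy (colonnes désignées par indice entier) que les DataFrames pandas (colonnes désignées par nom) ; les colonnes non listées sont ignorées par défaut (remainder="drop").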