objectif
Créer un transformateur personnalisé compatible avec `Pipeline` de scikit-learn, c'est-à-dire une classe exposant les méthodes `fit` et `transform`.
code minimal
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np
class AddOnes(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends a constant column of ones.

    Inheriting from ``BaseEstimator`` and ``TransformerMixin`` is what lets
    the class be used as a step in a scikit-learn ``Pipeline``.
    """

    def fit(self, X, y=None):
        """Learn nothing and return the transformer itself.

        Parameters
        ----------
        X : array-like
            Training data; ignored.
        y : array-like, optional
            Targets; ignored.

        Returns
        -------
        AddOnes
            ``self``, so calls can be chained.
        """
        return self

    def transform(self, X):
        """Return ``X`` with one extra trailing column filled with ones.

        Parameters
        ----------
        X : array-like
            Input data, expected to be 2-D with one row per sample.

        Returns
        -------
        numpy.ndarray
            The input columns followed by a single column of ones; the
            number of rows is unchanged.
        """
        # Coerce first so plain array-likes (e.g. nested lists), which have
        # no ``.shape`` attribute, are accepted as well as ndarrays.
        X = np.asarray(X)
        ones_column = np.ones((X.shape[0], 1))
        return np.hstack([X, ones_column])
# Sanity check: a fresh AddOnes instance exposes both methods of the
# transformer interface (prints True when both are present).
print(all(hasattr(AddOnes(), method) for method in ("fit", "transform")))
utilisation
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
import numpy as np
# Toy data: 3 samples with 2 float features each, and one target per sample.
X = np.arange(6, dtype=float).reshape(3, 2)
y = np.array([1.0, 2.0, 3.0])

# Chain the custom transformer with a regressor and fit the whole pipeline.
model = make_pipeline(AddOnes(), LinearRegression())
model = model.fit(X, y)

# The fitted pipeline exposes the final estimator's predict method.
print(hasattr(model, "predict"))
variante(s) utile(s)
from sklearn.compose import ColumnTransformer
# Variant: apply AddOnes to columns 0 and 1 only, through a ColumnTransformer,
# and check that the resulting object can be fitted.
print(
    hasattr(
        ColumnTransformer(transformers=[("ones", AddOnes(), [0, 1])]),
        "fit",
    )
)
notes
- `transform` doit toujours retourner un tableau dense (ndarray) ou une matrice creuse (CSR) ayant le même nombre d'échantillons (lignes) que l'entrée.