← retour aux snippets

sklearn: transformateur personnalisé

Créer un transformateur compatible avec Pipeline (méthodes fit/transform).

objectif

Créer un transformateur personnalisé compatible avec Pipeline, c'est-à-dire une classe qui expose les méthodes fit et transform.

code minimal

from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

class AddOnes(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends a constant column of ones.

    The class follows the fit/transform protocol so it can be used as a
    step in a scikit-learn ``Pipeline``.
    """

    def fit(self, X, y=None):
        """Return ``self`` unchanged; there is nothing to learn from the data.

        ``X`` and ``y`` are accepted only to satisfy the fit/transform
        protocol and are not inspected.
        """
        return self

    def transform(self, X):
        """Return ``X`` with one extra trailing column filled with 1.0.

        Parameters
        ----------
        X : 2-D array-like or SciPy sparse matrix
            Input samples, one row per sample.

        Returns
        -------
        numpy.ndarray or SciPy CSR sparse matrix
            Same number of rows as ``X`` and one more column. Dense input
            yields a dense result; sparse input yields a CSR result.
        """
        # Imported locally so the block does not depend on a file-level
        # SciPy import (SciPy is already a scikit-learn dependency).
        from scipy import sparse

        if sparse.issparse(X):
            # np.hstack cannot combine sparse and dense blocks, so stack
            # with the sparse-aware helper and keep the result in CSR form.
            ones = sparse.csr_matrix(np.ones((X.shape[0], 1)))
            return sparse.hstack([X, ones], format="csr")

        # Coerce list-like input (which has no ``.shape``) while leaving
        # existing ndarray instances, including subclasses, untouched.
        X = np.asanyarray(X)
        return np.hstack([X, np.ones((X.shape[0], 1))])

# Sanity check: a fresh AddOnes instance must expose both methods of the
# fit/transform protocol; prints True when it does.
print(all(hasattr(AddOnes(), method) for method in ("fit", "transform")))

utilisation

from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression
import numpy as np

# Toy regression data: three samples with two float features each.
X = np.arange(6, dtype=float).reshape(3, 2)
y = np.array([1.0, 2.0, 3.0])

# Chain the custom transformer with a linear model, then fit the whole
# pipeline in place (fit returns the pipeline itself).
model = make_pipeline(AddOnes(), LinearRegression())
model.fit(X, y)

# The fitted pipeline exposes predict because its last step is a regressor.
print(hasattr(model, "predict"))

variante(s) utile(s)

from sklearn.compose import ColumnTransformer
# Build (without fitting) a ColumnTransformer whose single entry pairs
# AddOnes with columns 0 and 1, and confirm the composite exposes fit.
print(
    hasattr(
        ColumnTransformer(transformers=[("ones", AddOnes(), [0, 1])]),
        "fit",
    )
)

notes

  • Toujours retourner un tableau NumPy ou une matrice creuse (CSR) ayant le même nombre d’échantillons (lignes) que l’entrée.