objectif
Mesurer l’information mutuelle pour sélectionner des features.
code minimal
from sklearn.feature_selection import mutual_info_classif
from sklearn.datasets import load_iris

# Estimate mutual information between each iris feature and the class label.
features, labels = load_iris(return_X_y=True)
scores = mutual_info_classif(features, labels, random_state=0)
# Sanity check: one MI score per feature column.
print(scores.shape[0] == features.shape[1])
utilisation
from sklearn.feature_selection import mutual_info_regression
import numpy as np

# Two-column design matrix: a deterministic ramp and an independent noise column.
ramp = np.arange(100)
noise_feature = np.random.default_rng(0).normal(size=100)
design = np.column_stack([ramp, noise_feature])
# Target depends (linearly) on the ramp only, plus independent noise.
target = design[:, 0] + np.random.default_rng(1).normal(size=100)
# Sanity check: one MI score per feature column.
print(mutual_info_regression(design, target, random_state=0).size == 2)
variante(s) utile(s)
from sklearn.feature_selection import SelectPercentile, f_classif

# Selector keeping the top 50% of features ranked by the ANOVA F statistic.
selector = SelectPercentile(f_classif, percentile=50)
print(hasattr(selector, "fit"))
notes
- L'information mutuelle (MI) capture les relations non linéaires entre une feature et la cible, contrairement à une mesure de corrélation linéaire (p. ex. Pearson).