← retour aux snippets

sklearn: mutual information

Mesurer l’information mutuelle pour sélectionner des features.

objectif

Mesurer l’information mutuelle pour sélectionner des features.

code minimal

from sklearn.feature_selection import mutual_info_classif
from sklearn.datasets import load_iris

# Estimate the mutual information between each iris feature and the class
# labels; a fixed random_state makes the k-NN-based estimate reproducible.
features, labels = load_iris(return_X_y=True)
scores = mutual_info_classif(features, labels, random_state=0)

# One MI score per feature column -> prints True.
print(scores.shape[0] == features.shape[1])

utilisation

from sklearn.feature_selection import mutual_info_regression
import numpy as np

# Two features: a deterministic ramp and pure Gaussian noise (seeded RNGs
# keep the example reproducible).
ramp = np.arange(100)
noise = np.random.default_rng(0).normal(size=100)
X = np.column_stack([ramp, noise])

# Target depends on the first feature only, plus independent noise.
y = X[:, 0] + np.random.default_rng(1).normal(size=100)

# mutual_info_regression returns one score per feature -> prints True.
scores = mutual_info_regression(X, y, random_state=0)
print(len(scores) == 2)

variante(s) utile(s)

from sklearn.feature_selection import SelectPercentile, f_classif

# SelectPercentile keeps the top 50% of features ranked by the given score
# function; like any sklearn transformer it exposes fit() -> prints True.
selector = SelectPercentile(f_classif, percentile=50)
print(hasattr(selector, "fit"))

notes

  • MI capture les relations non linéaires, contrairement à la corrélation linéaire (Pearson), qui peut manquer ces dépendances.