objectif
Utiliser DBSCAN et tracer la courbe des k-distances pour choisir le paramètre eps.
code minimal
from sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors
import numpy as np
# Toy data: three points stacked closely along x = 0 plus one distant point.
X = np.array([
    [0, 0],
    [0, 0.1],
    [0, 0.2],
    [5, 5],
])

# Fit the neighbour index and query it with the training points themselves,
# asking for 4 neighbours per sample.
knn = NearestNeighbors(n_neighbors=4)
knn.fit(X)
distances, _ = knn.kneighbors(X)

# k-distance curve: the last returned distance of every sample, ascending.
last_neighbor_distance = distances[:, -1]
k_distances = np.sort(last_neighbor_distance)

# Cluster the same data with DBSCAN (eps=0.5, min_samples=4).
clusterer = DBSCAN(eps=0.5, min_samples=4)
cluster_ids = clusterer.fit_predict(X)

# Show the sorted k-distances next to the set of distinct labels.
distinct_labels = set(cluster_ids.tolist())
print(k_distances.tolist(), distinct_labels)
utilisation
from sklearn.neighbors import NearestNeighbors
import numpy as np
# Four 2-D points: three near the origin and one far away.
X = np.array([
    [0, 0],
    [1, 0],
    [0, 1],
    [10, 10],
])

# Query the fitted index with the training points, 3 neighbours per sample.
finder = NearestNeighbors(n_neighbors=3)
finder.fit(X)
distances, _ = finder.kneighbors(X)

# Sanity check: the last distance column holds exactly one value per sample.
last_column = distances[:, -1]
print(len(last_column) == len(X))
variante(s) utile(s)
from sklearn.cluster import DBSCAN
import numpy as np
# Same toy layout: three close points along x = 0 and one distant point.
X = np.array([
    [0, 0],
    [0, 0.1],
    [0, 0.2],
    [5, 5],
])

# Run DBSCAN with a tighter radius and a smaller density threshold.
clusterer = DBSCAN(eps=0.3, min_samples=2)
found_labels = set(clusterer.fit_predict(X))

# Check that every predicted label falls within the set {-1, 0, 1, 2}.
allowed_labels = {-1, 0, 1, 2}
print(found_labels.issubset(allowed_labels))
notes
- Chercher le coude (la rupture de pente) dans la courbe triée des k-distances ; la k-distance à cet endroit est un bon candidat pour eps.