# Applied
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
# NOTE(review): plot_dendrogram opens its own figure with plt.figure(), so
# this 16x14-inch, 80-dpi figure is never drawn into -- confirm whether the
# size was meant to apply to the dendrograms instead.
plt.figure(dpi=80, figsize=(16, 14))
import numpy as np
# Synthetic data set: 500 two-dimensional points drawn around three centres.
centers = [(-3, -3), (4, 4), (4, -4)]
# One standard deviation per centre; the second blob is the widest.
cluster_std = [2, 3, 2]
# Fixed random_state keeps the generated sample reproducible between runs.
X, y = make_blobs(
    n_samples=500,
    n_features=2,
    centers=centers,
    cluster_std=cluster_std,
    random_state=0,
)
# https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_dendrogram.html#sphx-glr-auto-examples-cluster-plot-agglomerative-dendrogram-py
def plot_dendrogram(model, **kwargs):
    """Draw the merge tree of a fitted hierarchical clustering model.

    Assembles a linkage matrix from the model's ``children_``,
    ``distances_`` and a per-merge sample count, opens a new matplotlib
    figure and passes the matrix to ``dendrogram``.

    Parameters
    ----------
    model
        Fitted estimator exposing ``children_``, ``distances_`` and
        ``labels_``.
    **kwargs
        Forwarded unchanged to ``dendrogram``.
    """
    merges = model.children_
    n_leaves = len(model.labels_)

    # subtree_sizes[k] holds the number of original samples under merge k.
    subtree_sizes = np.zeros(merges.shape[0])
    for row, pair in enumerate(merges):
        size = 0
        for node in pair:
            # Ids below n_leaves are original samples; any other id refers
            # to the merge stored at row (node - n_leaves).
            size += 1 if node < n_leaves else subtree_sizes[node - n_leaves]
        subtree_sizes[row] = size

    # Columns: the two merged node ids, the merge distance, the sample count.
    columns = [merges, model.distances_, subtree_sizes]
    linkage_matrix = np.column_stack(columns).astype(float)

    # Every call draws into its own freshly created figure.
    plt.figure()
    dendrogram(linkage_matrix, **kwargs)
<Figure size 1280x1120 with 0 Axes>
# Using sklearn
# Fit one agglomerative model per linkage criterion and draw its dendrogram.
# The loop index was never used, so the names are iterated directly.
for linkage in ('ward', 'complete', 'average'):
    # distance_threshold=0 with n_clusters=None makes the estimator build the
    # full merge tree and populate distances_, which plot_dendrogram reads.
    cluster = AgglomerativeClustering(
        linkage=linkage,
        distance_threshold=0,
        n_clusters=None,
    ).fit(X)
    plot_dendrogram(cluster)
data:image/s3,"s3://crabby-images/3f7db/3f7db1dba585f494afc00f52161540dea15f05f9" alt="../../../../_images/710a9a181f412d5e3d04816de7da937b17cc47a6f1b2bd6736a3356aadaea5e6.png"
data:image/s3,"s3://crabby-images/f2965/f29650a7e9c629cb1b83d0abb067d1654d2e888f" alt="../../../../_images/104e5ae3020a732c11802a9ce52d7954ab3ae8e0168ca68b0b25b3ae6251695a.png"
data:image/s3,"s3://crabby-images/b34b1/b34b1fe0e7c758741b0f4912a031d48a5f8f7df8" alt="../../../../_images/652e2fde6843f6f7a069cc1b55a8872e7e8c4793026e971ac26fe3b921b02d9b.png"