batch size kmeans

Solutions on MaxInterview for batch size kmeans by the best coders in the world

showing results for - "batch size kmeans"
Pam
01 Jul 2018
# Echo the module docstring. The call form is valid on both Python 2 and
# Python 3, unlike the bare `print __doc__` statement.
print(__doc__)

import time

import numpy as np
import pylab as pl

from sklearn.cluster import MiniBatchKMeans, KMeans
from sklearn.metrics.pairwise import euclidean_distances
# make_blobs is taken from the public `sklearn.datasets` namespace; the
# `sklearn.datasets.samples_generator` module path no longer exists in
# current scikit-learn releases.
from sklearn.datasets import make_blobs

##############################################################################
# Generate sample data

# Seed NumPy's global RNG before drawing the blobs.
np.random.seed(0)

# Three 2-D blob centres; the number of clusters follows from this list.
centers = [[1, 1], [-1, -1], [1, -1]]
n_clusters = len(centers)

# Mini-batch size handed to MiniBatchKMeans further down.
batch_size = 45

X, labels_true = make_blobs(
    n_samples=3000,
    centers=centers,
    cluster_std=0.7,
)

##############################################################################
# Compute clustering with KMeans

# The cluster count is passed as `n_clusters`; the old `k` keyword is not
# accepted by current scikit-learn. Using the `n_clusters` variable keeps the
# model in step with the `centers` list used to generate the data.
k_means = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)

# Time only the fit call so the figure can report the training time.
t0 = time.time()
k_means.fit(X)
t_batch = time.time() - t0

# Keep the fitted labels and centres under short names for the plots.
k_means_labels = k_means.labels_
k_means_cluster_centers = k_means.cluster_centers_
k_means_labels_unique = np.unique(k_means_labels)

##############################################################################
# Compute clustering with MiniBatchKMeans

# The cluster count is passed as `n_clusters`; the old `k` keyword is not
# accepted by current scikit-learn.
mbk = MiniBatchKMeans(init='k-means++', n_clusters=n_clusters,
                      batch_size=batch_size, n_init=10,
                      max_no_improvement=10, verbose=0)

# Time only the fit call so the figure can report the training time.
t0 = time.time()
mbk.fit(X)
t_mini_batch = time.time() - t0

# Keep the fitted labels and centres under short names for the plots.
mbk_means_labels = mbk.labels_
mbk_means_cluster_centers = mbk.cluster_centers_
mbk_means_labels_unique = np.unique(mbk_means_labels)

##############################################################################
# Plot result

# One hex colour per cluster, shared by the first two panels.
colors = ['#4EACC5', '#FF9C34', '#4E9A06']

# Wide, short canvas holding the three side-by-side panels.
fig = pl.figure(figsize=(8, 3))
fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)

# The same cluster should get the same colour in the KMeans and the
# MiniBatchKMeans panels, so each KMeans centre is paired with the closest
# MiniBatchKMeans centre: order[k] is the MiniBatchKMeans index matched to
# KMeans cluster k.
distance = euclidean_distances(
    k_means_cluster_centers,
    mbk_means_cluster_centers,
    squared=True,
)
order = np.argmin(distance, axis=1)

# Left panel: KMeans assignments, one colour per cluster.
ax = fig.add_subplot(1, 3, 1)
for k, color in zip(range(n_clusters), colors):
    in_cluster = (k_means_labels == k)
    center_x, center_y = k_means_cluster_centers[k]
    # Member points of this cluster.
    ax.plot(X[in_cluster, 0], X[in_cluster, 1], 'w',
            markerfacecolor=color, marker='.')
    # The cluster centre, outlined in black.
    ax.plot(center_x, center_y, 'o',
            markerfacecolor=color, markeredgecolor='k', markersize=6)
ax.set_title('KMeans')
ax.set_xticks(())
ax.set_yticks(())
kmeans_caption = 'train time: %.2fs\ninertia: %f' % (t_batch, k_means.inertia_)
pl.text(-3.5, 1.8, kmeans_caption)

# Middle panel: MiniBatchKMeans assignments, coloured through `order` so that
# paired clusters share a colour with the KMeans panel.
ax = fig.add_subplot(1, 3, 2)
for k, color in zip(range(n_clusters), colors):
    paired = order[k]
    in_cluster = (mbk_means_labels == paired)
    center_x, center_y = mbk_means_cluster_centers[paired]
    # Member points of the paired cluster.
    ax.plot(X[in_cluster, 0], X[in_cluster, 1], 'w',
            markerfacecolor=color, marker='.')
    # The cluster centre, outlined in black.
    ax.plot(center_x, center_y, 'o',
            markerfacecolor=color, markeredgecolor='k', markersize=6)
ax.set_title('MiniBatchKMeans')
ax.set_xticks(())
ax.set_yticks(())
mbk_caption = 'train time: %.2fs\ninertia: %f' % (t_mini_batch, mbk.inertia_)
pl.text(-3.5, 1.8, mbk_caption)

# Right panel: highlight the points that the two models place in different
# (paired) clusters.
ax = fig.add_subplot(1, 3, 3)

# Start with every point marked "not different", using an explicit boolean
# array rather than comparing the labels against a value that never occurs.
different = np.zeros(mbk_means_labels.shape, dtype=bool)

# A point differs if, for any KMeans cluster k, its membership in k disagrees
# with its membership in the paired MiniBatchKMeans cluster order[k]. The
# loop variable must be the one used in the body; reusing a stale index from
# an earlier loop would compare only a single cluster.
for k in range(n_clusters):
    different |= (k_means_labels == k) != (mbk_means_labels == order[k])

identic = np.logical_not(different)

# Agreeing points in grey, disagreeing points in magenta.
ax.plot(X[identic, 0], X[identic, 1], 'w',
        markerfacecolor='#bbbbbb', marker='.')
ax.plot(X[different, 0], X[different, 1], 'w',
        markerfacecolor='m', marker='.')
ax.set_title('Difference')
ax.set_xticks(())
ax.set_yticks(())

pl.show()

similar questions
queries leading to this page
batch size kmeans