K-means clustering with a k-means++ like initialization mode

Solutions on MaxInterview for "K-means clustering with a k-means++ like initialization mode" by the best coders in the world

Showing results for "k-means clustering with a k-means++ like initialization mode"
Matys
06 Jun 2016
# K-means clustering with a k-means++ like initialization mode
#
# NOTE: this snippet does not define `spark` or `temp_path`. It expects an
# active SparkSession bound to `spark` (the pyspark shell provides one) and
# a writable directory path string bound to `temp_path`.

from pyspark.ml.clustering import KMeans, KMeansModel
from pyspark.ml.linalg import Vectors

# Four 2-D points forming two obvious groups: near (0, 0) and near (9, 9).
data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([1.0, 1.0]),),
        (Vectors.dense([9.0, 8.0]),), (Vectors.dense([8.0, 9.0]),)]
df = spark.createDataFrame(data, ["features"])

# Fit a two-cluster model; the fixed seed keeps the run reproducible.
kmeans = KMeans(k=2, seed=1)
model = kmeans.fit(df)
centers = model.clusterCenters()
len(centers)
# 2

# NOTE: computeCost was deprecated in Spark 2.4 and removed in 3.0; on newer
# releases use pyspark.ml.evaluation.ClusteringEvaluator instead.
model.computeCost(df)
# 2.000...

# Points from the same group must receive the same cluster label.
transformed = model.transform(df).select("features", "prediction")
rows = transformed.collect()
rows[0].prediction == rows[1].prediction
# True
rows[2].prediction == rows[3].prediction
# True

# A freshly fitted model carries a training summary.
model.hasSummary
# True
summary = model.summary
summary.k
# 2
summary.clusterSizes
# [2, 2]

# Round-trip the estimator through persistent storage.
kmeans_path = temp_path + "/kmeans"
kmeans.save(kmeans_path)
kmeans2 = KMeans.load(kmeans_path)
kmeans2.getK()
# 2

# Round-trip the fitted model; the training summary is not restored on load.
model_path = temp_path + "/kmeans_model"
model.save(model_path)
model2 = KMeansModel.load(model_path)
model2.hasSummary
# False
model.clusterCenters()[0] == model2.clusterCenters()[0]
# array([ True, True], dtype=bool)
model.clusterCenters()[1] == model2.clusterCenters()[1]
# array([ True, True], dtype=bool)