Correlation with Spark MLlib

Solutions on MaxInterview for correlation with Spark MLlib by the best coders in the world

showing results for - "correlation mlib"
Adèle
04 May 2017
import org.apache.spark.ml.linalg.{Matrix, Vectors}
import org.apache.spark.ml.stat.Correlation
import org.apache.spark.sql.Row

// Computes and prints the Pearson and Spearman correlation matrices for a
// small set of 4-dimensional feature vectors.
//
// NOTE(review): `toDF` below presumably relies on `spark.implicits._` being in
// scope (as it is by default in spark-shell) -- confirm for your environment.

// Four sample observations, each a vector of size 4 (mixed sparse and dense).
val data = Seq(
  Vectors.sparse(4, Seq((0, 1.0), (3, -2.0))),
  Vectors.dense(4.0, 5.0, 0.0, 3.0),
  Vectors.dense(6.0, 7.0, 0.0, 8.0),
  Vectors.sparse(4, Seq((0, 9.0), (3, 1.0)))
)

// Wrap each vector in a Tuple1 so it becomes a single-column DataFrame
// whose column is named "features".
val df = data.map(Tuple1.apply).toDF("features")

// With no method argument the result is reported as the Pearson matrix;
// the first row of the result is destructured into its single Matrix cell.
val Row(coeff1: Matrix) = Correlation.corr(df, "features").head
println(s"Pearson correlation matrix:\n$coeff1")

// Same computation, explicitly requesting the "spearman" method.
val Row(coeff2: Matrix) = Correlation.corr(df, "features", "spearman").head
println(s"Spearman correlation matrix:\n$coeff2")