from pyspark.sql import Row
from pyspark.ml.classification import LogisticRegression, OneVsRest
from pyspark.ml.linalg import Vectors
# Build a small three-class training DataFrame of (label, features) rows.
df = sc.parallelize([
    Row(label=0.0, features=Vectors.dense(1.0, 0.8)),
    Row(label=1.0, features=Vectors.sparse(2, [], [])),
    Row(label=2.0, features=Vectors.dense(0.5, 0.5))]).toDF()
# Wrap a binary LogisticRegression in OneVsRest, which trains one
# binary classifier per class, and fit it on the training data.
lr = LogisticRegression(maxIter=5, regParam=0.01)
ovr = OneVsRest(classifier=lr)
model = ovr.fit(df)
# Inspect the coefficients and intercepts of the per-class binary models.
[x.coefficients for x in model.models]
[x.intercept for x in model.models]
# Predict labels for new feature vectors.
test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 0.0))]).toDF()
model.transform(test0).head().prediction
test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF()
model.transform(test1).head().prediction
test2 = sc.parallelize([Row(features=Vectors.dense(0.5, 0.4))]).toDF()
model.transform(test2).head().prediction