- Start the Spark shell:
$ spark-shell
- Import the vector and linear regression classes:
scala> import org.apache.spark.ml.linalg.Vectors
scala> import org.apache.spark.ml.regression.LinearRegression
- Create a DataFrame with the house price as the label and a single-element feature vector for each house:
scala> val points = spark.createDataFrame(Seq(
(1620000,Vectors.dense(2100)),
(1690000,Vectors.dense(2300)),
(1400000,Vectors.dense(2046)),
(2000000,Vectors.dense(4314)),
(1060000,Vectors.dense(1244)),
(3830000,Vectors.dense(4608)),
(1230000,Vectors.dense(2173)),
(2400000,Vectors.dense(2750)),
(3380000,Vectors.dense(4010)),
(1480000,Vectors.dense(1959))
)).toDF("label","features")
- Create a linear regression instance with the default parameters:
scala> val lr = new LinearRegression()
- Train a model using this data:
scala> val model = lr.fit(points)
- Create a one-row test DataFrame that contains only a features column:
scala> val test = spark.createDataFrame(Seq(Vectors.dense(2100)).map(Tuple1.apply)).toDF("features")
- Make predictions for the test data:
scala> val predictions = model.transform(test)