(1)示例数据:`people.json`
```json
{"name":"Michael"}
{"name":"Andy", "age":30}
{"name":"Justin", "age":19}
```
(2)示例代码
```scala
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
/**
 * Example: converting a DataFrame (loaded from a JSON file) into an RDD[Row].
 *
 * Demonstrates that `DataFrame.rdd` is all that is needed for the conversion,
 * and how to read individual fields back out of each `Row`.
 */
object DataFrameToRDD {
  def main(args: Array[String]): Unit = {
    // Local SparkSession with 4 worker threads.
    val spark: SparkSession = SparkSession.builder()
      .master("local[4]")
      .appName(this.getClass.getName)
      .getOrCreate()
    // SparkContext handle (unused below; shown for reference).
    val sc: SparkContext = spark.sparkContext
    import spark.implicits._
    try {
      // Load the JSON file. spark.read.format("json").load(...) and
      // spark.read.json(...) are equivalent ways to do this.
      val df: DataFrame = spark.read.json("file:///E:\\hadoop\\input\\people.json")
      // Calling .rdd is all that is needed to turn a DataFrame into RDD[Row].
      val rdd: RDD[Row] = df.rdd
      // NOTE: foreach(println) prints on the driver only because master is local[*];
      // on a real cluster the output would appear in executor logs instead.
      rdd.foreach(println)
      // [null,Michael]
      // [30,Andy]
      // [19,Justin]
      // Positional access on Row follows the DataFrame schema order; for
      // schema-inferred JSON the columns are alphabetical (age, name),
      // so x(0) is the age and x(1) is the name.
      rdd.map(x => (x(0), x(1))).foreach(println)
      // (null,Michael)
      // (30,Andy)
      // (19,Justin)
    } finally {
      // Always stop the SparkSession to release its resources
      // (the original example leaked it).
      spark.stop()
    }
  }
}
```
内容来源于网络,如有侵权,请联系作者删除!