// Map vs FlatMap (Scala)

// Sample input: five short lines of text held in a local Seq[String].
val data = Seq(
  "his is sample string",
  "one more sample string",
  "third line",
  "line",
  "this is one more line"
)
// data: Seq[String] = List(his is sample string, one more sample string, third line, line, this is one more line)

// Distribute the local sequence through the SparkContext.
// Note: despite its name, `df` is an RDD[String], not a DataFrame (see the type echoed below).
val df = spark.sparkContext.parallelize(data)
// df: org.apache.spark.rdd.RDD[String] = ParallelCollectionRDD[7] at parallelize at command-2343346390787948:1

// map emits exactly one output element per input element: here each line is
// paired with its character count, so five lines yield five tuples.
df.map(line => (line -> line.length)).collect()
// res0: Array[(String, Int)] = Array((his is sample string,20), (one more sample string,22), (third line,10), (line,4), (this is one more line,21))

// Splitting inside map keeps one result per line, so the words stay nested:
// the result type is Array[Array[String]] rather than a flat array of words.
df.map(_.split(" ")).collect()
// res1: Array[Array[String]] = Array(Array(his, is, sample, string), Array(one, more,...