




// Load iris.csv as a DataFrame: first row is the header, malformed rows are dropped,
// and column types are inferred from the data.
scala> val df = spark.read.format("csv").options(Map("header" -> "true", "mode" -> "DROPMALFORMED", "inferSchema" -> "True")).load("iris.csv")

// Print the type of each column.
scala> df.printSchema()

// Preview the loaded rows.
scala> df.show()

// Register df as a global temporary view named "iris".
scala> df.createGlobalTempView("iris")

// Query the view through the global_temp database, using the same name it was
// registered under (the earlier "iris2" did not match any registered view).
scala> spark.sql("SELECT SepalLength+SepalWidth FROM global_temp.iris").show()

// Column names at positions 1 and 2 (the end index of slice is exclusive).
// Bound to df_sep so the command matches the echoed result below and does not
// collide with the df_columns definition that follows.
scala> val df_sep = df.columns.slice(1,3)
df_sep: Array[String] = Array(SepalWidth, PetalLength)

// Column names at positions 1 through 3.
scala> val df_columns = df.columns.slice(1,4)
df_columns: Array[String] = Array(SepalWidth, PetalLength, PetalWidth)
// Pass the first name plus the remaining names as varargs to select, then show 3 rows.
scala> df.select(df_columns.head,df_columns.tail:_*).show(3)
|       3.5|        1.4|       0.2|
|       3.0|        1.4|       0.2|
|       3.2|        1.3|       0.2|

scala> import org.apache.spark.sql.functions._
// Add an "idx" column of generated row ids.
// monotonically_increasing_id() replaces the deprecated monotonicallyIncreasingId().
// NOTE(review): per the Spark docs the ids are unique and increasing but not guaranteed
// to be consecutive across partitions; 0, 1, 2, ... is what this run produced.
scala> val idxDf = df.withColumn("idx", monotonically_increasing_id())
scala> idxDf.show(3)
|SepalLength|SepalWidth|PetalLength|PetalWidth|       Name|idx|
|        5.1|       3.5|        1.4|       0.2|Iris-setosa|  0|
|        4.9|       3.0|        1.4|       0.2|Iris-setosa|  1|
|        4.7|       3.2|        1.3|       0.2|Iris-setosa|  2|

// Narrow idxDf in two steps: first rows with idx above 10, then those with idx below 15.
scala> val ex_Df1 = idxDf.where("idx > 10")
scala> val ex_Df = ex_Df1.where("idx < 15")
scala> ex_Df.show()
|SepalLength|SepalWidth|PetalLength|PetalWidth|       Name|idx|
|        4.8|       3.4|        1.6|       0.2|Iris-setosa| 11|
|        4.8|       3.0|        1.4|       0.1|Iris-setosa| 12|
|        4.3|       3.0|        1.1|       0.1|Iris-setosa| 13|
|        5.8|       4.0|        1.2|       0.2|Iris-setosa| 14|


