...
Блок кода |
---|
%SPARK_HOME%\bin\spark-shell --jars spark-introduction-1.0.0-SNAPSHOT.jar |
Примеры
Блок кода | ||||
---|---|---|---|---|
| ||||
scala> val rdd = sc.parallelize(1 to 100)
rdd: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[0] at parallelize at <console>:24
|
Блок кода | ||||||
---|---|---|---|---|---|---|
| ||||||
scala> val df = rdd.toDF
df: org.apache.spark.sql.DataFrame = [value: int]
scala> df.printSchema
root
|-- value: integer (nullable = false)
scala> df.show(2)
+-----+
|value|
+-----+
|    1|
|    2|
+-----+
only showing top 2 rows |
Блок кода | ||||||
---|---|---|---|---|---|---|
| ||||||
scala> case class Person(id: Int, name: String)
defined class Person
scala> val persons = Seq(Person(1, "Ivan"), Person(2, "Petrov"), Person(3, "Sidorov")).toDF
persons: org.apache.spark.sql.DataFrame = [id: int, name: string]
scala> persons.select("name").show
+-------+
|   name|
+-------+
|   Ivan|
| Petrov|
|Sidorov|
+-------+
scala> persons.createGlobalTempView("persons")
scala> spark.sql("select id from global_temp.persons").show
+---+
| id|
+---+
|  1|
|  2|
|  3|
+---+ |
Минимальный проект на Spark
...