|
1 |
| -//package app.task4 |
2 |
| -// |
3 |
| -//import org.apache.spark.sql.{DataFrame, SparkSession} |
4 |
| -// |
5 |
| -//object Task4 { |
6 |
| -// def main(args: Array[String]) { |
7 |
| -// implicit val spark: SparkSession = bootstrapSpark() |
8 |
| -// val address: DataFrame = loadJsonFile("Task4_2") |
9 |
| -// |
10 |
| -// val customerInfo: DataFrame = loadCsvFile("Task4_1.csv") |
11 |
| -// |
12 |
| -// val enrichedAddress = address.join(customerInfo.as('info), customerInfo("CustomerId") === address("CustomerId")) |
13 |
| -// .select() |
14 |
| -// |
15 |
| -// investigate(enrichedAddress) |
16 |
| -// |
17 |
| -// spark.stop() |
18 |
| -// } |
19 |
| -// |
20 |
| -// def loadJsonFile(filePath: String)(implicit spark: SparkSession): DataFrame = |
21 |
| -// spark.read.json(filePath) |
22 |
| -// |
23 |
| -// def loadCsvFile(filePath: String)(implicit spark: SparkSession): DataFrame = |
24 |
| -// spark.read.option("header", "true").csv(filePath) |
25 |
| -// |
26 |
| -// def investigate(dataFrame: DataFrame): Unit = { |
27 |
| -// dataFrame.show() |
28 |
| -// dataFrame.printSchema() |
29 |
| -// } |
30 |
| -// |
31 |
| -// def bootstrapSpark(): SparkSession = { |
32 |
| -// val spark = SparkSession.builder |
33 |
| -// .appName("Simple Application") |
34 |
| -// .master("local") |
35 |
| -// .getOrCreate() |
36 |
| -// |
37 |
| -// spark.sparkContext.setLogLevel("ERROR") |
38 |
| -// |
39 |
| -// spark |
40 |
| -// } |
41 |
| -//} |
package app.task4

import org.apache.spark.sql.{DataFrame, SparkSession}

/** Enriches customer address records (JSON) with customer info (CSV) by
  * joining the two datasets on `CustomerId`, then prints the result.
  */
object Task4 {

  /** Entry point: loads both datasets, joins them on `CustomerId`,
    * displays the enriched rows and schema, and stops the session.
    *
    * (Fixed: was written with deprecated procedure syntax
    * `def main(args: Array[String]) { ... }`; Scala 2.13 deprecates it
    * and Scala 3 removes it — an explicit `: Unit =` is required.)
    */
  def main(args: Array[String]): Unit = {
    implicit val spark: SparkSession = bootstrapSpark()

    val address: DataFrame = loadJsonFile("task4/Dataset2")
    val customerInfo: DataFrame = loadCsvFile("task4/Dataset1.csv")

    // Column-name join keeps a single CustomerId column in the output,
    // unlike an expression join which would retain both sides' columns.
    val enrichedAddress = address.join(customerInfo, "CustomerId")

    investigate(enrichedAddress)

    spark.stop()
  }

  /** Reads a JSON dataset from `filePath`; the schema is inferred by Spark.
    *
    * @param filePath path to the JSON file or directory
    * @param spark    active session used for reading
    */
  def loadJsonFile(filePath: String)(implicit spark: SparkSession): DataFrame =
    spark.read.json(filePath)

  /** Reads a CSV dataset from `filePath`, treating the first row as a header.
    *
    * @param filePath path to the CSV file
    * @param spark    active session used for reading
    */
  def loadCsvFile(filePath: String)(implicit spark: SparkSession): DataFrame =
    spark.read.option("header", "true").csv(filePath)

  /** Prints the DataFrame's rows and its schema to stdout for inspection. */
  def investigate(dataFrame: DataFrame): Unit = {
    dataFrame.show()
    dataFrame.printSchema()
  }

  /** Builds a local-master SparkSession and lowers the log level to ERROR
    * so driver output is not drowned by INFO logging.
    */
  def bootstrapSpark(): SparkSession = {
    val spark = SparkSession.builder
      .appName("Simple Application")
      .master("local")
      .getOrCreate()

    spark.sparkContext.setLogLevel("ERROR")

    spark
  }
}
0 commit comments