Skip to content

Commit 9522ad4

Browse files
committed
task4
1 parent 0643f03 commit 9522ad4

File tree

5 files changed

+43
-42
lines changed

5 files changed

+43
-42
lines changed

src/main/scala/app/task1/Task1.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import org.apache.spark.sql.{Dataset, SparkSession}
55
object Task1 extends App {
66

77
implicit val spark: SparkSession = bootstrapSpark()
8-
val file = loadFile("Task1.txt").cache()
8+
val file = loadFile("Dataset.txt").cache()
99
val fileAnalysis = new TextFileAnalysis(file)
1010
val numAs = fileAnalysis.lineContaining("a").count()
1111
val numBs = fileAnalysis.lineContaining("b").count()

src/main/scala/app/task4/Task4.scala

+42-41
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,42 @@
1-
//package app.task4
2-
//
3-
//import org.apache.spark.sql.{DataFrame, SparkSession}
4-
//
5-
//object Task4 {
6-
// def main(args: Array[String]) {
7-
// implicit val spark: SparkSession = bootstrapSpark()
8-
// val address: DataFrame = loadJsonFile("Task4_2")
9-
//
10-
// val customerInfo: DataFrame = loadCsvFile("Task4_1.csv")
11-
//
12-
// val enrichedAddress = address.join(customerInfo.as('info), customerInfo("CustomerId") === address("CustomerId"))
13-
// .select()
14-
//
15-
// investigate(enrichedAddress)
16-
//
17-
// spark.stop()
18-
// }
19-
//
20-
// def loadJsonFile(filePath: String)(implicit spark: SparkSession): DataFrame =
21-
// spark.read.json(filePath)
22-
//
23-
// def loadCsvFile(filePath: String)(implicit spark: SparkSession): DataFrame =
24-
// spark.read.option("header", "true").csv(filePath)
25-
//
26-
// def investigate(dataFrame: DataFrame): Unit = {
27-
// dataFrame.show()
28-
// dataFrame.printSchema()
29-
// }
30-
//
31-
// def bootstrapSpark(): SparkSession = {
32-
// val spark = SparkSession.builder
33-
// .appName("Simple Application")
34-
// .master("local")
35-
// .getOrCreate()
36-
//
37-
// spark.sparkContext.setLogLevel("ERROR")
38-
//
39-
// spark
40-
// }
41-
//}
1+
package app.task4
2+
3+
import org.apache.spark.sql.{DataFrame, SparkSession}
4+
5+
object Task4 {

  /**
   * Entry point: enriches JSON address records with CSV customer info,
   * joined on their shared `CustomerId` column, then prints the result.
   *
   * Fix: the original used Scala procedure syntax (`def main(args) { ... }`),
   * which is deprecated in 2.13 and removed in Scala 3 — made the `: Unit =`
   * explicit.
   */
  def main(args: Array[String]): Unit = {
    implicit val spark: SparkSession = bootstrapSpark()

    // Addresses come from a JSON dataset; customer details from a headered CSV.
    val address: DataFrame = loadJsonFile("task4/Dataset2")
    val customerInfo: DataFrame = loadCsvFile("task4/Dataset1.csv")

    // Single-column (USING) join on CustomerId: keeps only one copy of the
    // key column in the output, unlike an expression-based equi-join.
    val enrichedAddress = address.join(customerInfo, "CustomerId")

    investigate(enrichedAddress)

    spark.stop()
  }

  /** Reads the JSON file at `filePath` into a DataFrame via the implicit session. */
  def loadJsonFile(filePath: String)(implicit spark: SparkSession): DataFrame =
    spark.read.json(filePath)

  /** Reads the CSV file at `filePath` (first line treated as a header) into a DataFrame. */
  def loadCsvFile(filePath: String)(implicit spark: SparkSession): DataFrame =
    spark.read.option("header", "true").csv(filePath)

  /** Prints the DataFrame's rows and schema to stdout for inspection. */
  def investigate(dataFrame: DataFrame): Unit = {
    dataFrame.show()
    dataFrame.printSchema()
  }

  /** Builds a local-master SparkSession with log output reduced to errors. */
  def bootstrapSpark(): SparkSession = {
    val spark = SparkSession.builder
      .appName("Simple Application")
      .master("local")
      .getOrCreate()

    // Keep console output readable for this small demo application.
    spark.sparkContext.setLogLevel("ERROR")

    spark
  }
}
File renamed without changes.
File renamed without changes.

Task4_2 renamed to task4/Dataset2

File renamed without changes.

0 commit comments

Comments
 (0)