Data I/O
Reading & Writing Data
from pyspark import SparkConf
from pyspark.sql import SparkSession

# Create (or reuse) a SparkSession, the entry point for the DataFrame and SQL APIs
spark = SparkSession.builder.appName("Spark SQL example").getOrCreate()

# Show the configuration the session is actually running with
print(spark.sparkContext.getConf().getAll())
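The SparkConf import becomes useful when you want to set configuration values yourself rather than accept the defaults listed by getAll(). A minimal sketch, with the shuffle-partition setting chosen purely for illustration:

from pyspark import SparkConf
from pyspark.sql import SparkSession

# Build a configuration object; the value here is illustrative, not a recommendation
conf = SparkConf().setAppName("Spark SQL example").set("spark.sql.shuffle.partitions", "8")

# getOrCreate() reuses an existing session, so configuration must be set before the first session starts
spark = SparkSession.builder.config(conf=conf).getOrCreate()
print(spark.sparkContext.getConf().get("spark.sql.shuffle.partitions"))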
path = "data/sparkify_log_small.json"
user_log = spark.read.json(path)
print(user_log.printSchema())
print(user_log.describe())
print(user_log.show(n=1))
print(user_log.take(5))
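Schema inference means Spark scans the JSON once just to work out column types. If the structure is known ahead of time, a schema can be passed to the reader instead; the field names and types below are assumptions for illustration and should be matched against what printSchema() reported:

from pyspark.sql.types import StructType, StructField, StringType, LongType

# Assumed fields; JSON attributes not listed in the schema are simply not loaded
log_schema = StructType([
    StructField("userId", StringType(), True),
    StructField("page", StringType(), True),
    StructField("ts", LongType(), True),
])

user_log_typed = spark.read.schema(log_schema).json(path)
user_log_typed.printSchema()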
out_path = "data/sparkify_log_small.csv"
user_log.write.save(out_path, format="csv", header=True)
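The same write can be expressed with the format-specific csv() writer. The overwrite mode below is an assumption; use it only if replacing an existing output directory is acceptable:

# Equivalent write; Spark still produces a directory of part files rather than a single CSV file
user_log.write.mode("overwrite").csv(out_path, header=True)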
# Read the CSV back in; header=True treats the first row of each part file as column names
user_log_2 = spark.read.csv(out_path, header=True)
user_log_2.printSchema()
print(user_log_2.take(2))
user_log_2.select("userID").show()
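One caveat about the CSV round trip: without type information every column is read back as a string, which is why user_log_2 can look different from user_log even though the rows match. Asking the reader to infer types costs one extra pass over the file; a minimal sketch:

# inferSchema=True makes Spark scan the data again to guess numeric and boolean types
user_log_3 = spark.read.csv(out_path, header=True, inferSchema=True)
user_log_3.printSchema()  # compare with user_log_2: numeric columns are no longer strings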