from pyspark.sql import SparkSession

# Create a SparkSession
spark = SparkSession.builder \
    .appName("RDD vs Spark SQL") \
    .getOrCreate()

# Create an RDD of (id, name) tuples
rdd = spark.sparkContext.parallelize([(1, 'Alice'), (2, 'Bob'), (3, 'Charlie')])

# Convert the RDD to a DataFrame
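# (the schema lists only the column names; column types are inferred from the data)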
df = spark.createDataFrame(rdd, schema=['id', 'name'])

# Register the DataFrame as a temporary view
df.createOrReplaceTempView("people")

# Query the view with Spark SQL
result_sql = spark.sql("SELECT * FROM people WHERE id = 2")

# The equivalent filter using an RDD transformation
result_rdd = rdd.filter(lambda x: x[0] == 2)
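# Note: RDD transformations are lazy; the filter only runs when collect() is called below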

# Print the results
print("Result using Spark SQL:")
result_sql.show()
print("Result using RDD:")
print(result_rdd.collect())

# Stop the SparkSession
spark.stop()