# PySpark - concatenate the list of all items per day
from pyspark.sql.functions import *
from pyspark.sql import SparkSession
# Create (or reuse) a SparkSession running against a local master.
# NOTE(review): the app name 'scdType2' does not describe this script --
# presumably carried over from another example; confirm before renaming.
spark = (
    SparkSession.builder
    .master('local')
    .appName('scdType2')
    .getOrCreate()
)
# Sample rows: (date string, product name). One date has several products
# so the per-day aggregation below has something to combine.
data1 = [
    ('2021-02-22', 'cricket_bat'),
    ('2021-02-22', 'cricket_ball'),
    ('2021-02-22', 'cricket_glove'),
    ('2021-02-23', 'shuttle_cock'),
    ('2021-02-24', 'shuttle_racket'),
]
columns1 = ['date', 'product']

# Build the DataFrame from the in-memory rows, naming the columns from
# columns1, then print it.
dataDF = spark.createDataFrame(data1, columns1)
dataDF.show()
# Per-day summary: number of products, the products as an array column, and
# the same products joined into a single comma-separated string.
#
# NOTE: collect_list is documented as non-deterministic -- the order of the
# collected elements depends on row order after the shuffle, so the item
# order inside 'list' / 'concat' is not guaranteed on multi-partition data.
(
    dataDF.groupBy('date')
    .agg(
        count('product').alias('count'),
        collect_list('product').alias('list'),
        concat_ws(',', collect_list('product')).alias('concat'),
    )
    # groupBy gives no row-order guarantee; sort by date so the printed
    # table is stable from run to run.
    .orderBy('date')
    # truncate=False prints full cell contents instead of cutting long ones.
    .show(truncate=False)
)
# Output:
# No comments:
# Post a Comment