Apr 18, 2023

PySpark - concatenate the list of all items per day

 #PySpark - concatenate the list of all items per day


from pyspark.sql.functions import *
from pyspark.sql import SparkSession
# Start (or reuse) a Spark session on the 'local' master.
spark = (
    SparkSession.builder
    .master('local')
    .appName('scdType2')
    .getOrCreate()
)

# Sample input: one (date, product) pair per row.
data1 = [
    ('2021-02-22', 'cricket_bat'),
    ('2021-02-22', 'cricket_ball'),
    ('2021-02-22', 'cricket_glove'),
    ('2021-02-23', 'shuttle_cock'),
    ('2021-02-24', 'shuttle_racket'),
]
# Column names, in the same order as the tuple fields above.
columns1 = ['date', 'product']

# Build the DataFrame from the rows and column names, then print it.
dataDF = spark.createDataFrame(data1, columns1)
dataDF.show()

# Aggregate the products listed for each date into three columns:
#   count  - number of non-null product values for the date
#   list   - the products collected into an array
#   concat - the same array joined into one comma-separated string
#
# The collected-list expression is built once and reused so that both the
# 'list' and 'concat' columns are derived from the same expression.
# NOTE: collect_list() does not guarantee the order of the collected
# elements after a shuffle; wrap it in sort_array() if a fixed element
# order is required.
products_per_day = collect_list('product')
(
    dataDF.groupBy('date')
    .agg(
        count('product').alias('count'),
        products_per_day.alias('list'),
        concat_ws(',', products_per_day).alias('concat'),
    )
    # groupBy() yields groups in no particular order; sort by date so the
    # printed table is the same on every run.
    .orderBy('date')
    .show(truncate=False)
)


Output:



No comments:

Post a Comment