May 13, 2023

PySpark SQL: REFRESH TABLE

 

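REFRESH TABLE - Records newly inserted through Hive do not show up in an already-running Spark session (e.g. the PySpark shell), because Spark caches the table's metadata and file listing until the table is refreshed.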

# txnrecords_part_all_sat is a table partitioned on the country column

df33 = spark.sql("select * from test.txnrecords_part_all_sat where country='US'")

df33.count() #95905


# Insert a record from the Hive CLI (outside Spark; assumes the current Hive database is `test`)

hive> insert into txnrecords_part_all_sat partition(country='US') values(13111,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);


df33 = spark.sql("select * from test.txnrecords_part_all_sat where country='US'")

df33.count() #95905  (the newly inserted record is not visible yet)


# Use REFRESH TABLE to invalidate Spark's cached entries for the table so the new data is picked up

spark.sql('REFRESH TABLE test.txnrecords_part_all_sat')


df33 = spark.sql("select * from test.txnrecords_part_all_sat where country='US'")

df33.count() #95906  (updated now)


No comments:

Post a Comment