Apr 18, 2023

PySpark - join & keep right DataFrame data

 # PySpark - join & keep right DataFrame data


from pyspark.sql.functions import *
from pyspark.sql import SparkSession
# Get (or reuse) a Spark session running on a local master for this demo.
spark = (
    SparkSession.builder
    .master('local')
    .appName('scdType2')
    .getOrCreate()
)

# Both DataFrames use the same two-column layout.
columns1 = ['id', 'num']

# Base rows, and the changed rows for ids 1 and 3.
data1 = [(1, 100), (2, 200), (3, 300)]
change_data1 = [(1, 111), (3, 333)]

# Build and display the base DataFrame first, then the change DataFrame.
dataDF = spark.createDataFrame(data=data1, schema=columns1)
dataDF.show()

changeDF = spark.createDataFrame(data=change_data1, schema=columns1)
changeDF.show()

# Inner join on id with all columns kept: the result has a num column from
# each side (left 'src' and right 'chg').
(
    dataDF.alias('src')
    .join(changeDF.alias('chg'), on=["id"], how="inner")
    .show()
)

# Drop num from the left side before joining, so the only num column in the
# result is the one coming from the right DataFrame.
(
    dataDF.drop('num')
    .alias('src')
    .join(changeDF.alias('chg'), on=["id"], how="inner")
    .show()
)

Output:


No comments:

Post a Comment