Apr 18, 2023

Find employees whose salary is greater than their manager's salary

 # Employees whose salary is greater than their manager's salary

from pyspark.sql.functions import *
from pyspark.sql import SparkSession
# Create (or reuse) a local Spark session for this example.
spark = (
    SparkSession.builder
    .master('local')
    .appName('scdType2')
    .getOrCreate()
)

# Sample employee rows laid out as (id, name, salary, mgr_id).
# mgr_id holds the id of the employee's manager; John has none.
columns1 = ['id', 'name', 'salary', 'mgr_id']
data1 = [
    (1, "John", 35000, None),
    (2, "Peter", 45000, 1),
    (3, "Sam", 5000, 2),
    (4, "Ramu", 55000, 2),
]
empDF = spark.createDataFrame(data=data1, schema=columns1)
empDF.show()

# Self-join: alias the same DataFrame twice so that each employee row (E)
# can be paired with the row of its manager (M) via E.mgr_id == M.id.
# The inner join leaves out employees whose mgr_id is null.
employees = empDF.alias('E')
managers = empDF.alias('M')
emp_with_mgr = employees.join(
    managers,
    on=[col("E.mgr_id") == col("M.id")],
    how='inner',
)

# Keep only the pairs where the employee earns more than the manager.
emp_with_mgr.filter("E.salary > M.salary").show()

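Output: the result contains the rows for Peter (45000, more than his manager John's 35000) and Ramu (55000, more than his manager Peter's 45000), each shown alongside the manager's columns.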


No comments:

Post a Comment