May 19, 2020

Pandas Tutorial 2 (data formats, db, files, pickle)

import numpy as np
import pandas as pd

# Full load: read every row and column, using the "Id" column as the index.
csv_path = "input.csv"
df = pd.read_csv(csv_path, index_col="Id")
print(df.head(n=10))
# Name Age City
# Id
# 1 John 30 Bangalore
# 2 Doe 25 Chennai
# 3 Mary 22 Hyderabad
# 4 Tom 35 Mumbai

# Partial load of the same file:
#   nrows   - read only the first N rows
#   usecols - read only the listed columns ('Id' is listed because it is
#             also used as index_col)
wanted_columns = ['Id', 'Name']
df = pd.read_csv(csv_path, usecols=wanted_columns, nrows=2, index_col="Id")
print(df.head(n=10))
# Name
# Id
# 1 John
# 2 Doe

# Round-trip a DataFrame through a pickle file on disk.
pickle_path = 'data_frame.pickle'
df = pd.read_csv("input.csv", index_col="Id")
df.to_pickle(pickle_path)  # serialize the frame and write it to disk

# Load the frame back from the file written just above.
# NOTE: unpickling runs arbitrary code, so only read pickle files you trust.
df1 = pd.read_pickle(pickle_path)
print(df1.head(n=10))
# Name Age City
# Id
# 1 John 30 Bangalore
# 2 Doe 25 Chennai
# 3 Mary 22 Hyderabad
# 4 Tom 35 Mumbai

# DataFrame -> SQL table
# Explicit SQL column types, handed to to_sql through its `dtype` argument.
# NOTE(review): VARCHAR / INTEGER, db_conn and table_name are not defined in
# this snippet - presumably SQLAlchemy types, an open connection/engine and
# the target table name; confirm against the surrounding code.
# NOTE(review): the keys are expected to be column names of the frame being
# written; verify they match the columns of df1.
table_dtype = dict(
    name=VARCHAR(),
    city=VARCHAR(),
    state=VARCHAR(),
    pincode=INTEGER(),
)
# if_exists='replace' replaces an existing table of the same name;
# index=False keeps the DataFrame index out of the written table.
df1.to_sql(
    con=db_conn,
    name=table_name,
    if_exists='replace',
    index=False,
    dtype=table_dtype,
)


# SQL table -> DataFrame
# Static query text: nothing is interpolated, so this is a plain (non-f)
# string literal.
raw_data_sql = """ select id, name, city, state
from event_raw_data
"""
# Run the query and use the selected `id` column as the DataFrame index.
# NOTE(review): db_conn is not defined in this snippet - presumably an open
# database connection/engine created elsewhere; confirm.
df1 = pd.read_sql(raw_data_sql, con=db_conn, index_col="id")





No comments:

Post a Comment