Feb 27, 2019

What is the difference between __init__ and __call__?

# The __init__ method runs when the class is called, to create and initialize a new instance,
# while the __call__ method runs when the instance itself is called like a function.

class Foo:
    def __init__(self, a, b, c):
        print 'inside Foo __init__'
    def __call__(self, a, b, c):
        print 'inside Foo __call__'

class Bar:
    def __init__(self):
        print 'inside Bar __init__'
    def __call__(self, a, b, c):
        print 'inside Bar __call__'


f = Foo(1, 2, 3) # __init__
b = Bar()
b(1, 2, 3) # __call__


Output:
inside Foo __init__
inside Bar __init__
inside Bar __call__
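
One practical use of __call__ is to make an instance behave like a configurable function; a small illustrative sketch (the Multiplier name is made up for this example):

class Multiplier:
    def __init__(self, factor):
        # __init__ stores the configuration on the instance
        self.factor = factor
    def __call__(self, x):
        # __call__ lets the instance be used like a function
        return x * self.factor

double = Multiplier(2)  # __init__ runs here
print(double(5))        # __call__ runs here -> 10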

Feb 22, 2019

__all__ in Python

__all__ in a module

e.g. module.py:

__all__ = ['foo', 'Bar']
means that when you do a wildcard import from the module, only the names listed in __all__ are imported:

from module import *
# imports only foo and Bar (via the wildcard import)

test1.py

__all__ = ['var1', 'func1']

var1 = 100

def func1():
    return 'func1'

def func2():
    return 'func2'


test2.py
from test1 import var1, func1, func2
print var1
print func1()
print func2()

Output:
100
func1
func2


test3.py
from test1 import *

print var1
print func1()
print func2() #NameError: name 'func2' is not defined

Output:
100
func1
NameError: name 'func2' is not defined


Python __name__, __main__

test1.py
if __name__ == '__main__':
   print 'inside test1 main...'
else:
   print 'test1 imported: ' + __name__


test2.py
import test1

if __name__ == '__main__':
   print 'inside test2 main...'
else:
   print 'test2 imported: ' + __name__

test3.py
import test2

if __name__ == '__main__':
   print 'inside test3 main...'
else:
   print 'test3 imported: ' + __name__


python test1.py
Output:
inside test1 main...


python test2.py
Output:
test1 imported: test1
inside test2 main...


python test3.py    # (test3 imports test2, which imports test1)
Output:
test1 imported: test1
test2 imported: test2
inside test3 main...


Python multi threading Vs multi processing

Ref:
https://medium.com/@nbosco/multithreading-vs-multiprocessing-in-python-c7dc88b50b5b

# Threading
# The Python threading module uses threads instead of processes. All threads of a process share the same memory space,
# whereas processes each get their own memory space, which makes sharing data and object instances between processes harder.
# Because threads share memory, multiple threads could write to the same location at the same time; CPython's
# global interpreter lock (GIL) is a mutex that prevents more than one thread from executing Python bytecode at once.

# Multiprocessing
# The multiprocessing library gives each worker its own memory space, can use multiple CPU cores, sidesteps the GIL
# limitation in CPython, and child processes can be killed independently. The caveats are a larger memory footprint
# and the need for inter-process communication (IPC), which adds complexity and overhead.

import threading


def calc_square(number):
    print('Square:' , number * number)

def calc_quad(number):
    print('Quad:' , number * number * number * number)

if __name__ == "__main__":
    number = 7
    thread1 = threading.Thread(target=calc_square, args=(number,))
    thread2 = threading.Thread(target=calc_quad, args=(number,))
    # Start both threads; they run concurrently (CPU-bound work is still serialized by the GIL)
    thread1.start()
    thread2.start()
    # join() waits for both threads to finish before the main program continues
    thread1.join()
    thread2.join()
    # Running the tasks concurrently mainly saves time when they are I/O-bound (network, disk)


import multiprocessing


def calc_square(number):
    print('Square:' , number * number)
    result = number * number
    print(result)

def calc_quad(number):
    print('Quad:' , number * number * number * number)

if __name__ == "__main__":
    number = 7
    result = None
    p1 = multiprocessing.Process(target=calc_square, args=(number,))
    p2 = multiprocessing.Process(target=calc_quad, args=(number,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()

    # Prints None: 'result' was only set inside the child process, which has its own memory space
    print(result)
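
To get a value back from a child process you can use one of the module's IPC primitives (Queue, Pipe, or a shared Value); below is a minimal sketch with multiprocessing.Queue (the function and variable names are just for illustration):

import multiprocessing

def calc_square(number, queue):
    # put the result on the queue so the parent process can read it
    queue.put(number * number)

if __name__ == "__main__":
    queue = multiprocessing.Queue()
    p = multiprocessing.Process(target=calc_square, args=(7, queue))
    p.start()
    p.join()
    print(queue.get())  # 49, retrieved across the process boundary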


Python read from XLS/XLSX

import xlrd

loc = ("/tmp/Students.xlsx")

wb = xlrd.open_workbook(loc)

sheet1 = wb.sheet_by_index(0)
sheet2 = wb.sheet_by_index(1)

#Sheet1 (skip the header row at index 0)
for i in range(1, sheet1.nrows):
    each_list = sheet1.row_values(i)
    print each_list

#Sheet2
for i in range(1, sheet2.nrows):
    each_list = sheet2.row_values(i)
    print each_list



Python Find second smallest number

def get_second_smallest(numbers):
    num1, num2 = float('inf'), float('inf')
    print num1, num2
    print '#####'
    for x in numbers:
        if x <= num1:
            num1, num2 = x, num1
            print num1, '-------', num2
        elif x < num2:
            num2 = x
            print num1, '-------', num2
        else:
            print 'pass... ' + str(x)
    return num2

out = get_second_smallest([2, 3, 4, 5, 1, 1.3, 1.5])
print '\nOutput: ' + str(out)

Output:
inf inf
#####
2 ------- inf
2 ------- 3
pass... 4
pass... 5
1 ------- 2
1 ------- 1.3
pass... 1.5

Output: 1.3
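
For small lists a shorter (though O(n log n)) alternative is to deduplicate and sort; a quick sketch, assuming you want the second distinct value:

def get_second_smallest_sorted(numbers):
    # set() drops duplicates, sorted() orders ascending; index 1 is the second smallest distinct value
    return sorted(set(numbers))[1]

print(get_second_smallest_sorted([2, 3, 4, 5, 1, 1.3, 1.5]))  # 1.3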


AWS Create/change key pair for EC2 instance

Under EC2 -> Network & Security -> Key Pairs -> Create with name: Mirror
It will give you Mirror.pem

PuTTYgen:
PuTTYgen -> Load this PEM
Save the public key as Mirror.pub & the private key as Mirror.ppk

Log in to the instance:
vim ~/.ssh/authorized_keys
Open Mirror.pub and copy the whole key as a single line, without newline characters (the part after the Comment: line and before END SSH2 PUBLIC KEY)

ssh-rsa <copied text from above> <Mirror(without pem extension)>

e.g.,
ssh-rsa  AAAAB3NzaC1yc2EAAA...urR2A5IUkqscHRU1Nc7TFz363UFJW6XMYae1116PO4  Mirror

Set up Mirror under PuTTY like below:
Host Name -> EC2 Host Name
Connection -> SSH -> Auth -> Add the Mirror.ppk path & save

Now log in to the EC2 instance as root

(it should not ask for any password)
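
If you are connecting from a Linux/macOS machine instead of PuTTY, the same .pem file works directly with OpenSSH (the host name below is a placeholder):

chmod 400 Mirror.pem
ssh -i Mirror.pem root@<ec2-hostname>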


Feb 19, 2019

How to install pyspark in centos

Install spark ref:
http://devopspy.com/python/apache-spark-pyspark-centos-rhel/

cd /opt
wget http://www-eu.apache.org/dist/spark/spark-2.2.1/spark-2.2.1-bin-hadoop2.7.tgz
tar -xzf spark-2.2.1-bin-hadoop2.7.tgz
ln -s spark-2.2.1-bin-hadoop2.7 spark
check /etc/hosts

How to set path?
export SPARK_HOME=/opt/spark
export PATH=$PATH:$SPARK_HOME/bin
export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.10.4-src.zip:$SPARK_HOME/python/lib/pyspark.zip:$PYTHONPATH
export PATH=$SPARK_HOME/python:$PATH

How to start master?
./sbin/start-master.sh

    1) If you get an error like below when running start-master.sh:
"hostname: Unknown host"
set the hostname properly:
hostname test.com
hostname -f   # should give you some output

    2) If you get an error like below:
"Unsupported major.minor version 52.0" exception when starting Spark
check the Java version the jar files (/opt/spark/jars) were built with against your installed Java
(class file version 52.0 corresponds to Java 8)

How to start spark master?
cd /opt/spark
./sbin/start-master.sh
This internally runs a command like below:
Spark Command: /opt/java/jdk1.8.0_201/bin/java -cp /opt/spark/conf/:/opt/spark/jars/* -Xmx1g org.apache.spark.deploy.master.Master --host test.com --port 7077 --webui-port 8080

How to access from web?
test.com:8080 (port: 8080)

How to start spark shell?
cd /opt/spark
./bin/pyspark
FYI, this internally runs a command like below:
/opt/java/jdk1.8.0_201/bin/java -cp /opt/spark/conf/:/opt/spark/jars/* -Xmx1g org.apache.spark.deploy.SparkSubmit --name PySparkShell pyspark-shell

How to find the spark process with ps?
ps -ef | grep spark
      e.g.,root     13770     1  0 14:18 pts/0    00:00:10 /opt/java/jdk1.8.0_201/bin/java -cp /opt/spark/conf/:/opt/spark/jars/* -Xmx1g org.apache.spark.deploy.master.Master --host test.com --port 7077 --webui-port 8080

PIP modules to install
pip install py4j

How to access in web?
http://localhost:8080
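
Once ./bin/pyspark starts, a quick sanity check can be run inside the shell (the Spark 2.x shell pre-creates the sc and spark objects):

rdd = sc.parallelize(range(100))
print(rdd.sum())                   # 4950 - the local executor is working
print(spark.range(100).count())    # 100, same check via the SparkSession API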



How to install java on centos

How to find the Java version of a jar file

unzip test.jar
then check the file META-INF/MANIFEST.MF for the build details

You will see details like:
Created-By: 1.8.0_144 (Oracle Corporation)
Bundle-Version: 1.3.2.2
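
Unpacking the whole jar isn't necessary; unzip can print a single member to stdout, so the manifest can be checked in one line:

unzip -p test.jar META-INF/MANIFEST.MF | grep Created-By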

Feb 14, 2019

Backup Apache log files using logrotate

/etc/logrotate.d/httpd

If you want to back up Apache logs from all instances, you can copy this logrotate snippet into /etc/logrotate.d/httpd.

Prerequisite: setup s3cmd

/var/log/httpd/*log {
    daily
    dateext
    #dateext dateformat -%Y-%m-%d-%s
    missingok
    notifempty
    #size 3M
    sharedscripts
    delaycompress
    rotate 4
    create
    postrotate
        /sbin/service httpd reload > /dev/null 2>/dev/null || true

        BUCKET=logs-backup
        INSTANCE_ID=`curl --silent http://169.254.169.254/latest/meta-data/instance-id`
        /usr/local/bin/s3cmd -c /root/.s3cfg -m text/plain sync /var/log/httpd/access_log* s3://${BUCKET}/system_logs/httpd/${INSTANCE_ID}/
        /usr/local/bin/s3cmd -c /root/.s3cfg -m text/plain sync /var/log/httpd/error_log* s3://${BUCKET}/system_logs/httpd/${INSTANCE_ID}/

    endscript
}
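
The snippet can be checked without waiting for the daily cron run; logrotate has a debug (dry-run) mode and a force option:

logrotate -d /etc/logrotate.d/httpd   # dry run: shows what would be rotated, changes nothing
logrotate -f /etc/logrotate.d/httpd   # forces a rotation now, including the postrotate S3 sync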

Feb 12, 2019

Python csv write

from users.models import *
import sys

up = UserProfile.objects.all()
print up.count()

header = "Name, User Type, Email, Phone, City, Country, TimeZone, Created Date\n"

fname = "/tmp/user_data.csv"
with open(fname, 'w') as FW:
    FW.write(header)
    for each in up:
        try:
            name = each.name
            name = name.encode('utf-8').strip()
            dts = each.created_date
            dts = dts.strftime("%d %b %Y")  # or dts.strftime("%b %d %Y %I:%M %p")
            str1 = "%s,%s,%s,%s,%s,%s,%s,%s\n" % (str(name), str(each.usertype), str(each.email_id), str(each.phone), str(each.city), str(each.country), str(each.timezone), str(dts))
            #print str1
            FW.write(str1)
        except Exception, e:
            print '---- Error: ' + str(e)

print fname


Output:
Name, User Type, Email, Phone, City, Country, TimeZone, Created Date
User2,Student,user2@abc.com,None,Bangalore,IN,Asia/Kolkata,16 Mar 2018
User3,Student,user3@gmail.com,None,Pune,IN,Asia/Kolkata,28 Mar 2018
....
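
The same export could also be written with the standard csv module, which handles quoting of values that contain commas; a minimal sketch assuming the same UserProfile fields as above (Python 2, hence the 'wb' mode):

import csv

with open("/tmp/user_data.csv", "wb") as fh:
    writer = csv.writer(fh)
    writer.writerow(["Name", "User Type", "Email", "Phone", "City", "Country", "TimeZone", "Created Date"])
    for each in UserProfile.objects.all():
        writer.writerow([
            each.name.encode('utf-8').strip(),
            each.usertype, each.email_id, each.phone,
            each.city, each.country, each.timezone,
            each.created_date.strftime("%d %b %Y"),
        ])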


Feb 5, 2019

Python Zip

"""
input_data.txt
#########
First row contains column name
Second row contains data type
From third row, it contains student records.
How to map all the records using the zip function?

name, age, city
varchar, int, varchar
prabhath, 32, bangalore
vamsi, 30, hyderabad
lakshmi, 30, vizag
ramesh, 50, chennai

"""

with open('input_data.txt') as input_file:
    rows = input_file.readlines()

rows = [row.strip() for row in rows]
rows = [row.split(',') for row in rows]
print rows

columns = rows[0]
data_types = rows[1]
rows = rows[2:]

for row in rows:
    print '----'
    for column, data_type, val in zip(columns, data_types, row):
        print column.strip(), data_type.strip(), val.strip()

"""
Output:

[['name', ' age', ' city'], ['varchar', ' int', ' varchar'], ['prabhath', ' 32', ' bangalore'], ['vamsi', ' 30', ' hyderabad'], ['lakshmi', ' 30', ' vizag'], ['ramesh', ' 50', ' chennai']]
----
name varchar prabhath
age int 32
city varchar bangalore
----
name varchar vamsi
age int 30
city varchar hyderabad
----
name varchar lakshmi
age int 30
city varchar vizag
----
name varchar ramesh
age int 50
city varchar chennai
"""

Python read characters vertically in a file

with open('input_vertical.txt') as input_file:
    rows = input_file.readlines()
print rows
print '---'
rows = [row.strip() for row in rows]
print rows
##The single star * unpacks the sequence/collection into positional arguments
rows = zip(*rows)
print rows
rows = [''.join(row) for row in rows]
print rows



"""
Output:

['prabhath\n', 'test\n', 'vertical\n', 'lines']
---
['prabhath', 'test', 'vertical', 'lines']
[('p', 't', 'v', 'l'), ('r', 'e', 'e', 'i'), ('a', 's', 'r', 'n'), ('b', 't', 't', 'e')]
['ptvl', 'reei', 'asrn', 'btte']
"""



Python week of the month

from math import ceil
from datetime import datetime

def week_of_month(dt):
    # Add the weekday of the first day of the month to the current day of the month,
    # then divide by 7 to get the week number within the month.
    first_day = dt.replace(day=1)
    day_of_month = dt.day
    #print day_of_month
    adj_day_of_month = day_of_month + first_day.weekday()
    return int(ceil(adj_day_of_month / 7.0))

if __name__ == '__main__':
    #dt = datetime.now()
    #dt = datetime.strptime('Feb 21 2019  1:00PM', '%b %d %Y %I:%M%p')
    dt = datetime.strptime('Feb 19 2019', '%b %d %Y')
    print dt
    print week_of_month(dt)

"""
Output:
2019-02-19 00:00:00
3
"""

Django update UTC time to MySQL

from datetime import datetime
from django.utils.timezone import utc

sch = Schedule.objects.get(id=25)
sch.start_time = datetime.utcnow().replace(tzinfo=utc)
sch.save()
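
If USE_TZ is enabled in settings, django.utils.timezone.now() already returns an aware UTC datetime, so the same update can be written as:

from django.utils import timezone

sch = Schedule.objects.get(id=25)
sch.start_time = timezone.now()  # timezone-aware (UTC) when USE_TZ = True
sch.save()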

Compare List Vs List Comprehension, timeit

import timeit

# timeit.timeit(stmt, setup, timer, number)
# stmt   - the statement you want to measure; it defaults to 'pass'.
# setup  - code that runs once before the stmt; it defaults to 'pass'. Generally used to import the required modules.
# number - how many times to execute the stmt.

# List Comprehension
lst = [3, 2, 41, 3, 34, 99]
print(lst)

# A bare if condition (filter) goes after the for clause
print([number for number in lst if number % 2 != 0])

# An if-else expression goes before the for clause
print([number if number % 2 != 0 else -1 for number in lst])


#List Vs List Comprehension
#Ref: https://stackoverflow.com/questions/16341775/what-is-the-advantage-of-a-list-comprehension-over-a-for-loop
#List comprehensions are more compact and faster than an explicit for loop building a list:
#This is because calling .append() on a list causes the list object to grow (in chunks) to make space for new elements individually, while the list comprehension gathers all elements first before creating the list to fit the elements in one go:

def for_loop_test():
    result = []
    for elem in iter_var:
        result.append(elem)
    return result

def list_comprehension_test():
    return [elem for elem in iter_var]

if __name__ == '__main__':
    iter_var = range(1000)
    print timeit.timeit('f()', 'from __main__ import for_loop_test as f', number=10000)
    print timeit.timeit('f()', 'from __main__ import list_comprehension_test as f', number=10000)


"""
Output:
1.41242463295
0.474811508482
"""'