Mar 29, 2020

Python Puzzle Remove even numbers

Python Puzzle Remove even numbers


# Wrong approach (incorrect - using For loop)
def removeEven(List):
    """Demo of the WRONG way to filter a list.

    Removing elements from a list while iterating over it makes the
    iterator skip the element that slides into the freed slot, so some
    even numbers survive. Mutates List in place; returns None.
    """
    print(id(List)) # same id as the caller's list: mutated in place
    for each in List:
        if each % 2 == 0:   # fixed original typo: 'i f' -> 'if' (SyntaxError)
            List.remove(each)


# Demo: the same list object (same id) is mutated inside removeEven
myList = [152, 168, 154, 32, -55, 81, 146, -34, -124, -9, 4, -31, -131, -86, -190, -38]
print(id(myList)) # same id is printed inside removeEven -> passed by reference
print(myList) # the full original list
removeEven(myList)
print(myList) # [168, 32, -55, 81, -34, -9, -31, -131, -190] -- evens remain!
# Wrong because each remove() shifts later elements one slot left, so the
# for-loop skips the element that moved into the just-removed position

print('-' * 60)

# Correct approach (using While loop)
def removeEvenNew(List):
    """Remove every even number from List in place (correct approach).

    The index only advances when nothing was deleted, so no element is
    skipped. Mutates List; returns None.
    """
    print(id(List)) # same object as the caller's list
    i = 0
    while i < len(List):
      if List[i] % 2 == 0:
          # delete by index directly; the original's remove(List[i])
          # re-scanned the list for the value (redundant O(n) search)
          del List[i]
      else:
          i += 1


# Same input, correct while-loop version: all evens are removed
myList = [152, 168, 154, 32, -55, 81, 146, -34, -124, -9, 4, -31, -131, -86, -190, -38]
print(id(myList))
print(myList) # the full original list
removeEvenNew(myList)
print(myList) # [-55, 81, -9, -31, -131] -- only the odd numbers remain


Output:
140407115793920
[152, 168, 154, 32, -55, 81, 146, -34, -124, -9, 4, -31, -131, -86, -190, -38]
140407115793920
[168, 32, -55, 81, -34, -9, -31, -131, -190]
------------------------------------------------------------
140407114861888
[152, 168, 154, 32, -55, 81, 146, -34, -124, -9, 4, -31, -131, -86, -190, -38]
140407114861888
[-55, 81, -9, -31, -131]


Python Lists Advanced

ll = [10, 20, 30, 40, 50]

# insert, remove, pop
ll.remove(20) # removes by value -> [10, 30, 40, 50]
ll.pop() # no index -> removes the last item -> [10, 30, 40]

ll = [1, 3, 5, 'seven']
ll.insert(0, 2) 
print(ll) # [2, 1, 3, 5, 'seven']

ll.pop(2) # removes the element at index 2 (the 3)
print(ll) # [2, 1, 5, 'seven']

ll.pop() # takes out the last item
print(ll) # [2, 1, 5]

# Slice
gg = [1, 3, 5, 'seven', 'eight', 'nine', [10, 20,]]
print(gg[1:4])  # [3, 5, 'seven']
print(gg[3:])  # ['seven', 'eight', 'nine', [10, 20]]
print(gg[:3])  # [1, 3, 5]
print(gg[:])  # shallow copy: [1, 3, 5, 'seven', 'eight', 'nine', [10, 20]]

print(gg[-1:]) # [[10, 20]] (last element, still wrapped in a list)
print(gg[:-1]) # [1, 3, 5, 'seven', 'eight', 'nine']

print(gg[-3:-1]) # ['eight', 'nine']

# list[start:stop:step]
print(gg[0:7:2]) # every 2nd element: [1, 5, 'eight', [10, 20]]

ff = [1, 3, 5, 'seven', 'eight', 'nine']
print(ff) # [1, 3, 5, 'seven', 'eight', 'nine']
ff[2:2] = ['test'] # insert via empty-slice assignment
print(ff) # [1, 3, 'test', 5, 'seven', 'eight', 'nine']
ff[1:3] = [] # delete a slice by assigning an empty list
print(ff) # [1, 5, 'seven', 'eight', 'nine']

del ff[::2] # delete the elements at even-numbered indices (0, 2, 4)
print(ff)  # [5, 'eight']

# Concatenate
kk = [1, 2, 3, 4] # [1, 2, 3, 4]
kk += 'ab' # += iterates the string, appending 'a' and 'b' separately
print(kk) # [1, 2, 3, 4, 'a', 'b']

kk += ['c', 'd']
print(kk) # [1, 2, 3, 4, 'a', 'b', 'c', 'd']

kk.extend(['e', 'f'])
print(kk) # [1, 2, 3, 4, 'a', 'b', 'c', 'd', 'e', 'f']

# List Vs Array
# An array holds homogeneous elements of a single C type
# Python's array module is a thin wrapper over a C array
import array
# type code 'i' (signed int) -- NOT 'd'/float as the old comment claimed
newArray = array.array('i', [1, 2, 3])
print(newArray) # array('i', [1, 2, 3])



Python AsyncIO Example

import asyncio
import aiohttp
import time

async def crawl_one_url(url, session):
    """Fetch a single URL with the shared session and return its body text."""
    pending = session.get(url)   # request object; not awaited yet
    print(url)
    response = await pending
    body = await response.text()
    pending.close()
    return body


async def crawl_urls(urls_to_crawl):
    """Download all URLs concurrently and return the list of body texts.

    The ClientSession is managed with 'async with' so it is closed even
    if one of the downloads raises; the original leaked the session on
    any error because session.close() was only reached on success.
    """
    async with aiohttp.ClientSession() as session:
        work_to_do = [crawl_one_url(url, session) for url in urls_to_crawl]
        print(*work_to_do)  # the not-yet-awaited coroutine objects
        # gather schedules every coroutine and waits for all results
        res = await asyncio.gather(*work_to_do)
        return res


def main():
    """Time the concurrent crawl of a fixed list of blog URLs."""
    started = time.time()
    urls_to_crawl = [
        'http://blog.prabhathkota.com/search/label/python',
        'http://blog.prabhathkota.com/search/label/perl',
        'http://blog.prabhathkota.com/search/label/unix',
        'http://blog.prabhathkota.com/search/label/aws',
        'http://blog.prabhathkota.com/search/label/java',
    ]
    asyncio.run(crawl_urls(urls_to_crawl))
    elapsed = time.time() - started
    print(f"{len(urls_to_crawl)} URLS downloaded in {elapsed:.2f}")


# Run the timed crawl only when executed as a script
if __name__ == '__main__':
    main()


Output:
<coroutine object crawl_one_url at 0x7f5fe36181c0> <coroutine object crawl_on
e_url at 0x7f5fe3618240> <coroutine object crawl_one_url at 0x7f5fe36182c0> <
coroutine object crawl_one_url at 0x7f5fe3618340> <coroutine object crawl_one
_url at 0x7f5fe36183c0>
http://blog.prabhathkota.com/search/label/python
http://blog.prabhathkota.com/search/label/perl
http://blog.prabhathkota.com/search/label/unix
http://blog.prabhathkota.com/search/label/aws
http://blog.prabhathkota.com/search/label/java
5 URLS downloaded in 0.48

Python Generator Send & Receive

#############################################
# Yield returns a generator object
# use next() to access the return value
# If you attempt to invoke next() on a generator object that had already produced (yielded) all its values, it will throw StopIteration exception
# We can pass data to a generator function using the send() method defined
# Use generators to produce values and coroutines to consume them. Generator functions that receive values via send() are called coroutines.
#############################################


############# Test1 (send & receive) ############ 
def test_yield_send():
    """Consumer coroutine: prints every value pushed in via send()."""
    while True:
        received = yield
        print(f'Received item: {received}')

if __name__ == '__main__':
    gen = test_yield_send()
    print(gen)  # <generator object test_yield_send at 0x...>
    next(gen)   # prime: advance to the first 'yield' so send() can deliver
    gen.send(100)  # resumes the generator, which prints 'Received item: 100'

Output:
<generator object test_yield_send at 0x7ff700b45c80>
Received item: 100

############ Test2 (send & receive) ############ 
def generate_num():
    """Yield 1, 2, 3, ...; print whatever the caller sends back in."""
    counter = 0
    while True:
        counter += 1
        sent_in = yield counter
        print(sent_in)


if __name__ == "__main__":
    gen = generate_num()

    # send(None) primes the generator: runs to the first yield, returns 1
    item = gen.send(None)
    print("First received " + str(item))

    for i in range(0, 5):
        # each send delivers a value (printed inside generate_num) and
        # receives the next yielded counter back
        item = gen.send(100 + i)
        print("Other received " + str(item))


Output:
First received 1
100
Other received 2
101
Other received 3
102
Other received 4
103
Other received 5
104
Other received 6

Python Generator yield

"""
Yield returns a generator object
use next() to access the return value
If you attempt to invoke next() on a generator object that had already produced (yielded) all its values, it will throw StopIteration exception
It can yield multiple items as well (as shown in second example)
"""

################# Test1 #################
def test_yield():
    """Generator producing the single value 'hi'."""
    greeting = 'hi'
    yield greeting

if __name__ == '__main__':
  a = test_yield()
  print(a)  # <generator object test_yield at 0x...> (nothing ran yet)
  val = next(a)  # advances to the first yield and captures its value
  print(val) # hi

Output:
hi

################# Test2 #################
# You can yield multiple values
def test_yield(name):
    """Generator producing 'hi' followed by the supplied name."""
    for value in ('hi', name):
        yield value

if __name__ == '__main__':
  a = test_yield('John')
  print(a)  # <generator object test_yield at 0x...>
  for item in a:  # iterating drains both yields in order
    print(item)

Output:
<generator object test_yield at 0x7f93f8066c80>
hi
John

Mar 27, 2020

Python multiprocessing spawn

Fork Vs Spawn
Fork child inherit all resources from the parent process
Fork is the default method for multi-processing
Spawn child doesn't inherit any resources from the parent process other than those required to execute the specified callable target.
Spawning a process is slower (because it re-imports) than forking a process

Spawn
Spawn is essentially a combination of fork followed by an exec system call
When a child process is spawned, anything imported at module level (above the __main__ guard) in the parent process gets reimported in the child
Anything below __main__ will not be copied

test1.py
#########
Inside test1

test2.py
#########
Inside test2

spawn_test.py
############
from multiprocessing import Process
import test1
import test2

def process_task():
    """Body run inside the spawned child; proves the child executed."""
    banner = "I am inside child process"
    print(banner)


if __name__ == '__main__':
    # The original called multiprocessing.set_start_method() but only
    # 'Process' was imported above, so the bare name 'multiprocessing'
    # raised NameError. Import the module locally before using it.
    import multiprocessing

    # Change the method to 'spawn' and verify
    # that the modules are reimported in the child process
    multiprocessing.set_start_method('spawn')
    process = Process(target=process_task)
    process.start()
    process.join()
    print("I am inside parent process")


# Child reimports the test1, test2 module again...

Output:
Inside test1
Inside test2
Inside test1
Inside test2
I am inside child process
I am inside parent process

Python multiprocessing fork

Fork
# When we fork, the entire Python process is duplicated in memory including the Python interpreter, code, libraries, current stack, etc.
# This creates a new copy of the python interpreter.
# Fork creates two python interpreters each with its own GIL.
# Fork is faster than Spawn (Fork child inherit all resources from the parent process, Spawn re-imports all above main() method)
# Fork is the default method for multi-processing
 
#Disadvantages of Fork
# It won't work on windows
# When the child shares the parent's libraries, values and data structures, a lock already held by the parent can leave the child waiting for that lock forever
# Very hard to debug when you import a third-party module/library that uses threads behind the scenes
# Fork and Multi-threading won't go well


from multiprocessing import Process
import multiprocessing
import os

# Module-level handle; the parent assigns a real file object before
# starting the child. Under the 'fork' start method the child inherits
# this global (and the underlying OS descriptor).
file_desc = None

def process_task1():
    # write to the file in the child process via the inherited handle;
    # NOTE(review): this relies on fork semantics -- under 'spawn' the
    # child would see file_desc == None
    file_desc.write(f"\nWritten by child process with id {os.getpid()}")
    file_desc.flush()


if __name__ == '__main__':
    # create a file in the parent process; the forked child inherits the
    # open descriptor and appends to the same file
    file_desc = open("sample.txt", "w")
    file_desc.write(f"\nWritten by parent process with id {os.getpid()}")
    file_desc.flush()  # push the parent's line out before forking

    # Fork is the default method to create a process (on Linux)
    # multiprocessing.set_start_method('fork')

    p = Process(target=process_task1)
    p.start()
    p.join()
    file_desc.close()

    # read back what both processes wrote; 'with' closes the handle --
    # the original leaked the read descriptor before removing the file
    with open("sample.txt", "r") as reader:
        print(reader.read())

    os.remove("sample.txt")


Output:
Written by parent process with id 288
Written by child process with id 294


Mar 25, 2020

python how to overcome GIL

Ref: https://realpython.com/python-gil/

# single_threaded.py
import time
from threading import Thread

COUNT = 50000000

def countdown(n):
    """Pure CPU-bound busy work: count n down to zero."""
    remaining = n
    while remaining > 0:
        remaining -= 1

# Single-threaded baseline: all 50M decrements on the main thread
start = time.time()
countdown(COUNT)
end = time.time()

print('Time taken in seconds -', end - start)
# ~6.2 s observed -- the number to beat in the threaded/process versions


# multi_threaded.py
import time
from threading import Thread

COUNT = 50000000

def countdown(n):
    """Spin the CPU n times doing no useful work."""
    for _ in range(n):
        pass

# Two threads, half the work each -- but the GIL lets only one thread
# execute Python bytecode at a time, so there is no speedup
t1 = Thread(target=countdown, args=(COUNT//2,))
t2 = Thread(target=countdown, args=(COUNT//2,))

start = time.time()
t1.start()
t2.start()
t1.join()   # wait for both workers before stopping the clock
t2.join()
end = time.time()

print('Time taken in seconds -', end - start)
# ~6.9 s observed -- slightly WORSE than single-threaded
# No improvement here due to GIL acquire/release overhead between threads


# multi_processing.py
# Uses cores instead of threads
from multiprocessing import Pool
import time

COUNT = 50000000
def countdown(n):
    """Busy-loop n times (CPU-bound, no I/O)."""
    k = n
    while k > 0:
        k -= 1

if __name__ == '__main__':
    # Two worker *processes*: each has its own interpreter and its own
    # GIL, so the two halves genuinely run in parallel on two cores
    pool = Pool(processes=2)
    start = time.time()
    r1 = pool.apply_async(countdown, [COUNT//2])
    r2 = pool.apply_async(countdown, [COUNT//2])
    pool.close()   # no more tasks will be submitted
    pool.join()    # wait for both workers to finish
    end = time.time()
    print('Time taken in seconds -', end - start)
    # ~4.1 s observed -- real speedup vs the ~6.2 s single-threaded run


Python GIL, lock, mutex, semaphore

Lock
#####
A lock allows only one thread to enter the part that's locked and the lock is not shared with any other processes.

Mutex
#######
Mutex as the name means mutual exclusion.
A mutex is used to guard shared data such as a list, dict.
A mutex allows only a single thread to access a resource or critical section.
A mutex is the same as a lock but it can be system wide (shared by multiple processes).
Same thread can acquire and release lock
Owned by thread

Semaphore
##########
Semaphore, is used for limiting access to a collection of resources.
A semaphore does the same as a mutex but allows x number of threads to enter, this can be used
E.g., to limit the number of cpu, io or ram intensive tasks running at the same time.
      Pool of DB connections to be handed out to requesting threads
Different threads can call acquire and release on the semaphore
No ownership

GIL
####
Ref: https://realpython.com/python-gil/

The Python interpreter can only execute a single thread at a time
If your machine has one or hundred processors, the Python interpreter is only able to run a single thread at a time using a single processor.
Two threads on a machine with two available processors can't be executed in parallel each running on a single CPU.
This lock is known as the Global Interpreter Lock using CPython (to avoid deadlocks)
Python implementations which overcome the GIL altogether. E.g., Jython, IronPython and pypy-stm.

Python uses reference counting for memory management. (unlike garbage collection in other languages)
It means that objects created in Python have a reference count variable that keeps track of the number of references that point to the object. When this count reaches zero, the memory occupied by the object is released.

This reference count variable can be kept safe by adding locks to all data structures that are shared across threads so that they are not modified inconsistently.

But adding a lock to each object or groups of objects means multiple locks will exist which can cause another problem—Deadlocks (deadlocks can only happen if there is more than one lock). Another side effect would be decreased performance caused by the repeated acquisition and release of locks.

The GIL is a single lock on the interpreter itself which adds a rule that execution of any Python bytecode requires acquiring the interpreter lock. This prevents deadlocks (as there is only one lock) and doesn’t introduce much performance overhead. But it effectively makes any CPU-bound Python program single-threaded.


Python sum using multiprocessing pool

#####
# Read all text files & calculate Sum of all numbers in txt files

# Threading/input1.txt
# 100
# 200
# 300
# 400
# 500

# Threading/input2.txt
# 600
# 700
# 800
# 900
# 1000
#####

import glob
import multiprocessing

def get_txt_files():
    """Return the paths of all .txt files under Threading/."""
    # glob.glob already returns a list, so the original's manual
    # append loop was redundant -- return its result directly
    return glob.glob('Threading/*.txt')

def calculate_sum(input_file):
    """Sum the integers stored one per line in input_file."""
    with open(input_file) as fh:
        return sum(int(line) for line in fh)


if __name__ == '__main__':
    my_files = get_txt_files()
    # e.g. ['Threading/input1.txt', 'Threading/input2.txt']
    print(my_files)

    # Pool as a context manager so worker processes are terminated and
    # reaped even on error -- the original never closed the pool
    with multiprocessing.Pool(processes=2) as pool:
        # one file per worker; map blocks until every sum is ready
        results = pool.map(calculate_sum, my_files)
    print(f'Sum of individual files: {results}')
    total_sum = sum(results)
    print(f'Total Sum: {total_sum}')


Output:
######
['Threading/input1.txt', 'Threading/input2.txt']
Sum of individual files: [1500, 4000]
Total Sum: 5500

Python thread lock release

#####
#If you don't use lock, sum will be not printed as 500000
#If you use lock, you get the sum as expected 500000 (5 threads, 100000 increments each)
#####

from threading import Thread
from threading import Lock
import sys

class CounterClass:
    """A counter whose increments are serialized by a Lock.

    Without the lock, concurrent '+=' from several threads loses
    updates and the final count comes up short of the expected total.
    """

    def __init__(self):
        self.count = 0      # shared mutable state
        self.lock = Lock()  # guards the read-modify-write of count

    def increment(self, times=100000):
        """Add `times` to count, one locked step at a time.

        `times` defaults to 100000 -- the original hard-coded loop
        bound -- so existing callers behave identically.
        """
        for _ in range(times):
            # 'with' acquires and always releases, even on exception
            with self.lock:
                self.count += 1


if __name__ == "__main__":

    # Shorten the thread switch interval so threads interleave often,
    # making any race on count much more likely to show up
    sys.setswitchinterval(0.005)

    numThreads = 5
    threadsList = []
    counterObj = CounterClass()

    # all 5 threads hammer the SAME counter object
    for i in range(0, numThreads):
        threadsList.append(Thread(target=counterObj.increment))

    for i in range(0, numThreads):
        threadsList[i].start()

    for i in range(0, numThreads):
        threadsList[i].join()

    # 5 threads x 100000 increments = 500000 when the lock works
    if counterObj.count != 500000:
        print(" count = {0}".format(counterObj.count))
    else:
        print(" count = 500000")


Mar 24, 2020

Python Palindrome

Using Recursion

def isPalindrome(testVariable):
    """Recursively test whether the sequence reads the same reversed.

    Prints each shrinking slice (kept from the original for tracing).
    """
    print(testVariable)
    if len(testVariable) <= 1:
        return True
    if testVariable[0] != testVariable[-1]:
        return False
    return isPalindrome(testVariable[1:-1])

a = isPalindrome('MADAM')
print(a)  # True (MADAM reads the same in both directions)

Using normal way

def isPalindrome(testVariable):
    """Return True iff the sequence equals its own reverse."""
    # the comparison already yields a bool, so the original's
    # 'if ...: return True / return False' was redundant
    return testVariable == testVariable[::-1]

a = isPalindrome('MADAM')
print(a)  # True

Python recursion

def factorial(num):
    """Return num! computed recursively.

    The base case covers num <= 1 (not just num == 1), so
    factorial(0) == 1 and non-positive inputs no longer recurse
    forever until RecursionError.
    """
    if num <= 1:
        return 1
    return num * factorial(num - 1)

target = factorial(5)
print(target)  # 120 (5 * 4 * 3 * 2 * 1)


def square(num):
    """Return num**2 via the identity n^2 = (n-1)^2 + 2n - 1.

    The base case returns num * num for num <= 1, so square(1) == 1
    exactly as before, and square(0) or a negative input no longer
    recurses forever until RecursionError.
    """
    if num <= 1:
        return num * num
    return square(num - 1) + 2 * num - 1

target = square(6)

print(target)  # 36

python reverse string

def reverse_fuc(input_str):
    """Return input_str reversed, built one character at a time."""
    reverse = ''
    length = len(input_str) - 1
    while length >= 0:
        reverse = reverse + input_str[length]
        length = length - 1
    # the original 'return' was dedented to module level -> SyntaxError
    return reverse

target = reverse_fuc('prabhath')
print(target)  # htahbarp (the input reversed)