import urllib.request
from concurrent.futures import ThreadPoolExecutor, as_completed
# Pages fetched concurrently by the script below.
URLS = [
    "https://www.google.com",
    "http://www.cnn.com/",
    "http://europe.wsj.com/",
    "http://www.bbc.co.uk/",
    "http://abc.abc.com",  # invalid host, included on purpose
]
def load_url(url: str, timeout: float) -> bytes:
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; passed straight to ``urllib.request.urlopen``.
        timeout: Timeout handed to ``urlopen`` for the blocking operations.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened (for example,
            the host name does not resolve).
    """
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(url, timeout=timeout) as conn:
        return conn.read()
# Fetch every URL on a worker thread and report each result as soon as it
# is available.
with ThreadPoolExecutor(max_workers=5) as executor:
    # Map each future back to the URL it is loading so that results can be
    # attributed when they complete out of submission order.
    future_to_url = {executor.submit(load_url, url, 50): url for url in URLS}
    print(future_to_url)
    print('----')
    # as_completed yields futures as they finish, not in submission order.
    for future in as_completed(future_to_url):
        url = future_to_url[future]
        try:
            # result() re-raises any exception raised inside load_url.
            data = future.result()
            print('%s length is %d' % (url, len(data)))
        except Exception as e:
            # Report the failure for this URL and keep processing the rest.
            print('Error in URL: %s is %s' % (url, e))
Output:
{<Future at 0x7f40d262d2d0 state=running>: 'https://www.google.com', <Future at 0x7f40cadc4ad0 state=running>: 'http://www.cnn.com/', <Future at 0x7f40cadcd710 state=running>: 'http://europe.wsj.com/', <Future at 0x7f40cadcd410 state=running>: 'http://www.bbc.co.uk/', <Future at 0x7f40cade0a10 state=running>: 'http://abc.abc.com'}
----
Error in URL: http://abc.abc.com is <urlopen error [Errno -2] Name or service not known>
https://www.google.com length is 12571
http://www.cnn.com/ length is 1134562
http://europe.wsj.com/ length is 1006417
http://www.bbc.co.uk/ length is 311008
from concurrent.futures import ThreadPoolExecutor, as_completed
# Pages fetched concurrently by the script below.
URLS = [
    "https://www.google.com",
    "http://www.cnn.com/",
    "http://europe.wsj.com/",
    "http://www.bbc.co.uk/",
    "http://abc.abc.com",  # invalid host, included on purpose
]
def load_url(url: str, timeout: float) -> bytes:
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; passed straight to ``urllib.request.urlopen``.
        timeout: Timeout handed to ``urlopen`` for the blocking operations.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened (for example,
            the host name does not resolve).
    """
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(url, timeout=timeout) as conn:
        return conn.read()
# Fetch every URL on a worker thread and report each result as soon as it
# is available.
with ThreadPoolExecutor(max_workers=5) as executor:
    # Map each future back to the URL it is loading so that results can be
    # attributed when they complete out of submission order.
    future_to_url = {executor.submit(load_url, url, 50): url for url in URLS}
    print(future_to_url)
    print('----')
    # as_completed yields futures as they finish, not in submission order.
    for future in as_completed(future_to_url):
        url = future_to_url[future]
        try:
            # result() re-raises any exception raised inside load_url.
            data = future.result()
            print('%s length is %d' % (url, len(data)))
        except Exception as e:
            # Report the failure for this URL and keep processing the rest.
            print('Error in URL: %s is %s' % (url, e))
Output:
{<Future at 0x7f40d262d2d0 state=running>: 'https://www.google.com', <Future at 0x7f40cadc4ad0 state=running>: 'http://www.cnn.com/', <Future at 0x7f40cadcd710 state=running>: 'http://europe.wsj.com/', <Future at 0x7f40cadcd410 state=running>: 'http://www.bbc.co.uk/', <Future at 0x7f40cade0a10 state=running>: 'http://abc.abc.com'}
----
Error in URL: http://abc.abc.com is <urlopen error [Errno -2] Name or service not known>
https://www.google.com length is 12571
http://www.cnn.com/ length is 1134562
http://europe.wsj.com/ length is 1006417
http://www.bbc.co.uk/ length is 311008
No comments:
Post a Comment