Sep 24, 2019

Python concurrent.futures ThreadPoolExecutor as_completed

import urllib.request 
from concurrent.futures import ThreadPoolExecutor, as_completed

# Pages to download concurrently. The final entry is marked invalid in the
# original listing; it is kept so the error-reporting path gets exercised.
URLS = [
    'https://www.google.com',
    'http://www.cnn.com/',
    'http://europe.wsj.com/',
    'http://www.bbc.co.uk/',
    'http://abc.abc.com',  # invalid
]

def load_url(url, timeout):
    """Open *url* and return the full response body.

    Args:
        url: Address passed straight to ``urllib.request.urlopen``.
        timeout: Timeout forwarded to ``urlopen`` as its ``timeout`` keyword.

    Returns:
        Whatever ``read()`` on the opened response yields (the whole body).

    Any exception raised by ``urlopen`` or ``read`` propagates to the caller.
    """
    response = urllib.request.urlopen(url, timeout=timeout)
    # The context manager closes the response even if read() fails.
    with response:
        return response.read()

# Fan the downloads out over a pool of five worker threads.
with ThreadPoolExecutor(max_workers=5) as executor:
    # Map every submitted Future back to the URL it is fetching, so that a
    # finished Future can be labelled with its URL when reporting.
    future_to_url = {}
    for target in URLS:
        pending = executor.submit(load_url, target, 50)
        future_to_url[pending] = target
    print(future_to_url)
    print('----')
    # as_completed yields each Future as it finishes, which is generally not
    # the order in which the URLs were submitted.
    for finished in as_completed(future_to_url):
        url = future_to_url[finished]
        try:
            # result() re-raises whatever exception load_url raised in the
            # worker thread, so failures are reported per URL below.
            body = finished.result()
            print('%s length is %d' % (url, len(body)))
        except Exception as err:
            print('Error in URL: %s is %s' % (url, err))


Output:
{<Future at 0x7f40d262d2d0 state=running>: 'https://www.google.com', <Future at 0x7f40cadc4ad0 state=running>: 'http://www.cnn.com/', <Future at 0x7f40cadcd710 state=running>: 'http://europe.wsj.com/', <Future at 0x7f40cadcd410 state=running>: 'http://www.bbc.co.uk/', <Future at 0x7f40cade0a10 state=running>: 'http://abc.abc.com'}
----
Error in URL: http://abc.abc.com is <urlopen error [Errno -2] Name or service not known>
https://www.google.com length is 12571
http://www.cnn.com/ length is 1134562
http://europe.wsj.com/ length is 1006417
http://www.bbc.co.uk/ length is 311008

No comments:

Post a Comment