Mar 29, 2020

Python AsyncIO Example

import asyncio
import aiohttp
import time

async def crawl_one_url(url, session):
    """Fetch *url* with the shared aiohttp session and return the body text.

    Args:
        url: Absolute URL to GET.
        session: An open aiohttp.ClientSession shared across requests.

    Returns:
        The decoded response body as a string.
    """
    print(url)
    # async with releases the response even if .text() raises; the original
    # manual close() leaked the response object on any error path.
    async with session.get(url) as res:
        return await res.text()


async def crawl_urls(urls_to_crawl):
    """Download every URL in *urls_to_crawl* concurrently.

    Args:
        urls_to_crawl: Iterable of URL strings.

    Returns:
        List of response bodies, in the same order as the input URLs
        (asyncio.gather preserves input order).
    """
    # async with closes the session (and its connection pool) even when
    # gather() raises; the original leaked the session on any failed request.
    # The original also debug-printed the raw coroutine objects — removed.
    async with aiohttp.ClientSession() as session:
        work_to_do = [crawl_one_url(url, session) for url in urls_to_crawl]
        return await asyncio.gather(*work_to_do)


def main():
    """Crawl a fixed set of blog label pages and report the elapsed time."""
    labels = ['python', 'perl', 'unix', 'aws', 'java']
    urls_to_crawl = [
        f'http://blog.prabhathkota.com/search/label/{label}'
        for label in labels
    ]
    t0 = time.time()
    asyncio.run(crawl_urls(urls_to_crawl))
    elapsed = time.time() - t0
    print(f"{len(urls_to_crawl)} URLS downloaded in {elapsed:.2f}")


if __name__ == '__main__':
    main()


Output:
<coroutine object crawl_one_url at 0x7f5fe36181c0> <coroutine object crawl_one_url at 0x7f5fe3618240> <coroutine object crawl_one_url at 0x7f5fe36182c0> <coroutine object crawl_one_url at 0x7f5fe3618340> <coroutine object crawl_one_url at 0x7f5fe36183c0>
http://blog.prabhathkota.com/search/label/python
http://blog.prabhathkota.com/search/label/perl
http://blog.prabhathkota.com/search/label/unix
http://blog.prabhathkota.com/search/label/aws
http://blog.prabhathkota.com/search/label/java
5 URLS downloaded in 0.48

No comments:

Post a Comment