import asyncio
import aiohttp
import time
async def crawl_one_url(url, session):
    """Fetch a single URL and return its decoded body text.

    Args:
        url: The URL string to download.
        session: A shared aiohttp.ClientSession to issue the request on.

    Returns:
        The response body as a string.
    """
    print(url)  # progress trace, printed as each coroutine starts running
    # 'async with' releases the response/connection even if .text() raises;
    # the original awaited the request and called .close() manually, which
    # leaked the connection whenever an exception fired in between.
    async with session.get(url) as res:
        return await res.text()
async def crawl_urls(urls_to_crawl):
    """Download several URLs concurrently and return their bodies.

    Args:
        urls_to_crawl: Iterable of URL strings to fetch.

    Returns:
        List of response body strings, in the same order as the input.
    """
    # 'async with' guarantees the session is closed even when gather()
    # raises; the original created it manually, so any failed request
    # skipped 'await session.close()' and leaked the session.
    async with aiohttp.ClientSession() as session:
        work_to_do = [crawl_one_url(url, session) for url in urls_to_crawl]
        # Debug 'print(*work_to_do)' removed: it only dumped opaque
        # coroutine reprs (visible as noise in the captured output).
        return await asyncio.gather(*work_to_do)
def main():
    """Entry point: download a fixed list of URLs concurrently and time it."""
    started_at = time.time()
    urls_to_crawl = [
        'http://blog.prabhathkota.com/search/label/python',
        'http://blog.prabhathkota.com/search/label/perl',
        'http://blog.prabhathkota.com/search/label/unix',
        'http://blog.prabhathkota.com/search/label/aws',
        'http://blog.prabhathkota.com/search/label/java',
    ]
    asyncio.run(crawl_urls(urls_to_crawl))
    elapsed = time.time() - started_at
    print(f"{len(urls_to_crawl)} URLS downloaded in {elapsed:.2f}")


if __name__ == '__main__':
    main()
Output:
<coroutine object crawl_one_url at 0x7f5fe36181c0> <coroutine object crawl_on
e_url at 0x7f5fe3618240> <coroutine object crawl_one_url at 0x7f5fe36182c0> <
coroutine object crawl_one_url at 0x7f5fe3618340> <coroutine object crawl_one
_url at 0x7f5fe36183c0>
http://blog.prabhathkota.com/search/label/python
http://blog.prabhathkota.com/search/label/perl
http://blog.prabhathkota.com/search/label/unix
http://blog.prabhathkota.com/search/label/aws
http://blog.prabhathkota.com/search/label/java
5 URLS downloaded in 0.48
import aiohttp
import time
async def crawl_one_url(url, session):
    """Fetch a single URL and return its decoded body text.

    Args:
        url: The URL string to download.
        session: A shared aiohttp.ClientSession to issue the request on.

    Returns:
        The response body as a string.
    """
    print(url)  # progress trace, printed as each coroutine starts running
    # 'async with' releases the response/connection even if .text() raises;
    # the original awaited the request and called .close() manually, which
    # leaked the connection whenever an exception fired in between.
    async with session.get(url) as res:
        return await res.text()
async def crawl_urls(urls_to_crawl):
    """Download several URLs concurrently and return their bodies.

    Args:
        urls_to_crawl: Iterable of URL strings to fetch.

    Returns:
        List of response body strings, in the same order as the input.
    """
    # 'async with' guarantees the session is closed even when gather()
    # raises; the original created it manually, so any failed request
    # skipped 'await session.close()' and leaked the session.
    async with aiohttp.ClientSession() as session:
        work_to_do = [crawl_one_url(url, session) for url in urls_to_crawl]
        # Debug 'print(*work_to_do)' removed: it only dumped opaque
        # coroutine reprs (visible as noise in the captured output).
        return await asyncio.gather(*work_to_do)
def main():
    """Entry point: download a fixed list of URLs concurrently and time it."""
    started_at = time.time()
    urls_to_crawl = [
        'http://blog.prabhathkota.com/search/label/python',
        'http://blog.prabhathkota.com/search/label/perl',
        'http://blog.prabhathkota.com/search/label/unix',
        'http://blog.prabhathkota.com/search/label/aws',
        'http://blog.prabhathkota.com/search/label/java',
    ]
    asyncio.run(crawl_urls(urls_to_crawl))
    elapsed = time.time() - started_at
    print(f"{len(urls_to_crawl)} URLS downloaded in {elapsed:.2f}")


if __name__ == '__main__':
    main()
Output:
<coroutine object crawl_one_url at 0x7f5fe36181c0> <coroutine object crawl_on
e_url at 0x7f5fe3618240> <coroutine object crawl_one_url at 0x7f5fe36182c0> <
coroutine object crawl_one_url at 0x7f5fe3618340> <coroutine object crawl_one
_url at 0x7f5fe36183c0>
http://blog.prabhathkota.com/search/label/python
http://blog.prabhathkota.com/search/label/perl
http://blog.prabhathkota.com/search/label/unix
http://blog.prabhathkota.com/search/label/aws
http://blog.prabhathkota.com/search/label/java
5 URLS downloaded in 0.48
No comments:
Post a Comment