HTML to Text Conversion in Python (Using BeautifulSoup)
# Download a page and print its visible text using BeautifulSoup.
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

from bs4 import BeautifulSoup

url = "<url>"

# closing() releases the connection on both Python 2 and Python 3.
with closing(urlopen(url)) as response:
    html = response.read()

# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(html, "html.parser")

# Remove all script and style elements so their contents do not end up
# in the extracted text.
for element in soup(["script", "style"]):
    element.extract()

# Get the remaining text.
text = soup.get_text()
print(text)
HTML to Text Conversion in Django (strip_tags)
# Download a page and print it with the HTML tags removed by Django.
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

from django.utils.html import strip_tags

url = "<url>"

# closing() releases the connection on both Python 2 and Python 3.
with closing(urlopen(url)) as response:
    raw = response.read()

# strip_tags operates on text, so decode the downloaded bytes first.
# NOTE(review): UTF-8 is assumed; use the page's declared charset if it
# differs. Undecodable bytes are replaced rather than raising.
html = raw.decode("utf-8", "replace")

# NOTE: strip_tags removes the markup only; text inside <script> and
# <style> elements is kept in the output.
text = strip_tags(html)
print(text)
# Download a page and print its visible text using BeautifulSoup.
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

from bs4 import BeautifulSoup

url = "<url>"

# closing() releases the connection on both Python 2 and Python 3.
with closing(urlopen(url)) as response:
    html = response.read()

# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(html, "html.parser")

# Remove all script and style elements so their contents do not end up
# in the extracted text.
for element in soup(["script", "style"]):
    element.extract()

# Get the remaining text.
text = soup.get_text()
print(text)
HTML to Text Conversion in Django (strip_tags)
# Download a page and print it with the HTML tags removed by Django.
from contextlib import closing

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

from django.utils.html import strip_tags

url = "<url>"

# closing() releases the connection on both Python 2 and Python 3.
with closing(urlopen(url)) as response:
    raw = response.read()

# strip_tags operates on text, so decode the downloaded bytes first.
# NOTE(review): UTF-8 is assumed; use the page's declared charset if it
# differs. Undecodable bytes are replaced rather than raising.
html = raw.decode("utf-8", "replace")

# NOTE: strip_tags removes the markup only; text inside <script> and
# <style> elements is kept in the output.
text = strip_tags(html)
print(text)
No comments:
Post a Comment