|
2 | 2 | # AUTHORS: nindogo
|
3 | 3 | # CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
|
4 | 4 |
|
| 5 | +import io |
| 6 | +import gzip |
| 7 | +import urllib.error |
| 8 | +import urllib.parse |
| 9 | +import urllib.request |
5 | 10 | from html.parser import HTMLParser
|
6 | 11 |
|
7 | 12 | from novaprinter import prettyPrinter
|
8 |
| -from helpers import retrieve_url |
| 13 | +from helpers import htmlentitydecode |
| 14 | + |
# Some sites block the default Python User-Agent, so impersonate a real browser
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0'}
| 17 | + |
# We must implement our own retrieve_url because helpers.py versions prior to 1.49 did not
# support POST requests. This version is based on helpers.py 1.45
def retrieve_url(url, data=None):
    """Return the content of the url page as a string.

    Parameters:
        url  -- URL to fetch
        data -- optional bytes payload; when not None the request is a POST

    Returns the decoded page body, or "" on connection errors.
    """
    req = urllib.request.Request(url, data, headers)
    try:
        # 'with' guarantees the response is closed (the original leaked it).
        with urllib.request.urlopen(req) as response:
            dat = response.read()
            info = response.info()
    except urllib.error.URLError as errno:
        print(" ".join(("Connection error:", str(errno.reason))))
        return ""
    # Transparently decompress gzip-encoded bodies (magic bytes 1f 8b).
    if dat[:2] == b'\x1f\x8b':
        dat = gzip.GzipFile(fileobj=io.BytesIO(dat)).read()
    # get_content_charset() parses the Content-Type header properly
    # (quoting, extra parameters, whitespace), unlike the previous
    # split('charset=') hack which could yield an invalid codec name
    # and make decode() raise LookupError.
    charset = info.get_content_charset('utf-8')
    dat = dat.decode(charset, 'replace')
    dat = htmlentitydecode(dat)
    return dat
9 | 46 |
|
10 | 47 |
|
11 | 48 | class eztv(object):
|
|
0 commit comments