|
2 | 2 | # AUTHORS: nindogo
|
3 | 3 | # CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
|
4 | 4 |
|
| 5 | +import io |
| 6 | +import gzip |
| 7 | +import urllib.error |
| 8 | +import urllib.parse |
| 9 | +import urllib.request |
5 | 10 | from html.parser import HTMLParser
|
6 | 11 |
|
7 | 12 | from novaprinter import prettyPrinter
|
8 |
| -from helpers import retrieve_url |
| 13 | +from helpers import htmlentitydecode |
| 14 | + |
# Some sites block the default Python User-Agent, so impersonate a real browser
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0'}
| 17 | + |
# We must implement our own retrieve_url because helpers.py versions prior to 1.49 did not
# support POST requests. This version is based on helpers.py 1.45
def retrieve_url(url, data=None):
    """Return the content of the url page as a string.

    Parameters:
        url  -- URL to fetch
        data -- optional bytes payload; when not None the request is a POST

    Returns the decoded page body, or "" on connection errors.
    """
    req = urllib.request.Request(url, data, headers)
    try:
        # 'with' guarantees the response is closed (the original leaked it).
        with urllib.request.urlopen(req) as response:
            dat = response.read()
            info = response.info()
    except urllib.error.URLError as errno:
        print(" ".join(("Connection error:", str(errno.reason))))
        return ""
    # Transparently decompress gzip-encoded bodies (magic bytes 1f 8b).
    if dat[:2] == b'\x1f\x8b':
        dat = gzip.GzipFile(fileobj=io.BytesIO(dat)).read()
    # get_content_charset() parses the Content-Type header properly
    # (quoting, extra parameters, whitespace), unlike the previous
    # split('charset=') hack which could yield an invalid codec name
    # and make decode() raise LookupError.
    charset = info.get_content_charset('utf-8')
    dat = dat.decode(charset, 'replace')
    dat = htmlentitydecode(dat)
    return dat
9 | 46 |
|
10 | 47 |
|
11 | 48 | class eztv(object):
|
|
0 commit comments