Merge branch 'develop' into 'master'

Develop

See merge request namibsun/python/xdcc-dl!8
namboy94 · Jul 29, 2021 · aef8a35
2 parents 213c06f + 4002a32
commit aef8a35
Show file tree

Hide file tree

Showing 9 changed files with 82 additions and 79 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -5,7 +5,7 @@ stages:
   - release
 
 default:
-  image: namboy94/ci-docker-environment:0.14.0
+  image: namboy94/ci-docker-environment:0.18.0
   before_script:
     - echo "$SERVER_ACCESS_KEY" > ~/.ssh/id_rsa
     - chmod 0600 ~/.ssh/id_rsa

diff --git a/CHANGELOG b/CHANGELOG
@@ -1,3 +1,7 @@
+V 5.2.0:
+  - Made XDCC Client slightly more reliable
+  - Fixed nibl search engine
+  - Better error handling for subsplease engine (which is currently broken due to CloudFlare DDoS protection)
 V 5.1.0:
   - iXIRC search now uses the API, courtesy of Jean Wicht
 V 5.0.0:

diff --git a/README.md b/README.md
@@ -78,7 +78,7 @@ from xdcc_dl.entities import XDCCPack, IrcServer
 # Generate packs
 manual = XDCCPack(IrcServer("irc.rizon.net"), "bot", 1)
 from_message = XDCCPack.from_xdcc_message("/msg bot xdcc send #2-10")
-search_results = SearchEngines.HORRIBLESUBS.value.search("Test")
+search_results = SearchEngines.SUBSPLEASE.value.search("Test")
 combined = [manual] + from_message + search_results
 
 # Start download

diff --git a/bin/xdcc-browse b/bin/xdcc-browse
@@ -52,30 +52,31 @@ def main(args: argparse.Namespace, logger: logging.Logger):
         for pack in packs:
             logger.info("Downloading pack {}".format(pack))
 
-        download_packs(
-            packs,
-            timeout=args.timeout,
-            fallback_channel=args.fallback_channel,
-            throttle=args.throttle,
-            wait_time=args.wait_time,
-            username=args.username,
-            channel_join_delay=args.channel_join_delay
-        )
+        try:
+            download_packs(
+                packs,
+                timeout=args.timeout,
+                fallback_channel=args.fallback_channel,
+                throttle=args.throttle,
+                wait_time=args.wait_time,
+                username=args.username,
+                channel_join_delay=args.channel_join_delay
+            )
+        except ValueError:
+            print("Invalid throttle value {}".format(args.throttle))
 
     except ConnectionError:
         print("Connection Error, could not conduct search")
     except DownloadIncomplete:
         logger.warning("Download incomplete.")
         raise KeyboardInterrupt()
-    except ValueError:
-        print("Invalid throttle value {}".format(args.throttle))
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("search_term", help="The term to search for")
     parser.add_argument("--search-engine",
-                        default=SearchEngineType.SUBSPLEASE.name.lower(),
+                        default=SearchEngineType.NIBL.name.lower(),
                         choices=SearchEngineType.choices(True),
                         help="The Search Engine to use")
     add_xdcc_argparse_arguments(parser)

diff --git a/bin/xdcc-search b/bin/xdcc-search
@@ -19,7 +19,7 @@ along with xdcc-dl.  If not, see <http://www.gnu.org/licenses/>.
 LICENSE"""
 
 import argparse
-from puffotter.init import cli_start
+from puffotter.init import cli_start, argparse_add_verbosity
 from requests.exceptions import ConnectionError
 from xdcc_dl import sentry_dsn
 from xdcc_dl.pack_search.SearchEngine import SearchEngineType
@@ -56,6 +56,7 @@ if __name__ == "__main__":
     parser.add_argument("search_engine",
                         choices=SearchEngineType.choices(True),
                         help="The Search Engine to use")
+    argparse_add_verbosity(parser)
     cli_start(
         main, parser,
         sentry_dsn=sentry_dsn,

diff --git a/version b/version
@@ -1 +1 @@
-5.1.0
+5.2.0
diff --git a/xdcc_dl/pack_search/procedures/nibl.py b/xdcc_dl/pack_search/procedures/nibl.py
@@ -17,7 +17,6 @@
 along with xdcc-dl.  If not, see <http://www.gnu.org/licenses/>.
 LICENSE"""
 
-# imports
 import requests
 from typing import List
 from bs4 import BeautifulSoup
@@ -33,46 +32,27 @@ def find_nibl_packs(search_phrase: str) -> List[XDCCPack]:
     :param search_phrase: The search phrase to search for
     :return:              The list of found XDCC Packs
     """
-
-    # Prepare the search term, nibl.co.uk uses + symbols as spaces.
-    split_search_term = search_phrase.split(" ")
-    prepared_search_term = split_search_term[0]
-    i = 1
-    while i < len(split_search_term):
-        prepared_search_term += "+" + split_search_term[i]
-        i += 1
-
-    # Get the data from the website
-
-    url = "https://nibl.co.uk/bots.php?search=" + prepared_search_term
+    query = "+".join(search_phrase.split(" "))
+    url = f"https://nibl.co.uk/search?query={query}"
     html = requests.get(url).text
 
     content = BeautifulSoup(html, "html.parser")
-    file_names = content.select(".filename")
-    pack_numbers = content.select(".packnumber")
-    bot_names = content.select(".name")
-    file_sizes = content.select(".filesize")
-
-    results = []
-    i = 0  # We need a counter variable since we have four lists of data
-
-    while i < len(file_names):
-
-        # The filename has two links after it, which need to be cut out
-        filename = file_names[i].text.rsplit(" \n", 1)[0]
-
-        # The bot name has a link after it, which needs to be cut out
-        bot = bot_names[i].text.rsplit(" ", 1)[0]
-
-        server = "irc.rizon.net"
-        packnumber = int(pack_numbers[i].text)
-        size = file_sizes[i].text.lower()
-
-        result = XDCCPack(IrcServer(server), bot, packnumber)
-
-        result.set_size(byte_string_to_byte_count(size))
-        result.set_filename(filename)
-        results.append(result)
-        i += 1
-
-    return results
+    rows = content.find_all("tr")
+    header = rows.pop(0)
+    keys = [x.text for x in header.find_all("th")]
+    results = [
+        {
+            keys[i]: column.text.strip()
+            for i, column in enumerate(row.find_all("td"))
+        }
+        for row in rows
+    ]
+
+    server = IrcServer("irc.rizon.net")
+    packs = []
+    for result in results:
+        pack = XDCCPack(server, result["Bot"], int(result["Pack"]))
+        pack.set_size(byte_string_to_byte_count(result["Size"]))
+        pack.set_filename(result["Filename"])
+        packs.append(pack)
+    return packs
diff --git a/xdcc_dl/pack_search/procedures/subsplease.py b/xdcc_dl/pack_search/procedures/subsplease.py
@@ -17,7 +17,7 @@
 along with xdcc-dl.  If not, see <http://www.gnu.org/licenses/>.
 LICENSE"""
 
-# imports
+import logging
 import cfscrape
 from typing import List, Dict
 from xdcc_dl.entities.XDCCPack import XDCCPack
@@ -38,7 +38,15 @@ def find_subsplease_packs(search_phrase: str) -> List[XDCCPack]:
 
     url = "https://subsplease.org/xdcc/search.php?t=" + search_query
     scraper = cfscrape.create_scraper()
-    results = scraper.get(url).text.split(";")
+    response = scraper.get(url)
+
+    if response.status_code >= 300:
+        logging.warning("Failed to load data from subsplease. "
+                        "Most likely has something to do with CloudFlare's "
+                        "DDoS protection")
+        return []
+
+    results = response.text.split(";")
 
     packs = []
     for result in results:

diff --git a/xdcc_dl/xdcc/XDCCClient.py b/xdcc_dl/xdcc/XDCCClient.py
@@ -118,6 +118,8 @@ def __init__(
         self.xdcc_connection = None  # type: Optional[DCCConnection]
         self.retry = retry
         self.struct_format = b"!I"
+        self.ack_queue: List[bytes] = []
+        self.ack_thread = Thread(target=self.send_acks)
         self.ack_lock = Lock()
 
         if not self.retry:
@@ -214,8 +216,10 @@ def download(self) -> str:
         finally:
             self.connected = False
             self.disconnected = True
+            self.logger.info("Joining threads")
             self.timeout_watcher_thread.join()
             self.progress_printer_thread.join()
+            self.ack_thread.join()
             print("\n" + message)
 
             self.logger.info("Disconnecting")
@@ -379,7 +383,9 @@ def start_download(append: bool = False):
             self.xdcc_file = open(self.pack.get_filepath(), mode)
             self.xdcc_connection = self.dcc("raw")
             self.xdcc_connection.connect(self.peer_address, self.peer_port)
-            self.xdcc_connection.socket.settimeout(5)
+            # self.xdcc_connection.socket.settimeout(5)
+
+            self.ack_thread.start()
 
         self.logger.info("CTCP Message: " + str(event.arguments))
         if event.arguments[0] == "DCC":
@@ -529,6 +535,7 @@ def _ack(self):
         # Whenever the old one gets too small
         try:
             payload = struct.pack(self.struct_format, self.progress)
+            self.ack_queue.append(payload)
         except struct.error:
 
             if self.struct_format == b"!I":
@@ -543,28 +550,12 @@ def _ack(self):
             self._ack()
             return
 
-        def acker():
-            """
-            The actual ack will be sent using a different thread since that
-            somehow avoids the socket timing out for some reason.
-            :return: None
-            """
-
-            self.ack_lock.acquire()
-            try:
-                self.xdcc_connection.socket.send(payload)
-            except socket.timeout:
-                self.logger.debug("ACK timed out")
-                self._disconnect()
-            finally:
-                self.ack_lock.release()
-        Thread(target=acker).start()
-
     def _disconnect(self):
         """
         Disconnects all connections of the XDCC Client
         :return: None
         """
+        self.logger.info("Initializing Disconnect")
         self.connection.reactor.disconnect_all()
 
     def timeout_watcher(self):
@@ -575,17 +566,35 @@ def timeout_watcher(self):
         """
         while not self.connected \
                 or self.connect_start_time + self.wait_time > time.time():
-            pass
+            time.sleep(0.5)
+
         self.logger.info("Timeout watcher started")
         while not self.message_sent and not self.disconnected:
             time.sleep(1)
-            self.logger.debug("Iterating timeout thread")
             if self.timeout < (time.time() - self.connect_start_time):
                 self.logger.info("Timeout detected")
                 self.connection.ping(self.server.address)
                 break
         self.logger.info("Message sent without timeout")
 
+    def send_acks(self):
+        while self.downloading:
+            self.ack_lock.acquire()
+            try:
+                if len(self.ack_queue) > 0:
+                    self.xdcc_connection.socket.send(self.ack_queue.pop(0))
+                else:
+                    time.sleep(0.5)
+            except socket.timeout:
+                self.logger.debug("ACK timed out")
+                continue
+            except AttributeError:
+                self.logger.warning("Missing XDCC socket")
+                # This happens sometimes, don't ask me why though
+                continue
+            finally:
+                self.ack_lock.release()
+
     def progress_printer(self):
         """
         Prints the download progress