Merge pull request #73 from grrttedwards/cloudflare-bot-buster

Cloudflare bot buster
grrttedwards · Sep 28, 2019 · 2866c0e · 2866c0e
2 parents 6a71609 + cad3395
commit 2866c0e
Show file tree

Hide file tree

Showing 14 changed files with 73 additions and 44 deletions.
diff --git a/Pipfile b/Pipfile
@@ -9,5 +9,6 @@ coverage = "*"
 [packages]
 requests = "*"
 beautifulsoup4 = "*"
+cfscrape = "*"
 
 [requires]
diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/README.md b/README.md
@@ -8,20 +8,22 @@ _Now supporting both retail and classic addon management!_
 
 ## First-time setup
 
-You must have a version of [Python](https://www.python.org/) 3.6+.
+### System dependencies
+- You must have a version of [Python](https://www.python.org/) 3.6+.
 
-_If you know how to manage Python packages and virtual environments, you can skip this section._
+- You must have [Node.js](https://nodejs.org/en/) installed (for Curse and WoWAce addons only).
+    - Used by `cfscrape` to circumvent the Cloudflare _"bot-detection"_ found on CurseForge sites.
+
+### Python module dependencies
 
 You should already have `pip` included with your Python installation. This is the default package manager for Python.
 If not, download the latest version of Python for your platform, with `pip` bundled.
 
-
-### Installing the dependencies
-
-This utility has two external dependencies:
+This utility has three Python module dependencies:
 
 - The [requests](https://pypi.org/project/requests/) module, for making HTTP requests
 - The [BeautifulSoup4](https://pypi.org/project/beautifulsoup4/) module, for HTML document parsing
+- The [cfscrape](https://pypi.org/project/cfscrape/) module, for bypassing Curse's bot-detection measures
 
 It's recommended you manage this with [`pipenv`](https://github.com/pypa/pipenv). All you need to do is run the following to install `pipenv` and the dependencies:
 

diff --git a/changelog.txt b/changelog.txt
@@ -1,15 +1,18 @@
 Changelog
 
-* 9/24/2019
+* 9/28/2019 - v1.2.0
+Integrated cfscrape as a countermeasure against Cloudflare bot-detection, for Curse and WoWAce. Node.js is now a requirement for Curse-based sites.
+
+* 9/24/2019 - v1.1.2
 Fix WoWInterface site to accept any characters for the addon name instead of a more rigid regex.
 
-* 9/23/2019
+* 9/23/2019 - v1.1.1
 Add message for when Curse is being a jerk and blocking your requests.
 
-* 9/3/2019
+* 9/3/2019 - v1.1.0
 Add support for WoWInterface classic addons.
 
-* 8/31/2019
+* 8/31/2019 - v1.0
 Add command-line argument for specifying a configuration file. Now multiple independent configurations can be used, e.g. one for retail and one for classic.
 
 * 8/31/2019

diff --git a/test/site/test_curse.py b/test/site/test_curse.py
@@ -12,7 +12,6 @@
 ]
 
 
-@unittest.skip
 class TestCurse(unittest.TestCase):
     def setUp(self):
         self.url = 'https://www.curseforge.com/wow/addons/bartender4'

diff --git a/test/site/test_wowace.py b/test/site/test_wowace.py
@@ -4,7 +4,6 @@
 from updater.site.enum import GameVersion
 
 
-@unittest.skip
 class TestWowAce(unittest.TestCase):
     def setUp(self):
         self.url = 'https://www.wowace.com/projects/bartender4'

diff --git a/updater/__main__.py b/updater/__main__.py
@@ -7,7 +7,13 @@
 
 CHANGELOG_URL = 'https://raw.githubusercontent.com/grrttedwards/wow-addon-updater/master/changelog.txt'
 CHANGELOG_FILE = 'changelog.txt'
-NEW_UPDATE_MESSAGE = 'A new update is available! Check it out at https://github.com/grrttedwards/wow-addon-updater !'
+NEW_UPDATE_MESSAGE = """
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+A new update is available! Check it out at https://github.com/grrttedwards/wow-addon-updater/releases
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+"""
 
 
 def confirm_exit():
@@ -25,16 +31,15 @@ def check_version():
 
 
 def main():
-    check_version()
-
     parser = argparse.ArgumentParser(description='Update your WoW addons.')
     parser.add_argument('-c', '--config', nargs='?', default='config.ini', type=str, metavar='FILE',
                         help='the file to be used for configuration')
-
     args = parser.parse_args()
 
     AddonManager(args.config).update_all()
 
+    check_version()
+
 
 if __name__ == "__main__":
     # execute only if run as a script

diff --git a/updater/manager/addon_manager.py b/updater/manager/addon_manager.py
@@ -94,7 +94,7 @@ def update_addon(self, addon_entry):
 
             try:
                 zip_url = site.find_zip_url()
-                addon_zip = self.get_addon_zip(zip_url)
+                addon_zip = self.get_addon_zip(site.session, zip_url)
                 self.extract_to_addons(addon_zip, subfolder, site)
             except HTTPError:
                 print(f"Failed to download zip for [{addon_name}]")
@@ -112,8 +112,8 @@ def update_addon(self, addon_entry):
         addon_entry = [addon_name, addon_url, installed_version, latest_version]
         self.manifest.append(addon_entry)
 
-    def get_addon_zip(self, zip_url):
-        r = requests.get(zip_url, stream=True)
+    def get_addon_zip(self, session: requests.Session, zip_url):
+        r = session.get(zip_url, stream=True)
         r.raise_for_status()  # Raise an exception for HTTP errors
         return zipfile.ZipFile(BytesIO(r.content))
 

diff --git a/updater/site/abstract_site.py b/updater/site/abstract_site.py
@@ -8,6 +8,9 @@ class SiteError(Exception):
 
 
 class AbstractSite(ABC):
+    # each implementation should create a static session for itself
+    session = None
+
     def __init__(self, url: str, game_version: GameVersion):
         self.url = url
         self.game_version = game_version

diff --git a/updater/site/curse.py b/updater/site/curse.py
@@ -1,6 +1,6 @@
 import re
 
-import requests
+import cfscrape
 
 from updater.site.abstract_site import AbstractSite, SiteError
 from updater.site.enum import GameVersion
@@ -11,8 +11,10 @@ class Curse(AbstractSite):
     _OLD_URL = 'https://mods.curse.com/addons/wow/'
     _OLD_PROJECT_URL = 'https://wow.curseforge.com/projects/'
 
+    session = cfscrape.create_scraper("https://www.curseforge.com/")
+
     def __init__(self, url: str, game_version: GameVersion):
-        url = Curse._convert_old_curse_urls(url)
+        url = self._convert_old_curse_urls(url)
         super().__init__(url, game_version)
 
     @classmethod
@@ -21,7 +23,7 @@ def get_supported_urls(cls):
 
     def find_zip_url(self):
         try:
-            page = requests.get(self.url)
+            page = Curse.session.get(self.url)
             page.raise_for_status()  # Raise an exception for HTTP errors
             content_string = str(page.content)
             main_zip_url, *classic_zip_url = re.findall(
@@ -35,10 +37,9 @@ def find_zip_url(self):
 
     def get_latest_version(self):
         try:
-            page = requests.get(self.url)
+            page = Curse.session.get(self.url)
             if page.status_code in [403, 503]:
-                print("Curse is temporarily blocking requests because it thinks you are a bot... please try later. "
-                      "Consider finding this addon on WoWInterface or GitHub.")
+                print("Curse is blocking requests because it thinks you are a bot... please try later.")
             page.raise_for_status()  # Raise an exception for HTTP errors
             content_string = str(page.content)
             # the first one encountered will be the WoW retail version
@@ -56,7 +57,7 @@ def _convert_old_curse_urls(cls, url: str) -> str:
             try:
                 # Some old URL's may point to nonexistent pages. Rather than guess at what the new
                 # name and URL is, just try to load the old URL and see where Curse redirects us to.
-                page = requests.get(url)
+                page = Curse.session.get(url)
                 page.raise_for_status()
                 return page.url
             except Exception as e:

diff --git a/updater/site/github.py b/updater/site/github.py
@@ -9,6 +9,8 @@
 class GitHub(AbstractSite):
     _URL = 'https://github.com/'
 
+    session = requests.session()
+
     def __init__(self, url: str):
         if '/tree/master' not in url:
             url = (url + '/tree/master')
@@ -23,7 +25,7 @@ def find_zip_url(self):
 
     def get_latest_version(self):
         try:
-            response = requests.get(self.url)
+            response = GitHub.session.get(self.url)
             response.raise_for_status()
             content = str(response.content)
             version = re.search(

diff --git a/updater/site/tukui.py b/updater/site/tukui.py
@@ -9,6 +9,8 @@ class Tukui(AbstractSite):
     _URL = 'https://git.tukui.org/elvui/'
     latest_version = None
 
+    session = requests.session()
+
     def __init__(self, url: str):
         super().__init__(url, GameVersion.agnostic)
 
@@ -25,7 +27,7 @@ def get_latest_version(self):
         if self.latest_version:
             return self.latest_version
         try:
-            response = requests.get(self.url + '/-/tags')
+            response = Tukui.session.get(self.url + '/-/tags')
             response.raise_for_status()
             tags_page = BeautifulSoup(response.text, 'html.parser')
             version = tags_page.find('div', {'class': 'tags'}).find('a').string

diff --git a/updater/site/wowace.py b/updater/site/wowace.py
@@ -1,6 +1,6 @@
 import re
 
-import requests
+import cfscrape
 
 from updater.site.abstract_site import AbstractSite
 from updater.site.enum import GameVersion
@@ -9,6 +9,8 @@
 class WoWAce(AbstractSite):
     _URL = 'https://www.wowace.com/projects/'
 
+    session = cfscrape.create_scraper()
+
     def __init__(self, url: str, game_version: GameVersion):
         if game_version != GameVersion.retail:
             raise NotImplementedError("Updating classic addons are not yet supported for WoWAce.")
@@ -23,10 +25,9 @@ def find_zip_url(self):
 
     def get_latest_version(self):
         try:
-            page = requests.get(self.url + '/files')
+            page = WoWAce.session.get(self.url + '/files')
             if page.status_code in [403, 503]:
-                print("WoWAce (Curse) is temporarily blocking requests because it thinks you are a bot... please try later. "
-                      "Consider finding this addon on WoWInterface or GitHub.")
+                print("WoWAce (Curse) is blocking requests because it thinks you are a bot... please try later.")
             page.raise_for_status()  # Raise an exception for HTTP errors
             content_string = str(page.content)
             # the first one encountered will be the WoW retail version

diff --git a/updater/site/wowinterface.py b/updater/site/wowinterface.py
@@ -9,6 +9,8 @@
 class WoWInterface(AbstractSite):
     _URL = 'https://www.wowinterface.com/downloads/'
 
+    session = requests.session()
+
     def __init__(self, url: str, game_version: GameVersion):
         super().__init__(url, game_version)
 
@@ -19,7 +21,7 @@ def get_supported_urls(cls) -> [str]:
     def find_zip_url(self):
         downloadpage = self.url.replace('info', 'download')
         try:
-            page = requests.get(downloadpage + '/download')
+            page = WoWInterface.session.get(downloadpage + '/download')
             page.raise_for_status()  # Raise an exception for HTTP errors
             content_string = str(page.content)
             index_of_ziploc = content_string.find('Problems with the download? <a href="') + 37  # first char of the url
@@ -30,7 +32,7 @@ def find_zip_url(self):
 
     def get_latest_version(self):
         try:
-            page = requests.get(self.url)
+            page = WoWInterface.session.get(self.url)
             page.raise_for_status()  # Raise an exception for HTTP errors
             content_string = str(page.content)
             index_of_ver = content_string.find('id="version"') + 22  # first char of the version string