diff --git a/.github/.wordlist.txt b/.github/.wordlist.txt index 28862b855..359d76db0 100644 --- a/.github/.wordlist.txt +++ b/.github/.wordlist.txt @@ -41,6 +41,7 @@ bw cappella cd chazlarson +cloudscraper codec codecs collectionless diff --git a/CHANGELOG b/CHANGELOG index 4e3f4a05e..eb38d979a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,14 +1,15 @@ # Requirements Update (requirements will need to be reinstalled) Add pywin32 requirement at 308 (windows only) +Add cloudscraper requirement at 1.2.71 Update gitpython requirement to 3.1.44 Update lxml requirement to 5.3.1 Update num2words requirement to 0.5.14 Update pathvalidate requirement to 3.2.3 Update pillow requirement to 11.1.0 Update plexapi requirement to 4.16.1 -Update psutil requirement to 6.1.1 +Update psutil requirement to 7.0.0 Update ruamel-yaml requirement to 0.18.9 -Update setuptools requirement to 75.8.0 +Update setuptools requirement to 75.8.1 Update tmdbapis requirement to 1.2.28 # Important Changes @@ -88,3 +89,4 @@ Fixes an issue with boolean filter matching. Fixes an issue where the decade default collection names were incorrect. Fixes the playlist default to automatically work with a supplied list. Remove an unnecessary request to Plex while processing overlays. +Fixes an issue with ICheckMovies parsing. 
diff --git a/modules/builder.py b/modules/builder.py index 2964532b0..401140d89 100644 --- a/modules/builder.py +++ b/modules/builder.py @@ -1477,11 +1477,11 @@ def _anilist(self, method_name, method_data): def _icheckmovies(self, method_name, method_data): if method_name.startswith("icheckmovies_list"): - icheckmovies_lists = self.config.ICheckMovies.validate_icheckmovies_lists(method_data, self.language) + icheckmovies_lists = self.config.ICheckMovies.validate_icheckmovies_lists(method_data) for icheckmovies_list in icheckmovies_lists: self.builders.append(("icheckmovies_list", icheckmovies_list)) if method_name.endswith("_details"): - self.summaries[method_name] = self.config.ICheckMovies.get_list_description(icheckmovies_lists[0], self.language) + self.summaries[method_name] = self.config.ICheckMovies.get_list_description(icheckmovies_lists[0]) def _imdb(self, method_name, method_data): if method_name == "imdb_id": @@ -2214,7 +2214,7 @@ def gather_ids(self, method, value): elif "imdb" in method: ids = self.config.IMDb.get_imdb_ids(method, value, self.language) elif "icheckmovies" in method: - ids = self.config.ICheckMovies.get_imdb_ids(method, value, self.language) + ids = self.config.ICheckMovies.get_imdb_ids(method, value) elif "letterboxd" in method: ids = self.config.Letterboxd.get_tmdb_ids(method, value, self.language) elif "reciperr" in method or "stevenlu" in method: diff --git a/modules/icheckmovies.py b/modules/icheckmovies.py index c138256f6..e28925b3f 100644 --- a/modules/icheckmovies.py +++ b/modules/icheckmovies.py @@ -10,33 +10,33 @@ class ICheckMovies: def __init__(self, requests): self.requests = requests - def _request(self, url, language, xpath): + def _request(self, url, xpath): logger.trace(f"URL: {url}") - return self.requests.get_html(url, language=language).xpath(xpath) + return self.requests.get_scrape_html(url).xpath(xpath) - def _parse_list(self, list_url, language): - imdb_urls = self._request(list_url, language, 
"//a[@class='optionIcon optionIMDB external']/@href") + def _parse_list(self, list_url): + imdb_urls = self._request(list_url, "//a[@class='optionIcon optionIMDB external']/@href") return [(t[t.find("/tt") + 1:-1], "imdb") for t in imdb_urls] - def get_list_description(self, list_url, language): - descriptions = self._request(list_url, language, "//div[@class='span-19 last']/p/em/text()") + def get_list_description(self, list_url): + descriptions = self._request(list_url, "//div[@class='span-19 last']/p/em/text()") return descriptions[0] if len(descriptions) > 0 and len(descriptions[0]) > 0 else None - def validate_icheckmovies_lists(self, icheckmovies_lists, language): + def validate_icheckmovies_lists(self, icheckmovies_lists): valid_lists = [] for icheckmovies_list in util.get_list(icheckmovies_lists, split=False): list_url = icheckmovies_list.strip() if not list_url.startswith(base_url): raise Failed(f"ICheckMovies Error: {list_url} must begin with: {base_url}") - elif len(self._parse_list(list_url, language)) > 0: + elif len(self._parse_list(list_url)) > 0: valid_lists.append(list_url) else: raise Failed(f"ICheckMovies Error: {list_url} failed to parse") return valid_lists - def get_imdb_ids(self, method, data, language): + def get_imdb_ids(self, method, data): if method == "icheckmovies_list": logger.info(f"Processing ICheckMovies List: {data}") - return self._parse_list(data, language) + return self._parse_list(data) else: raise Failed(f"ICheckMovies Error: Method {method} not supported") diff --git a/modules/request.py b/modules/request.py index ad93bef62..e7cff9350 100644 --- a/modules/request.py +++ b/modules/request.py @@ -1,4 +1,4 @@ -import base64, os, ruamel.yaml, requests +import base64, cloudscraper, os, ruamel.yaml, requests from lxml import html from modules import util from modules.poster import ImageData @@ -71,6 +71,7 @@ def __init__(self, local, part, env_branch, git_branch, verify_ssl=True): self._latest = None self._newest = None 
self.session = self.create_session() + self.scraper = cloudscraper.create_scraper() self.global_ssl = verify_ssl if not self.global_ssl: self.no_verify_ssl() @@ -141,6 +142,10 @@ def get_stream(self, url, location, info="Item"): logger.ghost(f"Downloading {info}: {dl / total_length * 100:6.2f}%") logger.exorcise() + def get_scrape_html(self, url): + """Fetch url through the cloudscraper session and return the parsed lxml HTML tree.""" + return html.fromstring(self.scraper.get(url).content) + def get_html(self, url, headers=None, params=None, header=None, language=None): return html.fromstring(self.get(url, headers=headers, params=params, header=header, language=language).content) diff --git a/requirements.txt b/requirements.txt index 30378589d..e60c745f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,18 @@ arrapi==1.4.13 +cloudscraper==1.2.71 GitPython==3.1.44 lxml==5.3.1 num2words==0.5.14 pathvalidate==3.2.3 pillow==11.1.0 PlexAPI==4.16.1 -psutil==6.1.1 +psutil==7.0.0 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 pywin32==308; sys_platform == 'win32' requests==2.32.3 ruamel.yaml==0.18.10 schedule==1.2.2 -setuptools==75.8.0 +setuptools==75.8.1 tenacity==9.0.0 tmdbapis==1.2.28 \ No newline at end of file