From c8310b7e93a8dce85f88e9fb9f8aec63a5002fa6 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Fri, 2 Feb 2024 09:28:24 +0100 Subject: [PATCH] Updating inscriptis library, removing fixes from 2.2 --- changedetectionio/html_tools.py | 17 ----------------- changedetectionio/processors/text_json_diff.py | 1 - requirements.txt | 2 +- 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py index 7c9844c8aa8..24c02dcddbb 100644 --- a/changedetectionio/html_tools.py +++ b/changedetectionio/html_tools.py @@ -409,23 +409,6 @@ def has_ldjson_product_info(content): x=bool(pricing_data) return x - -def workarounds_for_obfuscations(content): - """ - Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis - This could go into its own Pip package in the future, for faster updates - """ - - # HomeDepot.com style $90.74 - # https://github.com/weblyzard/inscriptis/issues/45 - if not content: - return content - - content = re.sub('', '', content) - - return content - - def get_triggered_text(content, trigger_text): triggered_text = [] result = strip_ignore_text(content=content, diff --git a/changedetectionio/processors/text_json_diff.py b/changedetectionio/processors/text_json_diff.py index 305cfe17633..e6790deaffe 100644 --- a/changedetectionio/processors/text_json_diff.py +++ b/changedetectionio/processors/text_json_diff.py @@ -151,7 +151,6 @@ def run_changedetection(self, uuid, skip_when_checksum_same=True): if is_html or watch.is_source_type_url: # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text - self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content) html_content = self.fetcher.content # If not JSON, and if it's not text/plain.. diff --git a/requirements.txt b/requirements.txt index 5bc269c9c25..7c036e35789 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ flask_expects_json~=1.7 flask_restful flask_wtf~=1.2 flask~=2.3 -inscriptis~=2.2 +inscriptis~=2.4 pytz timeago~=1.0 validators~=0.21