Skip to content

Commit

Permalink
Updating inscriptis library, removing fixes from 2.2
Browse files Browse the repository at this point in the history
  • Loading branch information
dgtlmoon committed Feb 2, 2024
1 parent 3b16b19 commit c8310b7
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 19 deletions.
17 changes: 0 additions & 17 deletions changedetectionio/html_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,23 +409,6 @@ def has_ldjson_product_info(content):
x=bool(pricing_data)
return x


def workarounds_for_obfuscations(content):
    """
    Strip markup tricks that make prices and other text un-renderable by Inscriptis.

    Some sites split a value with whitespace-only HTML comments, e.g. the
    HomeDepot.com style ``<span>$<!-- -->90<!-- -->.<!-- -->74</span>``
    (see https://github.com/weblyzard/inscriptis/issues/45).
    This could go into its own Pip package in the future, for faster updates.

    :param content: HTML markup as a string; may be empty or None.
    :return: ``content`` with every whitespace-only comment removed, or the
        falsy input handed back unchanged.
    """
    if not content:
        # Nothing to clean up; return the same falsy value (None, '') as-is.
        return content

    # Raw string on purpose: '\s' inside a plain literal is an invalid escape
    # sequence and triggers a warning on modern Python versions.
    # Only comments that contain nothing but whitespace are removed, so
    # comments carrying real text are left untouched.
    return re.sub(r'<!--\s+-->', '', content)


def get_triggered_text(content, trigger_text):
triggered_text = []
result = strip_ignore_text(content=content,
Expand Down
1 change: 0 additions & 1 deletion changedetectionio/processors/text_json_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,6 @@ def run_changedetection(self, uuid, skip_when_checksum_same=True):
if is_html or watch.is_source_type_url:

# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content)
html_content = self.fetcher.content

# If not JSON, and if it's not text/plain..
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ flask_expects_json~=1.7
flask_restful
flask_wtf~=1.2
flask~=2.3
inscriptis~=2.2
inscriptis~=2.4
pytz
timeago~=1.0
validators~=0.21
Expand Down

0 comments on commit c8310b7

Please sign in to comment.