Skip to content

Commit

Permalink
Filters should apply at the end of the chain
Browse files: browse the repository at this point in the history
  • Loading branch information
dgtlmoon committed Sep 10, 2024
1 parent 7289e4e commit 5108201
Showing 1 changed file with 14 additions and 12 deletions.
26 changes: 14 additions & 12 deletions changedetectionio/processors/text_json_diff/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,18 +204,6 @@ def run_changedetection(self, watch, skip_when_checksum_same=True):
is_rss=is_rss # #1874 activate the <title workaround hack
)

if watch.get('sort_text_alphabetically') and stripped_text_from_html:
# Note: a <p>something</p> adds an extra line feed to mark the paragraph gap, so we end up with 'Some text\n\n'.
# Sorting would move all of the resulting empty lines to the start, so collapse the doubled line feeds first.
stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n')
stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() ))

if watch.get('trim_text_whitespace') and stripped_text_from_html:
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.splitlines())

if watch.get('remove_duplicate_lines') and stripped_text_from_html:
stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.splitlines()))

# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

Expand Down Expand Up @@ -304,6 +292,20 @@ def run_changedetection(self, watch, skip_when_checksum_same=True):
stripped_text_from_html = b''.join(regex_matched_output)
text_content_before_ignored_filter = stripped_text_from_html


if watch.get('sort_text_alphabetically') and stripped_text_from_html:
# Note: a <p>something</p> adds an extra line feed to mark the paragraph gap, so we end up with 'Some text\n\n'.
# Sorting would move all of the resulting empty lines to the start, so collapse the doubled line feeds first.
stripped_text_from_html = stripped_text_from_html.replace(b'\n\n', b'\n')
stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.decode('utf-8').splitlines(), key=lambda x: x.lower())).encode('utf-8')

# Strip leading and trailing whitespace from every line
if watch.get('trim_text_whitespace') and stripped_text_from_html:
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.decode('utf-8').splitlines()).encode('utf-8')
# Drop repeated lines, keeping the first occurrence of each in its original position
if watch.get('remove_duplicate_lines') and stripped_text_from_html:
stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.decode('utf-8').splitlines())).encode('utf-8')

# Re #133 - if we should strip whitespaces from triggering the change detected comparison
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
Expand Down

0 comments on commit 5108201

Please sign in to comment.