Skip to content

Commit

Permalink
optimize - compile the regex while building the automaton
Browse files Browse the repository at this point in the history
Signed-off-by: Tamar Galer <tamar@ox.security>
  • Loading branch information
tamar-ox committed Jul 10, 2024
1 parent a29e4c2 commit 37e36c7
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions maskerlogger/ahocorasick_regex_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,26 @@ def _extract_keywords_and_patterns(self, config) -> dict:
for keyword in rule.get('keywords', []):
if keyword not in keyword_to_patterns:
keyword_to_patterns[keyword] = []
keyword_to_patterns[keyword].append(rule['regex'])
keyword_to_patterns[keyword].append(self._get_compiled_regex(
rule['regex']))
return keyword_to_patterns

def _get_compiled_regex(self, regex: str) -> str:
if '(?i)' in regex:
regex = regex.replace('(?i)', '')
return re.compile(regex, re.IGNORECASE)
return re.compile(regex)

def _filter_by_keywords(self, line):
matched_regexes = set()
for end_index, regex_values in self.automaton.iter(line):
matched_regexes.update(regex_values)
return matched_regexes

def _get_match_regex(self, line, matched_regex) -> List[re.Match]:
def _get_match_regex(self, line: str,
matched_regex: List[re.Pattern]) -> List[re.Match]:
matches = []
for pattern in matched_regex:
regex = re.compile(pattern)
for regex in matched_regex:
if match := regex.search(line):
matches.append(match)
return matches
Expand Down

0 comments on commit 37e36c7

Please sign in to comment.