diff --git a/autoreporter_addons/ssl_checks/filtered_website_fragments_for_bad_redirect.txt b/autoreporter_addons/ssl_checks/filtered_website_fragments_waf_or_ratelimits.txt
similarity index 59%
rename from autoreporter_addons/ssl_checks/filtered_website_fragments_for_bad_redirect.txt
rename to autoreporter_addons/ssl_checks/filtered_website_fragments_waf_or_ratelimits.txt
index ddb0415..52f7019 100644
--- a/autoreporter_addons/ssl_checks/filtered_website_fragments_for_bad_redirect.txt
+++ b/autoreporter_addons/ssl_checks/filtered_website_fragments_waf_or_ratelimits.txt
@@ -1 +1,2 @@
 Please wait while your request is being verified...
+Unauthorized Access
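
For context, a minimal sketch of how a matcher file like the one above is consumed: one fragment per line, checked as a plain substring of the HTTP response body's prefix. The helper names below are illustrative, not part of this diff; note the blank-line guard, since an empty matcher would be a substring of every response.

from pathlib import Path
from typing import List


def load_fragments(path: Path) -> List[str]:
    # One matcher per line; strip whitespace and drop blank lines so that an
    # empty string (a substring of everything) never becomes a matcher.
    with open(path, "r") as f:
        return [line.strip() for line in f if line.strip()]


def matches_any_fragment(response_content_prefix: str, fragments: List[str]) -> bool:
    # Plain substring matching against the beginning of the response body.
    return any(fragment in response_content_prefix for fragment in fragments)


fragments = ["Please wait while your request is being verified...", "Unauthorized Access"]
print(matches_any_fragment("<html>Unauthorized Access</html>", fragments))  # True
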
diff --git a/autoreporter_addons/ssl_checks/reporter.py b/autoreporter_addons/ssl_checks/reporter.py
index 1e54f23..9007119 100644
--- a/autoreporter_addons/ssl_checks/reporter.py
+++ b/autoreporter_addons/ssl_checks/reporter.py
@@ -25,12 +25,16 @@
 with open(str(Path(__file__).parents[0] / "filtered_website_fragments.txt"), "r") as f:
     FILTERED_WEBSITE_FRAGMENTS = [line.strip() for line in f.readlines() if line]
 
-with open(str(Path(__file__).parents[0] / "filtered_website_fragments_for_bad_redirect.txt"), "r") as f:
-    # These fragments, if occur, mean that we shouldn't treat this website as containing a bad redirect.
+with open(str(Path(__file__).parents[0] / "filtered_website_fragments_waf_or_ratelimits.txt"), "r") as f:
+    # These fragments, if present, mean that we shouldn't report SSL problems for this site.
     # For instance, if Cloudflare returned HTTP 200 with a message "Please wait while your request is being verified...",
-    # that doesn't meant that the original website doesn't redirect to https:// - that means only, that our request
-    # got intercepted via Cloudflare.
-    FILTERED_WEBSITE_FRAGMENTS_FOR_BAD_REDIRECT = [line.strip() for line in f.readlines() if line]
+    # that tells us that we don't know what the original site content was - maybe something unimportant
+    # that would get filtered by FILTERED_WEBSITE_FRAGMENTS?
+    #
+    # Therefore, to keep the number of false positives low, we don't report such sites.
+    # **In case the decision changes, let's keep the list of WAF- or ratelimit-related matchers in a separate
+    # file.**
+    FILTERED_WEBSITE_FRAGMENTS_WAF_OR_RATELIMITS = [line.strip() for line in f.readlines() if line]
 
 
 class SSLChecksReporter(Reporter):  # type: ignore
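
The comment above describes a policy change: WAF/ratelimit fragments now suppress every SSL report for a site, not only the bad-redirect one. A hedged sketch of that gate (create_reports_sketch and its plain-dict return type are hypothetical simplifications, not the Reporter API):

from typing import Any, Dict, List

FILTERED_WEBSITE_FRAGMENTS_WAF_OR_RATELIMITS = [
    "Please wait while your request is being verified...",
    "Unauthorized Access",
]


def create_reports_sketch(result: Dict[str, Any]) -> List[Dict[str, Any]]:
    response_content_prefix = result.get("response_content_prefix", "")
    if any(
        fragment in response_content_prefix
        for fragment in FILTERED_WEBSITE_FRAGMENTS_WAF_OR_RATELIMITS
    ):
        # We never saw the real site content, so any SSL finding would be a
        # guess - report nothing to keep false positives low.
        return []
    # ... normal report creation would follow here ...
    return [{"report_type": "no_https_redirect"}]


print(create_reports_sketch({"response_content_prefix": "Unauthorized Access"}))  # []
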
@@ -81,8 +85,15 @@ def create_reports(task_result: Dict[str, Any], language: Language) -> List[Report]:
                 # This one is important - sometimes we reported false positives after getting a 5xx error (and thus no redirect)
                 or (response_status_code >= 500 and response_status_code <= 599)
             )
-            filter_by_content = "
@@ ... @@ def create_reports(task_result: Dict[str, Any], language: Language) -> List[Report]:
             )
if result.get("bad_redirect", False):
response_content_prefix = result.get("response_content_prefix", "")
- if not any(
- [fragment in response_content_prefix for fragment in FILTERED_WEBSITE_FRAGMENTS_FOR_BAD_REDIRECT]
- ):
- # If there is some kind of HTML redirect, let's better not report that, as it might be
- # a proper SSL redirect - here, we want to decrease the number of false positives at the
- # cost of true positives.
- try:
- soup = BeautifulSoup(response_content_prefix.lower(), "html.parser")
- except Exception: # parsing errors
- logger.exception("Unable to parse HTML from %s", payload["domain"])
- soup = None
-
- if not soup or not soup.find_all("meta", attrs={"http-equiv": "refresh"}):
- reports.append(
- Report(
- top_level_target=get_top_level_target(task_result),
- target=f'http://{payload["domain"]}:80/',
- report_type=SSLChecksReporter.NO_HTTPS_REDIRECT,
- additional_data={},
- timestamp=task_result["created_at"],
- )
+            # If there is some kind of HTML redirect, let's better not report that, as it might be
+            # a proper SSL redirect - here, we want to decrease the number of false positives at the
+            # cost of true positives.
+            try:
+                soup = BeautifulSoup(response_content_prefix.lower(), "html.parser")
+            except Exception:  # parsing errors
+                logger.exception("Unable to parse HTML from %s", payload["domain"])
+                soup = None
+
+            if not soup or not soup.find_all("meta", attrs={"http-equiv": "refresh"}):
+                reports.append(
+                    Report(
+                        top_level_target=get_top_level_target(task_result),
+                        target=f'http://{payload["domain"]}:80/',
+                        report_type=SSLChecksReporter.NO_HTTPS_REDIRECT,
+                        additional_data={},
+                        timestamp=task_result["created_at"],
                     )
+                )
         if result.get("cn_different_from_hostname", False):
             # If the domain starts with www. but the version without www. is in the names list,
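
The dedent above keeps the meta-refresh heuristic intact: a page that redirects via <meta http-equiv="refresh"> may well point to https://, so NO_HTTPS_REDIRECT is withheld. A standalone sketch of that check (has_html_meta_redirect is a hypothetical helper name; requires beautifulsoup4):

from bs4 import BeautifulSoup


def has_html_meta_redirect(response_content_prefix: str) -> bool:
    try:
        # Lowercase first, as reporter.py does, so HTTP-EQUIV="Refresh" matches too.
        soup = BeautifulSoup(response_content_prefix.lower(), "html.parser")
    except Exception:  # parsing errors - treat as "no redirect found", like soup = None above
        return False
    return bool(soup.find_all("meta", attrs={"http-equiv": "refresh"}))


print(has_html_meta_redirect('<meta HTTP-EQUIV="Refresh" content="0; url=https://example.com/">'))  # True
print(has_html_meta_redirect("<html><body>plain page</body></html>"))  # False
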