Allow disabling checker
monosans committed Sep 18, 2024
1 parent aa57dbf commit 88ca641
Showing 3 changed files with 27 additions and 17 deletions.
2 changes: 2 additions & 0 deletions config.toml
@@ -18,6 +18,8 @@ max_connections = 512
 # Example:
 # check_website = "https://google.com"
 # check_website = "https://httpbin.smp.io/ip"
+# Disable checking:
+# check_website = ""
 check_website = "https://checkip.amazonaws.com"
 
 # Set to false to sort proxies alphabetically.
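An empty check_website string is what actually turns the checker off: once the TOML file is loaded, "" is falsy in Python, so every new "if settings.check_website" guard added in this commit skips the checking code path. A minimal sketch of that behaviour, using the standard-library tomllib loader rather than the project's own settings code:

import tomllib  # Python 3.11+; the real project may load its config differently

with open("config.toml", "rb") as f:
    config = tomllib.load(f)

check_website = config["check_website"]  # "" when checking is disabled
if check_website:
    print(f"proxies will be checked against {check_website}")
else:
    print("check_website is empty -- checking is disabled")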
16 changes: 9 additions & 7 deletions proxy_scraper_checker/__main__.py
@@ -153,15 +153,17 @@ async def main() -> None:
         await session.close()
         count_before_checking = storage.get_count()
         should_save = True
-        await checker.check_all(
-            settings=settings,
-            storage=storage,
-            progress=progress,
-            proxies_count=count_before_checking,
-        )
+        if settings.check_website:
+            await checker.check_all(
+                settings=settings,
+                storage=storage,
+                progress=progress,
+                proxies_count=count_before_checking,
+            )
     finally:
         if should_save:
-            storage.remove_unchecked()
+            if settings.check_website:
+                storage.remove_unchecked()
             count_after_checking = storage.get_count()
             console.print(
                 get_summary_table(
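The __main__.py change only wraps existing calls in a guard: when check_website is empty, check_all is never awaited and, correspondingly, unchecked proxies are not pruned before saving, so the scraped list is written out as-is. A self-contained sketch of that control flow, with a made-up Storage stand-in instead of the project's real storage and checker objects:

import asyncio


# Hypothetical stand-in for the real Storage object, just to illustrate
# the control flow introduced in this commit.
class Storage:
    def __init__(self) -> None:
        self.proxies = {"1.2.3.4:8080": None}  # None = not checked yet

    def get_count(self) -> int:
        return len(self.proxies)

    def remove_unchecked(self) -> None:
        self.proxies = {
            k: v for k, v in self.proxies.items() if v is not None
        }


async def run(check_website: str) -> None:
    storage = Storage()
    try:
        if check_website:  # empty string disables the check phase
            pass  # here the real code awaits checker.check_all(...)
    finally:
        if check_website:  # only prune when checking actually ran
            storage.remove_unchecked()
        print(check_website or "<disabled>", "->", storage.get_count(), "kept")


asyncio.run(run(""))  # checking disabled: the unchecked proxy is kept
asyncio.run(run("https://checkip.amazonaws.com"))  # enabled: it is pruned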
26 changes: 16 additions & 10 deletions proxy_scraper_checker/settings.py
@@ -143,6 +143,8 @@ async def _get_check_website_type_and_real_ip(
         Literal[CheckWebsiteType.PLAIN_IP, CheckWebsiteType.HTTPBIN_IP], str
     ]
 ):
+    if not check_website:
+        return CheckWebsiteType.UNKNOWN, None
     try:
         async with session.get(check_website) as response:
             content = await response.read()
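With the early return above, an empty check_website short-circuits _get_check_website_type_and_real_ip before any HTTP request is made: the website type is reported as UNKNOWN and there is no real IP to compare against. A rough standalone sketch of that shape (the enum definition and the function below are assumptions for illustration, not the project's exact code):

import enum
from typing import Optional, Tuple


# The diff references CheckWebsiteType.UNKNOWN / PLAIN_IP / HTTPBIN_IP;
# this minimal enum is assumed for illustration only.
class CheckWebsiteType(enum.Enum):
    UNKNOWN = enum.auto()
    PLAIN_IP = enum.auto()
    HTTPBIN_IP = enum.auto()


def classify_check_website(
    check_website: str,
) -> Tuple[CheckWebsiteType, Optional[str]]:
    # New behaviour: an empty check_website means "checking disabled",
    # so no request is made and no real IP has to be resolved.
    if not check_website:
        return CheckWebsiteType.UNKNOWN, None
    # The real coroutine would fetch the URL here and decide between
    # PLAIN_IP and HTTPBIN_IP based on the response body.
    return CheckWebsiteType.PLAIN_IP, "203.0.113.1"  # placeholder result


print(classify_check_website(""))  # (CheckWebsiteType.UNKNOWN, None)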
@@ -252,16 +254,20 @@ def _validate_check_website(
         value: str,
         /,
     ) -> None:
-        parsed_url = urlparse(value)
-        if parsed_url.scheme not in {"http", "https"} or not parsed_url.netloc:
-            msg = f"invalid check_website: {value}"
-            raise ValueError(msg)
-
-        if parsed_url.scheme == "http":
-            logger.warning(
-                "check_website uses the http protocol. "
-                "It is recommended to use https for correct checking."
-            )
+        if value:
+            parsed_url = urlparse(value)
+            if (
+                parsed_url.scheme not in {"http", "https"}
+                or not parsed_url.netloc
+            ):
+                msg = f"invalid check_website: {value}"
+                raise ValueError(msg)
+
+            if parsed_url.scheme == "http":
+                logger.warning(
+                    "check_website uses the http protocol. "
+                    "It is recommended to use https for correct checking."
+                )
 
     @timeout.validator
     def _validate_timeout(
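The relaxed validator now treats an empty string as a valid "checking disabled" value and only applies the URL checks when a value is present. A standalone sketch of the same rule (the real project wires this up as an attrs field validator; the free function below is made up for illustration):

import logging
from urllib.parse import urlparse

logger = logging.getLogger(__name__)


def validate_check_website(value: str) -> None:
    if not value:
        return  # empty string now means "checking disabled" and is accepted
    parsed_url = urlparse(value)
    if parsed_url.scheme not in {"http", "https"} or not parsed_url.netloc:
        msg = f"invalid check_website: {value}"
        raise ValueError(msg)
    if parsed_url.scheme == "http":
        logger.warning(
            "check_website uses the http protocol. "
            "It is recommended to use https for correct checking."
        )


validate_check_website("")  # accepted: checker disabled
validate_check_website("https://checkip.amazonaws.com")  # accepted
# validate_check_website("ftp://example.com")  # would raise ValueError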
