diff --git a/proxy_scraper_checker/constants.py b/proxy_scraper_checker/constants.py new file mode 100644 index 000000000..e3e2e8184 --- /dev/null +++ b/proxy_scraper_checker/constants.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from types import MappingProxyType + +from aiohttp import hdrs + +DEFAULT_CHECK_WEBSITE = "http://ip-api.com/json/?fields=8217" +HEADERS: MappingProxyType[str, str] = MappingProxyType({ + hdrs.USER_AGENT: ( + "Mozilla/5.0 (Windows NT 10.0; rv:120.0) Gecko/20100101 Firefox/120.0" + ) +}) diff --git a/proxy_scraper_checker/proxy.py b/proxy_scraper_checker/proxy.py index a2754794e..e9ebd30b9 100644 --- a/proxy_scraper_checker/proxy.py +++ b/proxy_scraper_checker/proxy.py @@ -3,22 +3,15 @@ import asyncio from dataclasses import dataclass from time import perf_counter -from types import MappingProxyType from typing import Union from aiohttp import ClientSession, ClientTimeout from aiohttp.abc import AbstractCookieJar from aiohttp_socks import ProxyConnector, ProxyType +from .constants import HEADERS from .null_context import AsyncNullContext -DEFAULT_CHECK_WEBSITE = "http://ip-api.com/json/?fields=8217" -HEADERS = MappingProxyType({ - "User-Agent": ( - "Mozilla/5.0 (Windows NT 10.0; rv:120.0) Gecko/20100101 Firefox/120.0" - ) -}) - @dataclass(repr=False, unsafe_hash=True) class Proxy: @@ -35,9 +28,8 @@ async def check( cookie_jar: AbstractCookieJar, proto: ProxyType, timeout: ClientTimeout, + set_geolocation: bool, ) -> None: - if website == "default": - website = DEFAULT_CHECK_WEBSITE async with sem: start = perf_counter() connector = ProxyConnector( @@ -51,14 +43,14 @@ async def check( ) as session, session.get( website, raise_for_status=True ) as response: - if website == DEFAULT_CHECK_WEBSITE: + if set_geolocation: await response.read() self.timeout = perf_counter() - start - if website == DEFAULT_CHECK_WEBSITE: + if set_geolocation: data = await response.json(content_type=None) self.is_anonymous = self.host != data["query"] - self.geolocation = "|{}|{}|{}".format( - data["country"], data["regionName"], data["city"] + self.geolocation = ( + f"|{data['country']}|{data['regionName']}|{data['city']}" ) def as_str(self, *, include_geolocation: bool) -> str: diff --git a/proxy_scraper_checker/proxy_scraper_checker.py b/proxy_scraper_checker/proxy_scraper_checker.py index 73ea31e24..69f74bc82 100644 --- a/proxy_scraper_checker/proxy_scraper_checker.py +++ b/proxy_scraper_checker/proxy_scraper_checker.py @@ -32,9 +32,10 @@ from typing_extensions import Self from . import sort, validators +from .constants import DEFAULT_CHECK_WEBSITE, HEADERS from .folder import Folder from .null_context import AsyncNullContext -from .proxy import HEADERS, Proxy +from .proxy import Proxy logger = logging.getLogger(__name__) @@ -47,6 +48,7 @@ class ProxyScraperChecker: "console", "cookie_jar", "folders", + "geolocation_enabled", "path", "proxies_count", "proxies", @@ -112,7 +114,12 @@ def __init__( self.path = save_path self.folders = folders - if self.check_website != "default": + if self.check_website == "default": + self.check_website = DEFAULT_CHECK_WEBSITE + + if self.check_website == DEFAULT_CHECK_WEBSITE: + validators.folders(self.folders) + else: validators.check_website(check_website) logger.info( "CheckWebsite is not 'default', " @@ -123,8 +130,13 @@ def __init__( folder.is_enabled = ( not folder.for_anonymous and not folder.for_geolocation ) - else: - validators.folders(self.folders) + + self.geolocation_enabled = any( + self.check_website == DEFAULT_CHECK_WEBSITE + and folder.is_enabled + and folder.for_geolocation + for folder in self.folders + ) self.sources: Dict[ProxyType, FrozenSet[str]] = { proto: frozenset(filter(None, sources.splitlines())) @@ -292,6 +304,7 @@ async def check_proxy( cookie_jar=self.cookie_jar, proto=proto, timeout=self.timeout, + set_geolocation=self.geolocation_enabled, ) except Exception as e: # Too many open files