diff --git a/wapitiCore/net/crawler.py b/wapitiCore/net/crawler.py
index 884aec545..1e0151155 100644
--- a/wapitiCore/net/crawler.py
+++ b/wapitiCore/net/crawler.py
@@ -592,9 +592,7 @@ def __init__(self, crawler_instance: AsyncCrawler, stop_event: asyncio.Event, pa
         # Event to stop processing tasks
         self._stopped = stop_event
 
-        # CPU count + 4 is default concurrent tasks for CPython ThreadPoolExecutor with a high limit set at 32
-        self._max_tasks = min(parallelism, 32, (cpu_count() or 1) + 4)
-        self._max_tasks += round(self._max_tasks / 2)
+        self._max_tasks = min(parallelism, 32)
 
     @property
     def max_depth(self) -> int:
@@ -824,7 +822,7 @@ async def async_analyze(self, request) -> Tuple[bool, List]:
         # Sur les ressources statiques le content-length est généralement indiqué
         if self._max_page_size > 0:
             if page.raw_size > self._max_page_size:
-                page.clean()
+                await page.clean()
                 return False, []
 
         await asyncio.sleep(0)
diff --git a/wapitiCore/net/page.py b/wapitiCore/net/page.py
index db83e8003..1865249bf 100644
--- a/wapitiCore/net/page.py
+++ b/wapitiCore/net/page.py
@@ -294,11 +294,11 @@ def soup(self):
             self._soup = BeautifulSoup('', parser_name)
         return self._soup
 
-    def clean(self):
+    async def clean(self):
         if self._soup is not None:
             self._soup.decompose()
             del self._soup
-        self._response.close()
+        await self._response.aclose()
 
     @property
     @lru_cache(maxsize=2)
@@ -565,10 +565,10 @@ def is_visible(element):
     def text_only_md5(self) -> str:
         return md5(self.text_only.encode(errors="ignore")).hexdigest()
 
-    def empty(self):
+    async def empty(self):
         """Modify the current Page object to make it appears as if the content-length was 0."""
         self._is_empty = True
-        self.clean()
+        await self.clean()
 
     @property
     def encoding(self):
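The first `crawler.py` hunk drops the CPU-based heuristic (the CPython `ThreadPoolExecutor`-style `cpu_count() + 4` formula, inflated by 50%) in favour of a plain cap: the task count is whatever the user requested, bounded at 32. A minimal sketch of that capping behaviour, using a hypothetical `run_crawl` helper and an `asyncio.Semaphore` in place of the crawler's actual task bookkeeping:

```python
import asyncio


async def run_crawl(parallelism: int) -> None:
    # Cap concurrent tasks at the requested parallelism, hard-limited
    # to 32, mirroring the new `self._max_tasks = min(parallelism, 32)`.
    max_tasks = min(parallelism, 32)
    semaphore = asyncio.Semaphore(max_tasks)

    async def worker(url: str) -> None:
        async with semaphore:
            # Stand-in for fetching and analyzing one page.
            await asyncio.sleep(0.1)

    urls = [f"https://example.com/page/{i}" for i in range(100)]
    await asyncio.gather(*(worker(url) for url in urls))


asyncio.run(run_crawl(parallelism=8))
```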
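The remaining hunks turn `Page.clean()` and `Page.empty()` into coroutines so the underlying response can be released with the awaitable `aclose()` instead of the synchronous `close()`; every caller, such as the oversized-page check in `async_analyze`, must now `await` them. A minimal sketch of the pattern, assuming an httpx-style async client like the one behind `AsyncCrawler` (the `fetch_and_discard` helper and the URL are made up for illustration):

```python
import asyncio

import httpx


async def fetch_and_discard(url: str, max_size: int) -> None:
    async with httpx.AsyncClient() as client:
        # Send with stream=True so the body is not buffered before we
        # decide whether the page is worth keeping.
        request = client.build_request("GET", url)
        response = await client.send(request, stream=True)
        if int(response.headers.get("content-length", 0)) > max_size:
            # Inside an event loop the response has to be released with
            # the awaitable aclose(); a blocking close() is what the
            # patch removes from Page.clean().
            await response.aclose()
            return
        body = await response.aread()
        await response.aclose()
        print(f"kept {len(body)} bytes from {url}")


asyncio.run(fetch_and_discard("https://example.com", max_size=1_000_000))
```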