From e5793bc5f8e6ce6eab5a3d7fe8752d26f721e7c5 Mon Sep 17 00:00:00 2001 From: devloop Date: Sun, 22 Sep 2024 14:16:55 +0200 Subject: [PATCH] crawler: remove async_post, now handled by async_request directly --- doc/cookies_and_scripts_auth.md | 2 +- tests/net/test_crawler.py | 46 ++++++++++++++++ wapitiCore/net/crawler.py | 93 ++++++--------------------------- 3 files changed, 62 insertions(+), 79 deletions(-) diff --git a/doc/cookies_and_scripts_auth.md b/doc/cookies_and_scripts_auth.md index 343ef8878..d6262864d 100644 --- a/doc/cookies_and_scripts_auth.md +++ b/doc/cookies_and_scripts_auth.md @@ -74,7 +74,7 @@ async def run(crawler_configuration: CrawlerConfiguration, auth_url: str,headles enctype="application/json" ) # Send it - response = await crawler.async_post(request) + response = await crawler.async_send(request) data = response.json if not data: print("authentication failed") diff --git a/tests/net/test_crawler.py b/tests/net/test_crawler.py index 8959ebf26..a12b37d9c 100644 --- a/tests/net/test_crawler.py +++ b/tests/net/test_crawler.py @@ -360,3 +360,49 @@ async def test_async_send(): assert response.headers.get("abc") == "123" assert "user-agent" in request.headers assert request.headers.get("foo") == "bar" + + +@respx.mock +@pytest.mark.asyncio +async def test_async_put_enctype(): + request = Request("http://perdu.com/", "PUT", post_params='{"id": 31337}', enctype="application/json") + + route = respx.put("http://perdu.com/").mock( + return_value=httpx.Response( + status_code=200, + text="Whatever", + ) + ) + + crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/"), timeout=1) + async with AsyncCrawler.with_configuration(crawler_configuration) as crawler: + await crawler.async_send(request) + + assert route.called + # Check if the request headers contain the expected headers + caught_request = route.calls[0].request + assert caught_request.headers["Content-Type"] == "application/json" + + +@respx.mock +@pytest.mark.asyncio +async def test_async_put_missing_enctype(): + request = Request("http://perdu.com/", "PUT", post_params='a=b') + + route = respx.put("http://perdu.com/").mock( + return_value=httpx.Response( + status_code=200, + text="Whatever", + ) + ) + + crawler_configuration = CrawlerConfiguration(Request("http://perdu.com/"), timeout=1) + async with AsyncCrawler.with_configuration(crawler_configuration) as crawler: + await crawler.async_send(request) + + # Check if the request was made + assert route.called + # Check if the request headers contain the expected headers + caught_request = route.calls[0].request + assert caught_request.headers["Content-Type"] == "application/x-www-form-urlencoded" + assert caught_request.content == b"a=b" diff --git a/wapitiCore/net/crawler.py b/wapitiCore/net/crawler.py index 2560c6389..c83c05965 100644 --- a/wapitiCore/net/crawler.py +++ b/wapitiCore/net/crawler.py @@ -249,8 +249,9 @@ async def async_get( return Response(response) @retry(delay=1, times=3) - async def async_post( + async def async_request( self, + method: str, form: web.Request, follow_redirects: bool = False, headers: dict = None, @@ -259,6 +260,7 @@ async def async_post( ) -> Response: """Submit the given form, returns a Response on success, None otherwise. + @type method: str @type form: web.Request @type follow_redirects: bool @type headers: dict @@ -267,10 +269,11 @@ async def async_post( @rtype: Response """ form_headers = {} + if form.enctype and not form.is_multipart: form_headers = {"Content-Type": form.enctype} - if isinstance(headers, (dict, httpx.Headers)) and headers: + if isinstance(headers, dict) and headers: form_headers.update(headers) if form.referer: @@ -296,77 +299,12 @@ async def async_post( post_params = None request = self._client.build_request( - "POST", + method, form.path, params=form.get_params, data=post_params, # httpx expects a dict, hope to see more types soon content=content, - files=file_params or None, - headers=form_headers, - timeout=self.timeout if timeout is None else httpx.Timeout(timeout) - ) - try: - response = await self._client.send( - request, stream=stream, follow_redirects=follow_redirects - ) - except httpx.TransportError as exception: - if "Read timed out" in str(exception): - raise httpx.ReadTimeout("Request time out", request=None) - - raise exception - - return Response(response) - - @retry(delay=1, times=3) - async def async_request( - self, - method: str, - form: web.Request, - follow_redirects: bool = False, - headers: dict = None, - stream: bool = False, - timeout: float = None, - ) -> Response: - """Submit the given form, returns a Response on success, None otherwise. - - @type method: str - @type form: web.Request - @type follow_redirects: bool - @type headers: dict - @type stream: bool - @type timeout: float - @rtype: Response - """ - form_headers = {} - - if form.enctype and not form.is_multipart: - form_headers = {"Content-Type": form.enctype} - - if isinstance(headers, dict) and headers: - form_headers.update(headers) - - if form.referer: - form_headers["referer"] = form.referer - - post_params = form.post_params - content = None - - if post_params: - if isinstance(post_params, str): - content = post_params - post_params = None - else: - content = None - post_params = dict(post_params) - else: - post_params = None - - request = self._client.build_request( - method, - form.url, - data=post_params, - content=content, - files=form.file_params or None, + files=file_params, headers=form_headers, timeout=self.timeout if timeout is None else httpx.Timeout(timeout) ) @@ -391,12 +329,7 @@ async def async_send( timeout: float = None ) -> Response: if request.method == "GET": - response = await self.async_get(request, headers=headers, - follow_redirects=follow_redirects, stream=stream, - timeout=timeout - ) - elif request.method == "POST": - response = await self.async_post( + response = await self.async_get( request, headers=headers, follow_redirects=follow_redirects, @@ -404,9 +337,13 @@ async def async_send( timeout=timeout ) else: - response = await self.async_request(request.method, request, - headers=headers, follow_redirects=follow_redirects, - stream=stream, timeout=timeout + response = await self.async_request( + request.method, + request, + headers=headers, + follow_redirects=follow_redirects, + stream=stream, + timeout=timeout ) request.set_cookies(self._client.cookies)