Skip to content

Commit

Permalink
crawler: remove async_post, now handled by async_request directly
Browse files Browse the repository at this point in the history
  • Loading branch information
devl00p committed Sep 22, 2024
1 parent 74fb706 commit e5793bc
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 79 deletions.
2 changes: 1 addition & 1 deletion doc/cookies_and_scripts_auth.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ async def run(crawler_configuration: CrawlerConfiguration, auth_url: str,headles
enctype="application/json"
)
# Send it
response = await crawler.async_post(request)
response = await crawler.async_send(request)
data = response.json
if not data:
print("authentication failed")
Expand Down
46 changes: 46 additions & 0 deletions tests/net/test_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,3 +360,49 @@ async def test_async_send():
assert response.headers.get("abc") == "123"
assert "user-agent" in request.headers
assert request.headers.get("foo") == "bar"


@respx.mock
@pytest.mark.asyncio
async def test_async_put_enctype():
    """A PUT request carrying an explicit enctype must be sent with that Content-Type."""
    route = respx.put("http://perdu.com/").mock(
        return_value=httpx.Response(status_code=200, text="Whatever")
    )

    json_put = Request("http://perdu.com/", "PUT", post_params='{"id": 31337}', enctype="application/json")

    configuration = CrawlerConfiguration(Request("http://perdu.com/"), timeout=1)
    async with AsyncCrawler.with_configuration(configuration) as crawler:
        await crawler.async_send(json_put)

    assert route.called
    # The mocked transport recorded the outgoing request: its Content-Type
    # header must match the enctype set on the Request object.
    sent = route.calls[0].request
    assert sent.headers["Content-Type"] == "application/json"


@respx.mock
@pytest.mark.asyncio
async def test_async_put_missing_enctype():
    """A PUT request without an enctype falls back to urlencoded form data."""
    route = respx.put("http://perdu.com/").mock(
        return_value=httpx.Response(status_code=200, text="Whatever")
    )

    form_put = Request("http://perdu.com/", "PUT", post_params='a=b')

    configuration = CrawlerConfiguration(Request("http://perdu.com/"), timeout=1)
    async with AsyncCrawler.with_configuration(configuration) as crawler:
        await crawler.async_send(form_put)

    # The request must have been issued...
    assert route.called
    # ...with the default form encoding and the raw body passed through unchanged.
    sent = route.calls[0].request
    assert sent.headers["Content-Type"] == "application/x-www-form-urlencoded"
    assert sent.content == b"a=b"
93 changes: 15 additions & 78 deletions wapitiCore/net/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,9 @@ async def async_get(
return Response(response)

@retry(delay=1, times=3)
async def async_post(
async def async_request(
self,
method: str,
form: web.Request,
follow_redirects: bool = False,
headers: dict = None,
Expand All @@ -259,6 +260,7 @@ async def async_post(
) -> Response:
"""Submit the given form, returns a Response on success, None otherwise.
@type method: str
@type form: web.Request
@type follow_redirects: bool
@type headers: dict
Expand All @@ -267,10 +269,11 @@ async def async_post(
@rtype: Response
"""
form_headers = {}

if form.enctype and not form.is_multipart:
form_headers = {"Content-Type": form.enctype}

if isinstance(headers, (dict, httpx.Headers)) and headers:
if isinstance(headers, dict) and headers:
form_headers.update(headers)

if form.referer:
Expand All @@ -296,77 +299,12 @@ async def async_post(
post_params = None

request = self._client.build_request(
"POST",
method,
form.path,
params=form.get_params,
data=post_params, # httpx expects a dict, hope to see more types soon
content=content,
files=file_params or None,
headers=form_headers,
timeout=self.timeout if timeout is None else httpx.Timeout(timeout)
)
try:
response = await self._client.send(
request, stream=stream, follow_redirects=follow_redirects
)
except httpx.TransportError as exception:
if "Read timed out" in str(exception):
raise httpx.ReadTimeout("Request time out", request=None)

raise exception

return Response(response)

@retry(delay=1, times=3)
async def async_request(
self,
method: str,
form: web.Request,
follow_redirects: bool = False,
headers: dict = None,
stream: bool = False,
timeout: float = None,
) -> Response:
"""Submit the given form, returns a Response on success, None otherwise.
@type method: str
@type form: web.Request
@type follow_redirects: bool
@type headers: dict
@type stream: bool
@type timeout: float
@rtype: Response
"""
form_headers = {}

if form.enctype and not form.is_multipart:
form_headers = {"Content-Type": form.enctype}

if isinstance(headers, dict) and headers:
form_headers.update(headers)

if form.referer:
form_headers["referer"] = form.referer

post_params = form.post_params
content = None

if post_params:
if isinstance(post_params, str):
content = post_params
post_params = None
else:
content = None
post_params = dict(post_params)
else:
post_params = None

request = self._client.build_request(
method,
form.url,
data=post_params,
content=content,
files=form.file_params or None,
files=file_params,
headers=form_headers,
timeout=self.timeout if timeout is None else httpx.Timeout(timeout)
)
Expand All @@ -391,22 +329,21 @@ async def async_send(
timeout: float = None
) -> Response:
if request.method == "GET":
response = await self.async_get(request, headers=headers,
follow_redirects=follow_redirects, stream=stream,
timeout=timeout
)
elif request.method == "POST":
response = await self.async_post(
response = await self.async_get(
request,
headers=headers,
follow_redirects=follow_redirects,
stream=stream,
timeout=timeout
)
else:
response = await self.async_request(request.method, request,
headers=headers, follow_redirects=follow_redirects,
stream=stream, timeout=timeout
response = await self.async_request(
request.method,
request,
headers=headers,
follow_redirects=follow_redirects,
stream=stream,
timeout=timeout
)

request.set_cookies(self._client.cookies)
Expand Down

0 comments on commit e5793bc

Please sign in to comment.