Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: Use a local httpbin instance for tests #166

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/_unit_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ jobs:
python-version: ["3.9", "3.10", "3.11", "3.12"]
runs-on: ${{ matrix.os }}

services:
httpbin:
image: kennethreitz/httpbin
ports:
  # Must be a YAML sequence; map container port 80 to host port 8080
  # (binding host port 80 on the runner would require root).
  - 8080:80
env:
  # Must be job-level env so the test steps see it. On a VM runner the
  # service hostname "httpbin" does not resolve — services are reached via
  # localhost and the mapped port. No trailing slash: tests append "/path".
  HTTPBIN_URL: http://localhost:8080

steps:
# We need to check out the head commit in case of PRs, and the default ref otherwise (during release).
- name: Checkout repository
Expand Down
40 changes: 20 additions & 20 deletions tests/unit/browsers/test_browser_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,52 +6,52 @@
from crawlee.browsers.playwright_browser_plugin import PlaywrightBrowserPlugin


async def test_new_page_single_plugin() -> None:
async def test_new_page_single_plugin(httpbin: str) -> None:
plugin = PlaywrightBrowserPlugin(browser_type='chromium')

async with BrowserPool([plugin]) as browser_pool:
assert browser_pool.plugins == [plugin]

page_1 = await browser_pool.new_page()
await page_1.page.goto('https://httpbin.org/get')
await page_1.page.goto(f'{httpbin}/get')
assert page_1.browser_type == 'chromium'
assert page_1.page.url == 'https://httpbin.org/get'
assert page_1.page.url == f'{httpbin}/get'
assert '<html' in await page_1.page.content() # there is some HTML content

page_2 = await browser_pool.new_page()
await page_2.page.goto('https://httpbin.org/status/200')
await page_2.page.goto(f'{httpbin}/status/200')
assert page_2.browser_type == 'chromium'
assert page_2.page.url == 'https://httpbin.org/status/200'
assert page_2.page.url == f'{httpbin}/status/200'
assert '<html' in await page_1.page.content() # there is some HTML content


async def test_new_page_multiple_plugins() -> None:
async def test_new_page_multiple_plugins(httpbin: str) -> None:
plugin_chromium = PlaywrightBrowserPlugin(browser_type='chromium')
plugin_firefox = PlaywrightBrowserPlugin(browser_type='firefox')

async with BrowserPool([plugin_chromium, plugin_firefox]) as browser_pool:
assert browser_pool.plugins == [plugin_chromium, plugin_firefox]

page_1 = await browser_pool.new_page()
await page_1.page.goto('https://httpbin.org/get')
await page_1.page.goto(f'{httpbin}/get')
assert page_1.browser_type == 'chromium'
assert page_1.page.url == 'https://httpbin.org/get'
assert page_1.page.url == f'{httpbin}/get'
assert '<html' in await page_1.page.content() # there is some HTML content

page_2 = await browser_pool.new_page()
await page_2.page.goto('https://httpbin.org/headers')
await page_2.page.goto(f'{httpbin}/headers')
assert page_2.browser_type == 'firefox'
assert page_2.page.url == 'https://httpbin.org/headers'
assert page_2.page.url == f'{httpbin}/headers'
assert '<html' in await page_2.page.content() # there is some HTML content

page_3 = await browser_pool.new_page()
await page_3.page.goto('https://httpbin.org/user-agent')
await page_3.page.goto(f'{httpbin}/user-agent')
assert page_3.browser_type == 'chromium'
assert page_3.page.url == 'https://httpbin.org/user-agent'
assert page_3.page.url == f'{httpbin}/user-agent'
assert '<html' in await page_3.page.content() # there is some HTML content


async def test_new_page_with_each_plugin() -> None:
async def test_new_page_with_each_plugin(httpbin: str) -> None:
plugin_chromium = PlaywrightBrowserPlugin(browser_type='chromium')
plugin_firefox = PlaywrightBrowserPlugin(browser_type='firefox')

Expand All @@ -63,22 +63,22 @@ async def test_new_page_with_each_plugin() -> None:
assert pages[0].browser_type == 'chromium'
assert pages[1].browser_type == 'firefox'

await pages[0].page.goto('https://httpbin.org/get')
assert pages[0].page.url == 'https://httpbin.org/get'
await pages[0].page.goto(f'{httpbin}/get')
assert pages[0].page.url == f'{httpbin}/get'
assert '<html' in await pages[0].page.content() # there is some HTML content

await pages[1].page.goto('https://httpbin.org/headers')
assert pages[1].page.url == 'https://httpbin.org/headers'
await pages[1].page.goto(f'{httpbin}/headers')
assert pages[1].page.url == f'{httpbin}/headers'
assert '<html' in await pages[1].page.content()


async def test_resource_management() -> None:
async def test_resource_management(httpbin: str) -> None:
playwright_plugin = PlaywrightBrowserPlugin(browser_type='chromium')

async with BrowserPool([playwright_plugin]) as browser_pool:
page = await browser_pool.new_page()
await page.page.goto('https://httpbin.org/get')
assert page.page.url == 'https://httpbin.org/get'
await page.page.goto(f'{httpbin}/get')
assert page.page.url == f'{httpbin}/get'
assert '<html' in await page.page.content() # there is some HTML content

# The page should be closed
Expand Down
16 changes: 8 additions & 8 deletions tests/unit/browsers/test_playwright_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
from crawlee.browsers.playwright_browser_plugin import PlaywrightBrowserPlugin


async def test_new_page() -> None:
async def test_new_page(httpbin: str) -> None:
async with PlaywrightBrowserPlugin() as plugin:
# Get a new page with default options
page_1 = await plugin.new_page()
await page_1.goto('https://httpbin.org/get')
assert page_1.url == 'https://httpbin.org/get'
await page_1.goto(f'{httpbin}/get')
assert page_1.url == f'{httpbin}/get'
assert '<html' in await page_1.content() # there is some HTML content

page_options = {
Expand All @@ -22,20 +22,20 @@ async def test_new_page() -> None:
async with PlaywrightBrowserPlugin(page_options=page_options) as plugin:
# Get a new page with custom options
page_2 = await plugin.new_page()
await page_2.goto('https://httpbin.org/user-agent')
assert page_2.url == 'https://httpbin.org/user-agent'
await page_2.goto(f'{httpbin}/user-agent')
assert page_2.url == f'{httpbin}/user-agent'
assert '<html' in await page_2.content() # there is some HTML content


async def test_resource_management() -> None:
async def test_resource_management(httpbin: str) -> None:
async with PlaywrightBrowserPlugin() as plugin:
assert plugin.browser is not None
# Browser should be connected
assert plugin.browser.is_connected() is True

page = await plugin.new_page()
await page.goto('https://httpbin.org/get')
assert page.url == 'https://httpbin.org/get'
await page.goto(f'{httpbin}/get')
assert page.url == f'{httpbin}/get'
assert '<html' in await page.content() # there is some HTML content

# The page should be closed
Expand Down
5 changes: 5 additions & 0 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,8 @@ def memory_storage_client(tmp_path: Path) -> MemoryStorageClient:
crawlee_local_storage_dir=str(tmp_path), # type: ignore
)
return MemoryStorageClient(cfg)


@pytest.fixture()
def httpbin() -> str:
    """Return the base URL of the httpbin instance used by the tests.

    Reads ``HTTPBIN_URL`` (set in CI to point at a local httpbin service
    container) and falls back to the public ``https://httpbin.org``.

    Any trailing slash is stripped so call sites can safely build URLs as
    ``f'{httpbin}/get'`` without producing a double slash (the CI workflow
    exports the variable with a trailing slash, which would otherwise yield
    ``http://.../​/get`` and break exact-URL assertions in the tests).
    """
    return os.environ.get('HTTPBIN_URL', 'https://httpbin.org').rstrip('/')
8 changes: 4 additions & 4 deletions tests/unit/http_crawler/test_http_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,17 @@ async def test_handles_server_error(
assert server['500_endpoint'].called


async def test_stores_cookies() -> None:
async def test_stores_cookies(httpbin: str) -> None:
visit = Mock()
track_session_usage = Mock()

session_pool = SessionPool(max_pool_size=1)
crawler = HttpCrawler(
request_provider=RequestList(
[
'https://httpbin.org/cookies/set?a=1',
'https://httpbin.org/cookies/set?b=2',
'https://httpbin.org/cookies/set?c=3',
f'{httpbin}/cookies/set?a=1',
f'{httpbin}/cookies/set?b=2',
f'{httpbin}/cookies/set?c=3',
]
),
session_pool=session_pool,
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/httpx_client/test_httpx_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ async def proxy(proxy_info: ProxyInfo) -> AsyncGenerator[ProxyInfo, None]:
yield proxy_info


async def test_proxy(proxy: ProxyInfo) -> None:
async def test_proxy(proxy: ProxyInfo, httpbin: str) -> None:
client = HttpxClient()
request = Request(url='https://httpbin.org/status/222', unique_key='42', id='42', user_data={})
request = Request(url=f'{httpbin}/status/222', unique_key='42', id='42', user_data={})

async with Statistics() as statistics:
result = await client.crawl(request, None, proxy, statistics)
Expand All @@ -65,9 +65,9 @@ async def disabled_proxy(proxy_info: ProxyInfo) -> AsyncGenerator[ProxyInfo, Non
yield proxy_info


async def test_proxy_disabled(disabled_proxy: ProxyInfo) -> None:
async def test_proxy_disabled(disabled_proxy: ProxyInfo, httpbin: str) -> None:
client = HttpxClient()
request = Request(url='https://httpbin.org/status/222', unique_key='42', id='42', user_data={})
request = Request(url=f'{httpbin}/status/222', unique_key='42', id='42', user_data={})

with pytest.raises(ProxyError):
async with Statistics() as statistics:
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/playwright_crawler/test_playwright_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from crawlee.playwright_crawler import PlaywrightCrawlingContext


async def test_basic_request() -> None:
request_provider = RequestList(['https://httpbin.org/'])
async def test_basic_request(httpbin: str) -> None:
request_provider = RequestList([f'{httpbin}/'])
crawler = PlaywrightCrawler(request_provider=request_provider)
result: dict = {}

Expand All @@ -24,6 +24,6 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:

await crawler.run()

assert result.get('request_url') == result.get('page_url') == 'https://httpbin.org/'
assert result.get('request_url') == result.get('page_url') == f'{httpbin}/'
assert 'httpbin' in result.get('page_title', '')
assert '<html' in result.get('page_content', '') # there is some HTML content
Loading