Skip to content

Commit

Permalink
ci: Make integration tests more stable (#311)
Browse files Browse the repository at this point in the history
Both the build Actor step and the run Actor step.

Closes #301
  • Loading branch information
vdusek authored Oct 30, 2024
1 parent e200a8e commit 80a2a7f
Show file tree
Hide file tree
Showing 17 changed files with 550 additions and 396 deletions.
13 changes: 6 additions & 7 deletions .github/workflows/run_release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,11 @@ jobs:
needs: [should_release]
uses: ./.github/workflows/_version_conflict_check.yaml

# tmp disabled due to instability
# integration_tests:
# name: Integration tests
# needs: [should_release]
# uses: apify/workflows/.github/workflows/python_integration_tests.yaml@main
# secrets: inherit
integration_tests:
name: Integration tests
needs: [should_release]
uses: apify/workflows/.github/workflows/python_integration_tests.yaml@main
secrets: inherit

publish_to_pypi:
name: Publish to PyPI
Expand All @@ -81,7 +80,7 @@ jobs:
unit_tests,
changelog_entry_check,
version_conflict_check,
# integration_tests, # tmp disabled due to instability
integration_tests,
]
runs-on: ubuntu-latest
permissions:
Expand Down
8 changes: 4 additions & 4 deletions src/apify/scrapy/middlewares/apify_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ApifyHttpProxyMiddleware:
proxy_settings = {'useApifyProxy': true, 'apifyProxyGroups': []}
"""

def __init__(self: ApifyHttpProxyMiddleware, proxy_settings: dict) -> None:
def __init__(self, proxy_settings: dict) -> None:
"""Create a new instance.
Args:
Expand Down Expand Up @@ -66,7 +66,7 @@ def from_crawler(cls: type[ApifyHttpProxyMiddleware], crawler: Crawler) -> Apify

return cls(proxy_settings)

async def process_request(self: ApifyHttpProxyMiddleware, request: Request, spider: Spider) -> None:
async def process_request(self, request: Request, spider: Spider) -> None:
"""Process a Scrapy request by assigning a new proxy.
Args:
Expand All @@ -89,7 +89,7 @@ async def process_request(self: ApifyHttpProxyMiddleware, request: Request, spid
Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: updated request.meta={request.meta}')

def process_exception(
self: ApifyHttpProxyMiddleware,
self,
request: Request,
exception: Exception,
spider: Spider,
Expand All @@ -116,7 +116,7 @@ def process_exception(
'reason="{exception}", skipping...'
)

async def _get_new_proxy_url(self: ApifyHttpProxyMiddleware) -> ParseResult:
async def _get_new_proxy_url(self) -> ParseResult:
"""Get a new proxy URL.
Raises:
Expand Down
2 changes: 1 addition & 1 deletion src/apify/scrapy/pipelines/actor_dataset_push.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class ActorDatasetPushPipeline:
"""

async def process_item(
self: ActorDatasetPushPipeline,
self,
item: Item,
spider: Spider,
) -> Item:
Expand Down
10 changes: 5 additions & 5 deletions src/apify/scrapy/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class ApifyScheduler(BaseScheduler):
This scheduler requires the asyncio Twisted reactor to be installed.
"""

def __init__(self: ApifyScheduler) -> None:
def __init__(self) -> None:
"""Create a new instance."""
if not is_asyncio_reactor_installed():
raise ValueError(
Expand All @@ -40,7 +40,7 @@ def __init__(self: ApifyScheduler) -> None:
self._rq: RequestQueue | None = None
self.spider: Spider | None = None

def open(self: ApifyScheduler, spider: Spider) -> None: # this has to be named "open"
def open(self, spider: Spider) -> None: # this has to be named "open"
"""Open the scheduler.
Args:
Expand All @@ -58,7 +58,7 @@ async def open_queue() -> RequestQueue:
traceback.print_exc()
raise

def has_pending_requests(self: ApifyScheduler) -> bool:
def has_pending_requests(self) -> bool:
"""Check if the scheduler has any pending requests.
Returns:
Expand All @@ -75,7 +75,7 @@ def has_pending_requests(self: ApifyScheduler) -> bool:

return not is_finished

def enqueue_request(self: ApifyScheduler, request: Request) -> bool:
def enqueue_request(self, request: Request) -> bool:
"""Add a request to the scheduler.
This could be called either from a spider or from a downloader middleware (e.g. redirect, retry, ...).
Expand Down Expand Up @@ -111,7 +111,7 @@ def enqueue_request(self: ApifyScheduler, request: Request) -> bool:
Actor.log.debug(f'[{call_id}]: rq.add_request.result={result}...')
return bool(result.was_already_present)

def next_request(self: ApifyScheduler) -> Request | None:
def next_request(self) -> Request | None:
"""Fetch the next request from the scheduler.
Returns:
Expand Down
84 changes: 38 additions & 46 deletions tests/integration/README.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,16 @@
Integration tests
=================
# Integration tests

We have integration tests which build and run Actors using the Python SDK on the Apify Platform.
To run these tests, you need to set the `APIFY_TEST_USER_API_TOKEN` environment variable to the API token of the Apify user you want to use for the tests,
and then start them with `make integration-tests`.
We have integration tests which build and run Actors using the Python SDK on the Apify Platform. To run these tests, you need to set the `APIFY_TEST_USER_API_TOKEN` environment variable to the API token of the Apify user you want to use for the tests, and then start them with `make integration-tests`.

If you want to run the integration tests on a different environment than the main Apify Platform,
you need to set the `APIFY_INTEGRATION_TESTS_API_URL` environment variable to the right URL to the Apify API you want to use.
If you want to run the integration tests on a different environment than the main Apify Platform, you need to set the `APIFY_INTEGRATION_TESTS_API_URL` environment variable to the right URL to the Apify API you want to use.

How to write tests
------------------
## How to write tests

There are two fixtures which you can use to write tests:

### `apify_client_async`

This fixture just gives you an instance of `ApifyClientAsync` configured with the right token and API URL,
so you don't have to do that yourself.
This fixture just gives you an instance of `ApifyClientAsync` configured with the right token and API URL, so you don't have to do that yourself.

```python
async def test_something(apify_client_async: ApifyClientAsync) -> None:
Expand All @@ -27,64 +21,62 @@ async def test_something(apify_client_async: ApifyClientAsync) -> None:

This fixture returns a factory function for creating Actors on the Apify Platform.

For the Actor source, the fixture takes the files from `tests/integration/actor_source_base`,
builds the Apify SDK wheel from the current codebase,
and adds the Actor source you passed to the fixture as an argument.
You have to pass exactly one of the `main_func`, `main_py` and `source_files` arguments.
For the Actor source, the fixture takes the files from `tests/integration/actor_source_base`, builds the Apify SDK wheel from the current codebase, and adds the Actor source you passed to the fixture as an argument. You have to pass exactly one of the `main_func`, `main_py` and `source_files` arguments.

The created Actor will be uploaded to the platform, built there, and after the test finishes, it will be automatically deleted.
If the Actor build fails, it will not be deleted, so that you can check why the build failed.
The created Actor will be uploaded to the platform, built there, and after the test finishes, it will be automatically deleted. If the Actor build fails, it will not be deleted, so that you can check why the build failed.

### Creating test Actor straight from a Python function

You can create Actors straight from a Python function.
This is great because you can have the test Actor source code checked with the linter.
You can create Actors straight from a Python function. This is great because you can have the test Actor source code checked with the linter.

```python
async def test_something(self, make_actor: ActorFactory) -> None:
async def test_something(
make_actor: MakeActorFunction,
run_actor: RunActorFunction,
) -> None:
async def main() -> None:
async with Actor:
print('Hello!')

actor = await make_actor('something', main_func=main)
actor = await make_actor(label='something', main_func=main)
run_result = await run_actor(actor)

run_result = await actor.call()

assert run_result is not None
assert run_result['status'] == 'SUCCEEDED'
assert run_result.status == 'SUCCEEDED'
```

These Actors will have the `src/main.py` file set to the `main` function definition,
prepended with `import asyncio` and `from apify import Actor`, for your convenience.
These Actors will have the `src/main.py` file set to the `main` function definition, prepended with `import asyncio` and `from apify import Actor`, for your convenience.

You can also pass extra imports directly to the main function:

```python
async def test_something(self, make_actor: ActorFactory) -> None:
async def test_something(
make_actor: MakeActorFunction,
run_actor: RunActorFunction,
) -> None:
async def main():
import os
from apify_shared.consts import ActorEventTypes, ActorEnvVars
async with Actor:
print('The Actor is running with ' + os.getenv(ActorEnvVars.MEMORY_MBYTES) + 'MB of memory')
await Actor.on(ActorEventTypes.SYSTEM_INFO, lambda event_data: print(event_data))

actor = await make_actor('something', main_func=main)

run_result = await actor.call()
actor = await make_actor(label='something', main_func=main)
run_result = await run_actor(actor)

assert run_result is not None
assert run_result['status'] == 'SUCCEEDED'
assert run_result.status == 'SUCCEEDED'
```

### Creating Actor from source files

You can also pass the source files directly if you need something more complex
(e.g. pass some fixed value to the Actor source code or use multiple source files).
You can also pass the source files directly if you need something more complex (e.g. pass some fixed value to the Actor source code or use multiple source files).

To pass the source code of the `src/main.py` file directly, use the `main_py` argument to `make_actor`:

```python
async def test_something(self, make_actor: ActorFactory) -> None:
async def test_something(
make_actor: MakeActorFunction,
run_actor: RunActorFunction,
) -> None:
expected_output = f'ACTOR_OUTPUT_{crypto_random_object_id(5)}'
main_py_source = f"""
import asyncio
Expand All @@ -96,21 +88,22 @@ async def test_something(self, make_actor: ActorFactory) -> None:
await Actor.set_value('OUTPUT', '{expected_output}')
"""

actor = await make_actor('something', main_py=main_py_source)

await actor.call()
actor = await make_actor(label='something', main_py=main_py_source)
await run_actor(actor)

output_record = await actor.last_run().key_value_store().get_record('OUTPUT')
assert output_record is not None
assert output_record['value'] == expected_output

```

Or you can pass multiple source files with the `source_files` argument,
if you need something really complex:
Or you can pass multiple source files with the `source_files` argument, if you need something really complex:

```python
async def test_something(self, make_actor: ActorFactory) -> None:
async def test_something(
make_actor: MakeActorFunction,
run_actor: RunActorFunction,
) -> None:
actor_source_files = {
'src/utils.py': """
from datetime import datetime, timezone
Expand All @@ -129,9 +122,8 @@ async def test_something(self, make_actor: ActorFactory) -> None:
print('Hello! It is ' + str(current_datetime.time()))
""",
}
actor = await make_actor('something', source_files=actor_source_files)
actor = await make_actor(label='something', source_files=actor_source_files)
actor_run = await run_actor(actor)

actor_run = await actor.call()
assert actor_run is not None
assert actor_run['status'] == 'SUCCEEDED'
assert actor_run.status == 'SUCCEEDED'
```
Loading

0 comments on commit 80a2a7f

Please sign in to comment.