diff --git a/.github/workflows/k3d-nightly-ci.yaml b/.github/workflows/k3d-nightly-ci.yaml
index c350f41e1c..3060fcba2c 100644
--- a/.github/workflows/k3d-nightly-ci.yaml
+++ b/.github/workflows/k3d-nightly-ci.yaml
@@ -8,8 +8,24 @@ on:
   workflow_dispatch:
 
 jobs:
+  collect-test-modules:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v3
+      - id: set-matrix
+        run: |
+          echo matrix="$(ls ./backend/test_nightly/ | grep -o "^test_.*" | jq -R -s -c 'split("\n")[:-1]')" >> $GITHUB_OUTPUT
+
   btrix-k3d-nightly-test:
+    name: ${{ matrix.module }}
+    needs: collect-test-modules
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        module: ${{fromJSON(needs.collect-test-modules.outputs.matrix)}}
+      fail-fast: false
     steps:
       - name: Create k3d Cluster
        uses: AbsaOSS/k3d-action@v2
@@ -82,7 +98,7 @@ jobs:
         run: kubectl exec -i deployment/local-minio -c minio -- mkdir /data/replica-0
 
       - name: Run Tests
-        run: pytest -vv ./backend/test_nightly/test_*.py
+        run: pytest -vv ./backend/test_nightly/${{ matrix.module }}
 
       - name: Print Backend Logs (API)
         if: ${{ failure() }}
diff --git a/backend/test_nightly/conftest.py b/backend/test_nightly/conftest.py
index 275217a210..28eb4a8b94 100644
--- a/backend/test_nightly/conftest.py
+++ b/backend/test_nightly/conftest.py
@@ -320,3 +320,67 @@ def org_with_quotas(admin_auth_headers):
     data = r.json()
 
     return data["id"]
+
+
+@pytest.fixture(scope="session")
+def deleted_crawl_id(admin_auth_headers, default_org_id):
+    # Start crawl.
+    crawl_data = {
+        "runNow": True,
+        "name": "Test crawl",
+        "config": {
+            "seeds": [{"url": "https://webrecorder.net/"}],
+            "limit": 1,
+        },
+    }
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
+        headers=admin_auth_headers,
+        json=crawl_data,
+    )
+    data = r.json()
+
+    crawl_id = data["run_now_job"]
+
+    # Wait for it to complete
+    while True:
+        r = requests.get(
+            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{crawl_id}/replay.json",
+            headers=admin_auth_headers,
+        )
+        data = r.json()
+        if data["state"] == "complete":
+            break
+        time.sleep(5)
+
+    # Wait until replica background job completes
+    while True:
+        r = requests.get(
+            f"{API_PREFIX}/orgs/{default_org_id}/jobs/?jobType=create-replica&success=True",
+            headers=admin_auth_headers,
+        )
+        assert r.status_code == 200
+        if r.json()["total"] == 1:
+            break
+        time.sleep(5)
+
+    # Delete crawl
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/delete",
+        headers=admin_auth_headers,
+        json={"crawl_ids": [crawl_id]},
+    )
+    assert r.status_code == 200
+
+    # Wait until delete replica background job completes
+    while True:
+        r = requests.get(
+            f"{API_PREFIX}/orgs/{default_org_id}/jobs/?jobType=delete-replica&success=True",
+            headers=admin_auth_headers,
+        )
+        assert r.status_code == 200
+        if r.json()["total"] == 1:
+            break
+        time.sleep(5)
+
+    return crawl_id
diff --git a/backend/test_nightly/test_crawlconfig_crawl_stats.py b/backend/test_nightly/test_crawlconfig_crawl_stats.py
index 690ea3bdd2..dc68edd8e9 100644
--- a/backend/test_nightly/test_crawlconfig_crawl_stats.py
+++ b/backend/test_nightly/test_crawlconfig_crawl_stats.py
@@ -71,17 +71,25 @@ def test_crawlconfig_crawl_stats(admin_auth_headers, default_org_id, crawl_confi
     data = r.json()
     assert data["deleted"]
 
-    time.sleep(10)
-
     # Verify crawl stats from /crawlconfigs
-    r = requests.get(
-        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{crawl_config_id}",
-        headers=admin_auth_headers,
-    )
-    assert r.status_code == 200
-    data = r.json()
-    assert data["crawlAttemptCount"] == 2
-    assert data["crawlCount"] == 0
-    assert not data["lastCrawlId"]
-    assert not data["lastCrawlState"]
-    assert not data["lastCrawlTime"]
+    max_attempts = 18
+    attempts = 1
+    while True:
+        r = requests.get(
+            f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/{crawl_config_id}",
+            headers=admin_auth_headers,
+        )
+        assert r.status_code == 200
+        data = r.json()
+
+        if data["crawlAttemptCount"] == 2 and data["crawlCount"] == 0:
+            assert not data["lastCrawlId"]
+            assert not data["lastCrawlState"]
+            assert not data["lastCrawlTime"]
+            break
+
+        if attempts >= max_attempts:
+            assert False
+
+        time.sleep(10)
+        attempts += 1
diff --git a/backend/test_nightly/test_z_background_jobs.py b/backend/test_nightly/test_z_background_jobs.py
index 2d91280c4f..df308609b1 100644
--- a/backend/test_nightly/test_z_background_jobs.py
+++ b/backend/test_nightly/test_z_background_jobs.py
@@ -10,7 +10,7 @@
 job_id = None
 
 
-def test_background_jobs_list(admin_auth_headers, default_org_id):
+def test_background_jobs_list(admin_auth_headers, default_org_id, deleted_crawl_id):
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/jobs/", headers=admin_auth_headers
     )
@@ -37,7 +37,7 @@
 
 @pytest.mark.parametrize("job_type", [("create-replica"), ("delete-replica")])
 def test_background_jobs_list_filter_by_type(
-    admin_auth_headers, default_org_id, job_type
+    admin_auth_headers, default_org_id, deleted_crawl_id, job_type
 ):
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/jobs/?jobType={job_type}",
@@ -54,7 +54,9 @@
         assert item["type"] == job_type
 
 
-def test_background_jobs_list_filter_by_success(admin_auth_headers, default_org_id):
+def test_background_jobs_list_filter_by_success(
+    admin_auth_headers, default_org_id, deleted_crawl_id
+):
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/jobs/?success=True",
         headers=admin_auth_headers,
@@ -70,7 +72,9 @@
         assert item["success"]
 
 
-def test_background_jobs_no_failures(admin_auth_headers, default_org_id):
+def test_background_jobs_no_failures(
+    admin_auth_headers, default_org_id, deleted_crawl_id
+):
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/jobs/?success=False",
         headers=admin_auth_headers,
@@ -81,7 +85,7 @@
     assert data["total"] == 0
 
 
-def test_get_background_job(admin_auth_headers, default_org_id):
+def test_get_background_job(admin_auth_headers, default_org_id, deleted_crawl_id):
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/jobs/{job_id}", headers=admin_auth_headers
     )
@@ -100,7 +104,7 @@
     assert data["replica_storage"]
 
 
-def test_retry_all_failed_bg_jobs_not_superuser(crawler_auth_headers):
+def test_retry_all_failed_bg_jobs_not_superuser(crawler_auth_headers, deleted_crawl_id):
     r = requests.post(
         f"{API_PREFIX}/orgs/all/jobs/retryFailed", headers=crawler_auth_headers
     )
diff --git a/chart/test/test-nightly-addons.yaml b/chart/test/test-nightly-addons.yaml
index 1a64d54f37..d1ccd46f4a 100644
--- a/chart/test/test-nightly-addons.yaml
+++ b/chart/test/test-nightly-addons.yaml
@@ -21,7 +21,8 @@ storages:
     bucket_name: *local_bucket_name
 
     endpoint_url: "http://local-minio.default:9000/"
-    is_default_primary: True
+    is_default_primary: true
+    access_endpoint_url: "/data/"
 
   - name: "replica-0"
     type: "s3"
@@ -30,6 +31,6 @@ storages:
     bucket_name: "replica-0"
 
     endpoint_url: "http://local-minio.default:9000/"
-    is_default_replica: True
+    is_default_replica: true