diff --git a/.github/scripts/prepare-python-image-tags.js b/.github/scripts/prepare-python-image-tags.js deleted file mode 100644 index d3bf98c..0000000 --- a/.github/scripts/prepare-python-image-tags.js +++ /dev/null @@ -1,22 +0,0 @@ -module.exports = () => { - const { CURRENT_PYTHON, LATEST_PYTHON, FRAMEWORK_VERSION, RELEASE_TAG, IMAGE_NAME } = process.env - const tags = []; - - if (CURRENT_PYTHON === LATEST_PYTHON) { - tags.push(`${IMAGE_NAME}:${RELEASE_TAG}`); - } - - if (RELEASE_TAG === "latest") { - if (FRAMEWORK_VERSION) { - tags.push(`${IMAGE_NAME}:${CURRENT_PYTHON}-${FRAMEWORK_VERSION}`) - } - tags.push(`${IMAGE_NAME}:${CURRENT_PYTHON}`); - } else { - if (FRAMEWORK_VERSION) { - tags.push(`${IMAGE_NAME}:${CURRENT_PYTHON}-${FRAMEWORK_VERSION}-${RELEASE_TAG}`); - } - tags.push(`${IMAGE_NAME}:${CURRENT_PYTHON}-${RELEASE_TAG}`); - } - - return { allTags: tags.join(","), firstImageName: tags[0] }; -} diff --git a/.github/workflows/release-bun-node-puppeteer.yaml b/.github/workflows/release-bun-node-puppeteer.yaml index 2fd4ca7..4410846 100644 --- a/.github/workflows/release-bun-node-puppeteer.yaml +++ b/.github/workflows/release-bun-node-puppeteer.yaml @@ -3,6 +3,7 @@ name: Release/test bun-node puppeteer images on: # TODO: If you @apify employees are reading this, please update it schedule: + # It's 1 hour after bun-node normally starts building # Every day at 01:00 UTC - cron: "0 1 * * *" workflow_dispatch: diff --git a/.github/workflows/release-node.yaml b/.github/workflows/release-node.yaml deleted file mode 100644 index 603518b..0000000 --- a/.github/workflows/release-node.yaml +++ /dev/null @@ -1,101 +0,0 @@ -name: Release/test node images - -on: - workflow_dispatch: - inputs: - release_tag: - description: "Tag for the images (e.g.: beta)" - required: true - apify_version: - description: "Apify SDK version (e.g.: ^1.0.0)" - required: true - crawlee_version: - description: "Crawlee version (e.g.: ^1.0.0)" - required: true - - repository_dispatch: - types: 
- build-node-images - - build-node-image-only - - pull_request: - -env: - RELEASE_TAG: ${{ github.event.inputs.release_tag || github.event.client_payload.release_tag || 'CI_TEST' }} - APIFY_VERSION: ${{ github.event.inputs.apify_version || github.event.client_payload.apify_version }} - CRAWLEE_VERSION: ${{ github.event.inputs.crawlee_version || github.event.client_payload.crawlee_version }} - NODE_LATEST: 20 - -jobs: - # Build master images that are not dependent on existing builds. - build-main: - runs-on: ubuntu-latest - strategy: - # By the time some build fails, other build can be already finished - # so fail-fast does not really prevent the publishing of all parallel builds. - fail-fast: false - matrix: - image-name: [node] - node-version: [18, 20, 22] - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Prepare image tags - id: prepare-tags - uses: actions/github-script@v7 - env: - CURRENT_NODE: ${{ matrix.node-version }} - LATEST_NODE: ${{ env.NODE_LATEST }} - RELEASE_TAG: ${{ env.RELEASE_TAG }} - IMAGE_NAME: apify/actor-${{ matrix.image-name }} - # Force this to true, as these images have no browsers - IS_LATEST_BROWSER_IMAGE: "true" - with: - script: | - const generateTags = require("./.github/scripts/prepare-node-image-tags.js"); - return generateTags(); - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Set Dependency Versions - run: | - cd ${{ matrix.image-name }} - node ../.github/scripts/set-dependency-versions.js ${{ github.event_name == 'pull_request' }} - - - # It seems that it takes at least two minutes before a newly published version - # becomes available in the NPM registry. We wait before starting the image builds. 
- name: Wait For Package Registry - uses: nick-fields/retry@v3 - with: - timeout_minutes: 2 # timeout for a single attempt - max_attempts: 3 - retry_wait_seconds: 60 # wait between retries - command: cd ${{ matrix.image-name }} && npm i --dry-run - - - name: Build and tag image - uses: docker/build-push-action@v5 - with: - context: ./${{ matrix.image-name }} - file: ./${{ matrix.image-name }}/Dockerfile - build-args: NODE_VERSION=${{ matrix.node-version }} - load: true - tags: ${{ fromJson(steps.prepare-tags.outputs.result).allTags }} - - - name: Test image - run: docker run ${{ fromJson(steps.prepare-tags.outputs.result).firstImageName }} - - - name: Login to DockerHub - if: github.event_name != 'pull_request' - uses: docker/login-action@v3 - with: - username: ${{ secrets.APIFY_SERVICE_ACCOUNT_DOCKERHUB_USERNAME }} - password: ${{ secrets.APIFY_SERVICE_ACCOUNT_DOCKERHUB_TOKEN }} - - - name: Push images - if: github.event_name != 'pull_request' - run: docker push apify/actor-${{ matrix.image-name }} --all-tags diff --git a/.github/workflows/release-playwright.yaml b/.github/workflows/release-playwright.yaml deleted file mode 100644 index c190c22..0000000 --- a/.github/workflows/release-playwright.yaml +++ /dev/null @@ -1,120 +0,0 @@ -name: Release/test playwright images - -on: - workflow_dispatch: - inputs: - release_tag: - description: "Tag for the images (e.g.: beta)" - required: true - apify_version: - description: "Apify SDK version (e.g.: ^1.0.0)" - required: true - crawlee_version: - description: "Crawlee version (e.g.: ^1.0.0)" - required: true - playwright_version: - description: "Playwright version (e.g.: 1.7.1) (must not be semver range)" - required: true - is_latest_browser_image: - description: If this is a release of the latest browser image. 
This gets autofilled by CI in crawlee - type: boolean - default: false - - repository_dispatch: - types: - - build-node-images - - build-node-images-playwright - - pull_request: - -env: - RELEASE_TAG: ${{ github.event.inputs.release_tag || github.event.client_payload.release_tag || 'CI_TEST' }} - APIFY_VERSION: ${{ github.event.inputs.apify_version || github.event.client_payload.apify_version }} - CRAWLEE_VERSION: ${{ github.event.inputs.crawlee_version || github.event.client_payload.crawlee_version }} - PLAYWRIGHT_VERSION: ${{ github.event.inputs.playwright_version || github.event.client_payload.playwright_version }} - IS_LATEST_BROWSER_IMAGE: ${{ github.event.inputs.is_latest_browser_image || github.event.client_payload.is_latest_browser_image || false }} - NODE_LATEST: 20 - -jobs: - # Build master images that are not dependent on existing builds. - build-main: - runs-on: ubuntu-latest - strategy: - # By the time some build fails, other build can be already finished - # so fail-fast does not really prevent the publishing of all parallel builds - fail-fast: false - matrix: - image-name: - [ - node-playwright, - node-playwright-chrome, - node-playwright-firefox, - node-playwright-webkit, - ] - node-version: [18, 20, 22] - - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Prepare image tags - id: prepare-tags - uses: actions/github-script@v7 - env: - CURRENT_NODE: ${{ matrix.node-version }} - LATEST_NODE: ${{ env.NODE_LATEST }} - RELEASE_TAG: ${{ env.RELEASE_TAG }} - IMAGE_NAME: apify/actor-${{ matrix.image-name }} - FRAMEWORK_VERSION: ${{ env.PLAYWRIGHT_VERSION }} - IS_LATEST_BROWSER_IMAGE: ${{ env.IS_LATEST_BROWSER_IMAGE }} - with: - script: | - const generateTags = require("./.github/scripts/prepare-node-image-tags.js"); - return generateTags(); - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Set Dependency Versions - run: | - cd ${{ matrix.image-name 
}} - node ../.github/scripts/set-dependency-versions.js ${{ github.event_name == 'pull_request' }} - - - # It seems that it takes at least two minutes before a newly published version - # becomes available in the NPM registry. We wait before starting the image builds. - name: Wait For Package Registry - uses: nick-fields/retry@v3 - with: - timeout_minutes: 2 # timeout for a single attempt - max_attempts: 3 - retry_wait_seconds: 60 # wait between retries - command: cd ${{ matrix.image-name }} && npm i --dry-run - - - name: Build and tag image - uses: docker/build-push-action@v5 - with: - context: ./${{ matrix.image-name }} - file: ./${{ matrix.image-name }}/Dockerfile - # For some reason build-args doesn't want to be a list, so we manually make it one - build-args: | - NODE_VERSION=${{ matrix.node-version }} - PLAYWRIGHT_VERSION=${{ (github.event_name != 'pull_request' && format('v{0}-', env.PLAYWRIGHT_VERSION)) || '' }} - load: true - tags: ${{ fromJson(steps.prepare-tags.outputs.result).allTags }} - - - name: Test image - run: docker run ${{ fromJson(steps.prepare-tags.outputs.result).firstImageName }} - - - name: Login to DockerHub - if: github.event_name != 'pull_request' - uses: docker/login-action@v3 - with: - username: ${{ secrets.APIFY_SERVICE_ACCOUNT_DOCKERHUB_USERNAME }} - password: ${{ secrets.APIFY_SERVICE_ACCOUNT_DOCKERHUB_TOKEN }} - - - name: Push images - if: github.event_name != 'pull_request' - run: docker push apify/actor-${{ matrix.image-name }} --all-tags diff --git a/.github/workflows/release-puppeteer.yaml b/.github/workflows/release-puppeteer.yaml deleted file mode 100644 index 316e085..0000000 --- a/.github/workflows/release-puppeteer.yaml +++ /dev/null @@ -1,110 +0,0 @@ -name: Release/test puppeteer images - -on: - workflow_dispatch: - inputs: - release_tag: - description: "Tag for the images (e.g.: beta)" - required: true - apify_version: - description: "Apify SDK version (e.g.: ^1.0.0)" - required: true - crawlee_version: - description: 
"Crawlee version (e.g.: ^1.0.0)" - required: true - puppeteer_version: - description: "Puppeteer version (e.g.: 5.5.0)" - required: true - is_latest_browser_image: - description: If this is a release of the latest browser image. This gets autofilled by CI in crawlee - type: boolean - default: false - - repository_dispatch: - types: - - build-node-images - - build-node-images-puppeteer - - pull_request: - -env: - RELEASE_TAG: ${{ github.event.inputs.release_tag || github.event.client_payload.release_tag || 'CI_TEST' }} - APIFY_VERSION: ${{ github.event.inputs.apify_version || github.event.client_payload.apify_version }} - CRAWLEE_VERSION: ${{ github.event.inputs.crawlee_version || github.event.client_payload.crawlee_version }} - PUPPETEER_VERSION: ${{ github.event.inputs.puppeteer_version || github.event.client_payload.puppeteer_version }} - IS_LATEST_BROWSER_IMAGE: ${{ github.event.inputs.is_latest_browser_image || github.event.client_payload.is_latest_browser_image || false }} - NODE_LATEST: 20 - -jobs: - # Build master images that are not dependent on existing builds. 
- build-main: - runs-on: ubuntu-latest - strategy: - # By the time some build fails, other build can be already finished - # so fail-fast does not really prevent the publishing of all parallel builds - fail-fast: false - matrix: - image-name: [node-puppeteer-chrome] - node-version: [18, 20, 22] - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Prepare image tags - id: prepare-tags - uses: actions/github-script@v7 - env: - CURRENT_NODE: ${{ matrix.node-version }} - LATEST_NODE: ${{ env.NODE_LATEST }} - RELEASE_TAG: ${{ env.RELEASE_TAG }} - IMAGE_NAME: apify/actor-${{ matrix.image-name }} - FRAMEWORK_VERSION: ${{ env.PUPPETEER_VERSION }} - IS_LATEST_BROWSER_IMAGE: ${{ env.IS_LATEST_BROWSER_IMAGE }} - with: - script: | - const generateTags = require("./.github/scripts/prepare-node-image-tags.js"); - return generateTags(); - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Set Dependency Versions - run: | - cd ${{ matrix.image-name }} - node ../.github/scripts/set-dependency-versions.js ${{ github.event_name == 'pull_request' }} - - - # It seems that it takes at least two minutes before a newly published version - # becomes available in the NPM registry. We wait before starting the image builds. 
- name: Wait For Package Registry - uses: nick-fields/retry@v3 - with: - timeout_minutes: 2 # timeout for a single attempt - max_attempts: 3 - retry_wait_seconds: 60 # wait between retries - command: cd ${{ matrix.image-name }} && npm i --dry-run - - - name: Build and tag image - uses: docker/build-push-action@v5 - with: - context: ./${{ matrix.image-name }} - file: ./${{ matrix.image-name }}/Dockerfile - build-args: NODE_VERSION=${{ matrix.node-version }} - load: true - tags: ${{ fromJson(steps.prepare-tags.outputs.result).allTags }} - - - name: Test image - run: docker run ${{ fromJson(steps.prepare-tags.outputs.result).firstImageName }} - - - name: Login to DockerHub - if: github.event_name != 'pull_request' - uses: docker/login-action@v3 - with: - username: ${{ secrets.APIFY_SERVICE_ACCOUNT_DOCKERHUB_USERNAME }} - password: ${{ secrets.APIFY_SERVICE_ACCOUNT_DOCKERHUB_TOKEN }} - - - name: Push images - if: github.event_name != 'pull_request' - run: docker push apify/actor-${{ matrix.image-name }} --all-tags diff --git a/.github/workflows/release-python-playwright.yaml b/.github/workflows/release-python-playwright.yaml deleted file mode 100644 index 0409162..0000000 --- a/.github/workflows/release-python-playwright.yaml +++ /dev/null @@ -1,115 +0,0 @@ -name: Release Python + Playwright images - -on: - workflow_dispatch: - inputs: - release_tag: - description: 'Tag for the images (e.g.: "latest" or "beta")' - required: true - apify_version: - description: 'Apify Python SDK version (e.g.: "1.0.0")' - required: true - playwright_version: - description: 'Playwright version (e.g.: "1.39.0")' - required: true - - repository_dispatch: - types: [build-python-images] - - pull_request: - -env: - RELEASE_TAG: ${{ github.event.inputs.release_tag || github.event.client_payload.release_tag }} - APIFY_VERSION: ${{ github.event.inputs.apify_version || github.event.client_payload.apify_version }} - # The default Playwright version is set because this workflow is triggered by a 
Python SDK - # release where the version is not specified. - PLAYWRIGHT_VERSION: ${{ github.event.inputs.playwright_version || github.event.client_payload.playwright_version || '1.42.0' }} - LATEST_PYTHON: "3.12" - -jobs: - # Build master images that are not dependent on existing builds. - build-main: - runs-on: ubuntu-latest - strategy: - matrix: - image-name: [python-playwright] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - - steps: - - name: Set default inputs if event is pull request - if: github.event_name == 'pull_request' - run: | - if [[ -z "$RELEASE_TAG" ]]; then echo "RELEASE_TAG=CI_TEST" >> $GITHUB_ENV; fi - if [[ -z "$APIFY_VERSION" ]]; then echo "APIFY_VERSION=1.1.0" >> $GITHUB_ENV; fi - - - name: Check if inputs are set correctly - run: | - if [[ -z "$RELEASE_TAG" ]]; then echo "RELEASE_TAG input is empty!" >&2; exit 1; fi - if [[ -z "$APIFY_VERSION" ]]; then echo "APIFY_VERSION input is empty!" >&2; exit 1; fi - - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Update pip - run: python -m pip install --upgrade pip - - - # It seems that it takes a few minutes before a newly published version - # becomes available in the PyPI registry. We wait before starting the image builds. 
- name: Wait For Package Registry - uses: nick-fields/retry@v3 - with: - timeout_minutes: 2 # timeout for a single attempt - max_attempts: 3 - retry_wait_seconds: 60 # wait between retries - command: pip install apify~=$APIFY_VERSION - - - name: Prepare image tags - id: prepare-tags - uses: actions/github-script@v7 - env: - CURRENT_PYTHON: ${{ matrix.python-version }} - LATEST_PYTHON: ${{ env.LATEST_PYTHON }} - FRAMEWORK_VERSION: ${{ env.PLAYWRIGHT_VERSION }} - RELEASE_TAG: ${{ env.RELEASE_TAG }} - IMAGE_NAME: apify/actor-${{ matrix.image-name }} - with: - script: | - const generateTags = require("./.github/scripts/prepare-python-image-tags.js"); - return generateTags() - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Build and tag image - uses: docker/build-push-action@v5 - with: - context: ./${{ matrix.image-name }} - file: ./${{ matrix.image-name }}/Dockerfile - build-args: | - PYTHON_VERSION=${{ matrix.python-version }} - APIFY_VERSION=${{ env.APIFY_VERSION }} - PLAYWRIGHT_VERSION=${{ env.PLAYWRIGHT_VERSION }} - load: true - tags: ${{ fromJson(steps.prepare-tags.outputs.result).allTags }} - - - name: Test image - run: docker run ${{ fromJson(steps.prepare-tags.outputs.result).firstImageName }} - - - name: Login to DockerHub - if: github.event_name != 'pull_request' - uses: docker/login-action@v3 - with: - username: ${{ secrets.APIFY_SERVICE_ACCOUNT_DOCKERHUB_USERNAME }} - password: ${{ secrets.APIFY_SERVICE_ACCOUNT_DOCKERHUB_TOKEN }} - - - name: Push images - if: github.event_name != 'pull_request' - run: docker push apify/actor-${{ matrix.image-name }} --all-tags diff --git a/.github/workflows/release-python-selenium.yaml b/.github/workflows/release-python-selenium.yaml deleted file mode 100644 index 6c827c0..0000000 --- a/.github/workflows/release-python-selenium.yaml +++ /dev/null @@ -1,115 +0,0 @@ -name: Release Python + Selenium images - -on: - workflow_dispatch: 
- inputs: - release_tag: - description: 'Tag for the images (e.g.: "latest" or "beta")' - required: true - apify_version: - description: 'Apify Python SDK version (e.g.: "1.0.0")' - required: true - selenium_version: - description: 'Selenium version (e.g.: "4.14.0")' - required: true - - repository_dispatch: - types: [build-python-images] - - pull_request: - -env: - RELEASE_TAG: ${{ github.event.inputs.release_tag || github.event.client_payload.release_tag }} - APIFY_VERSION: ${{ github.event.inputs.apify_version || github.event.client_payload.apify_version }} - # The default Selenium version is set because this workflow is triggered by a Python SDK - # release where the version is not specified. - SELENIUM_VERSION: ${{ github.event.inputs.selenium_version || github.event.client_payload.selenium_version || '4.14.0' }} - LATEST_PYTHON: "3.12" - -jobs: - # Build master images that are not dependent on existing builds. - build-main: - runs-on: ubuntu-latest - strategy: - matrix: - image-name: [python-selenium] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - - steps: - - name: Set default inputs if event is pull request - if: github.event_name == 'pull_request' - run: | - if [[ -z "$RELEASE_TAG" ]]; then echo "RELEASE_TAG=CI_TEST" >> $GITHUB_ENV; fi - if [[ -z "$APIFY_VERSION" ]]; then echo "APIFY_VERSION=1.1.0" >> $GITHUB_ENV; fi - - - name: Check if inputs are set correctly - run: | - if [[ -z "$RELEASE_TAG" ]]; then echo "RELEASE_TAG input is empty!" >&2; exit 1; fi - if [[ -z "$APIFY_VERSION" ]]; then echo "APIFY_VERSION input is empty!" >&2; exit 1; fi - - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Update pip - run: python -m pip install --upgrade pip - - - # It seems that it takes a few minutes before a newly published version - # becomes available in the PyPI registry. 
We wait before starting the image builds. - name: Wait For Package Registry - uses: nick-fields/retry@v3 - with: - timeout_minutes: 2 # timeout for a single attempt - max_attempts: 3 - retry_wait_seconds: 60 # wait between retries - command: pip install apify~=$APIFY_VERSION - - - name: Prepare image tags - id: prepare-tags - uses: actions/github-script@v7 - env: - CURRENT_PYTHON: ${{ matrix.python-version }} - LATEST_PYTHON: ${{ env.LATEST_PYTHON }} - FRAMEWORK_VERSION: ${{ env.SELENIUM_VERSION }} - RELEASE_TAG: ${{ env.RELEASE_TAG }} - IMAGE_NAME: apify/actor-${{ matrix.image-name }} - with: - script: | - const generateTags = require("./.github/scripts/prepare-python-image-tags.js"); - return generateTags() - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Build and tag image - uses: docker/build-push-action@v5 - with: - context: ./${{ matrix.image-name }} - file: ./${{ matrix.image-name }}/Dockerfile - build-args: | - PYTHON_VERSION=${{ matrix.python-version }} - APIFY_VERSION=${{ env.APIFY_VERSION }} - SELENIUM_VERSION=${{ env.SELENIUM_VERSION }} - load: true - tags: ${{ fromJson(steps.prepare-tags.outputs.result).allTags }} - - - name: Test image - run: docker run ${{ fromJson(steps.prepare-tags.outputs.result).firstImageName }} - - - name: Login to DockerHub - if: github.event_name != 'pull_request' - uses: docker/login-action@v3 - with: - username: ${{ secrets.APIFY_SERVICE_ACCOUNT_DOCKERHUB_USERNAME }} - password: ${{ secrets.APIFY_SERVICE_ACCOUNT_DOCKERHUB_TOKEN }} - - - name: Push images - if: github.event_name != 'pull_request' - run: docker push apify/actor-${{ matrix.image-name }} --all-tags diff --git a/.github/workflows/release-python.yaml b/.github/workflows/release-python.yaml deleted file mode 100644 index 8e103bc..0000000 --- a/.github/workflows/release-python.yaml +++ /dev/null @@ -1,107 +0,0 @@ -name: Release Python basic images - -on: - 
workflow_dispatch: - inputs: - release_tag: - description: 'Tag for the images (e.g.: "latest" or "beta")' - required: true - apify_version: - description: 'Apify Python SDK version (e.g.: "1.0.0")' - required: true - - repository_dispatch: - types: [build-python-images] - - pull_request: - -env: - RELEASE_TAG: ${{ github.event.inputs.release_tag || github.event.client_payload.release_tag }} - APIFY_VERSION: ${{ github.event.inputs.apify_version || github.event.client_payload.apify_version }} - LATEST_PYTHON: "3.12" - -jobs: - # Build master images that are not dependent on existing builds. - build-main: - runs-on: ubuntu-latest - strategy: - matrix: - image-name: [python] - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - - steps: - - name: Set default inputs if event is pull request - if: github.event_name == 'pull_request' - run: | - if [[ -z "$RELEASE_TAG" ]]; then echo "RELEASE_TAG=CI_TEST" >> $GITHUB_ENV; fi - if [[ -z "$APIFY_VERSION" ]]; then echo "APIFY_VERSION=1.1.0" >> $GITHUB_ENV; fi - - - name: Check if inputs are set correctly - run: | - if [[ -z "$RELEASE_TAG" ]]; then echo "RELEASE_TAG input is empty!" >&2; exit 1; fi - if [[ -z "$APIFY_VERSION" ]]; then echo "APIFY_VERSION input is empty!" >&2; exit 1; fi - - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Update pip - run: python -m pip install --upgrade pip - - - # It seems that it takes a few minutes before a newly published version - # becomes available in the PyPI registry. We wait before starting the image builds. 
- name: Wait For Package Registry - uses: nick-fields/retry@v3 - with: - timeout_minutes: 2 # timeout for a single attempt - max_attempts: 3 - retry_wait_seconds: 60 # wait between retries - command: pip install apify~=$APIFY_VERSION - - - name: Prepare image tags - id: prepare-tags - uses: actions/github-script@v7 - env: - CURRENT_PYTHON: ${{ matrix.python-version }} - LATEST_PYTHON: ${{ env.LATEST_PYTHON }} - RELEASE_TAG: ${{ env.RELEASE_TAG }} - IMAGE_NAME: apify/actor-${{ matrix.image-name }} - with: - script: | - const generateTags = require("./.github/scripts/prepare-python-image-tags.js"); - return generateTags() - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Build and tag image - uses: docker/build-push-action@v5 - with: - context: ./${{ matrix.image-name }} - file: ./${{ matrix.image-name }}/Dockerfile - build-args: | - PYTHON_VERSION=${{ matrix.python-version }} - APIFY_VERSION=${{ env.APIFY_VERSION }} - load: true - tags: ${{ fromJson(steps.prepare-tags.outputs.result).allTags }} - - - name: Test image - run: docker run ${{ fromJson(steps.prepare-tags.outputs.result).firstImageName }} - - - name: Login to DockerHub - if: github.event_name != 'pull_request' - uses: docker/login-action@v3 - with: - username: ${{ secrets.APIFY_SERVICE_ACCOUNT_DOCKERHUB_USERNAME }} - password: ${{ secrets.APIFY_SERVICE_ACCOUNT_DOCKERHUB_TOKEN }} - - - name: Push images - if: github.event_name != 'pull_request' - run: docker push apify/actor-${{ matrix.image-name }} --all-tags diff --git a/LICENSE b/LICENSE index e4c46fb..8b626be 100644 --- a/LICENSE +++ b/LICENSE @@ -187,6 +187,7 @@ identification within third-party archives. Copyright 2017 Apify Technologies s.r.o. + Copyright 2024 Imamuzzaki Abu Salam Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -198,4 +199,4 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file + limitations under the License. diff --git a/Makefile b/Makefile index a6491bf..a40152b 100644 --- a/Makefile +++ b/Makefile @@ -8,14 +8,7 @@ PLAYWRIGHT_VERSION ?= v1.42.0- # Tag must have format: 22.6.2 PUPPETEER_VERSION ?= 22.6.2 -# Python -PYTHON_VERSION ?= 3.12 -# Apify latest version (python does not support the 'latest' tag) -PYTHON_APIFY_VERSION ?= 1.7.0 -PYTHON_PLAYWRIGHT_VERSION = $(subst v,,$(subst -,,$(PLAYWRIGHT_VERSION))) -PYTHON_SELENIUM_VERSION ?= 4.14.0 - -ALL_TESTS = test-node test-playwright test-playwright-chrome test-playwright-firefox test-playwright-webkit test-bun-node-puppeteer-chrome test-puppeteer-chrome test-python test-python-playwright test-python-selenium +ALL_TESTS = test-bun-node-puppeteer-chrome what-tests: @echo "Available tests:" @@ -35,86 +28,6 @@ all: @echo "" @echo "All tests done!" 
-test-node: - @echo "Building node with version $(NODE_VERSION) (overwrite using NODE_VERSION=XX)" - - @# Correct package.json - @jq '.dependencies.apify = "latest" | .dependencies.crawlee = "latest"' ./node/package.json > ./node/package.json.tmp && mv ./node/package.json.tmp ./node/package.json - - docker buildx build --build-arg NODE_VERSION=$(NODE_VERSION) --file ./node/Dockerfile -t apify/node:local --load ./node - docker run --rm -it --platform linux/amd64 apify/node:local - - @# Restore package.json - @git checkout ./node/package.json 1>/dev/null 2>&1 - - @# Delete docker image - docker rmi apify/node:local - -test-playwright: - @echo "Building playwright with version $(PLAYWRIGHT_VERSION) (overwrite using PLAYWRIGHT_VERSION=v1.42.0-) and node version $(NODE_VERSION) (overwrite using NODE_VERSION=XX)" - - @# Correct package.json - @export PKG_JSON_PW_VERSION=$(echo ${PLAYWRIGHT_VERSION} | cut -c 2- | rev | cut -c 2- | rev) - @jq ".dependencies.apify = \"latest\" | .dependencies.crawlee = \"latest\" | .dependencies.playwright = \"${PKG_JSON_PW_VERSION}\"" ./node-playwright/package.json > ./node-playwright/package.json.tmp && mv ./node-playwright/package.json.tmp ./node-playwright/package.json - - docker buildx build --build-arg NODE_VERSION=$(NODE_VERSION) --build-arg PLAYWRIGHT_VERSION=$(PLAYWRIGHT_VERSION) --file ./node-playwright/Dockerfile --tag apify/playwright:local --load ./node-playwright - docker run --rm -it --platform linux/amd64 apify/playwright:local - - @# Restore package.json - @git checkout ./node-playwright/package.json 1>/dev/null 2>&1 - - @# Delete docker image - docker rmi apify/playwright:local - -test-playwright-chrome: - @echo "Building playwright-chrome with version $(PLAYWRIGHT_VERSION) (overwrite using PLAYWRIGHT_VERSION=v1.42.0-) and node version $(NODE_VERSION) (overwrite using NODE_VERSION=XX)" - - @# Correct package.json - @export PKG_JSON_PW_VERSION=$(echo ${PLAYWRIGHT_VERSION} | cut -c 2- | rev | cut -c 2- | rev) - @jq 
".dependencies.apify = \"latest\" | .dependencies.crawlee = \"latest\" | .dependencies.\"playwright-chromium\" = \"${PKG_JSON_PW_VERSION}\"" ./node-playwright-chrome/package.json > ./node-playwright-chrome/package.json.tmp && mv ./node-playwright-chrome/package.json.tmp ./node-playwright-chrome/package.json - - docker buildx build --build-arg NODE_VERSION=$(NODE_VERSION) --file ./node-playwright-chrome/Dockerfile --tag apify/playwright-chrome:local --load ./node-playwright-chrome - docker run --rm -it --platform linux/amd64 apify/playwright-chrome:local - - @# Restore package.json - @git checkout ./node-playwright-chrome/package.json 1>/dev/null 2>&1 - - @# Delete docker image - docker rmi apify/playwright-chrome:local - - -test-playwright-firefox: - @echo "Building playwright-firefox with version $(PLAYWRIGHT_VERSION) (overwrite using PLAYWRIGHT_VERSION=v1.42.0-) and node version $(NODE_VERSION) (overwrite using NODE_VERSION=XX)" - - @# Correct package.json - @export PKG_JSON_PW_VERSION=$(echo ${PLAYWRIGHT_VERSION} | cut -c 2- | rev | cut -c 2- | rev) - @jq ".dependencies.apify = \"latest\" | .dependencies.crawlee = \"latest\" | .dependencies.\"playwright-firefox\" = \"${PKG_JSON_PW_VERSION}\"" ./node-playwright-firefox/package.json > ./node-playwright-firefox/package.json.tmp && mv ./node-playwright-firefox/package.json.tmp ./node-playwright-firefox/package.json - - docker buildx build --build-arg NODE_VERSION=$(NODE_VERSION) --file ./node-playwright-firefox/Dockerfile --tag apify/playwright-firefox:local --load ./node-playwright-firefox - docker run --rm -it --platform linux/amd64 apify/playwright-firefox:local - - @# Restore package.json - @git checkout ./node-playwright-firefox/package.json 1>/dev/null 2>&1 - - @# Delete docker image - docker rmi apify/playwright-firefox:local - -test-playwright-webkit: - @echo "Building playwright-webkit with version $(PLAYWRIGHT_VERSION) (overwrite using PLAYWRIGHT_VERSION=v1.42.0-) and node version $(NODE_VERSION) 
(overwrite using NODE_VERSION=XX)" - - @# Correct package.json - @export PKG_JSON_PW_VERSION=$(echo ${PLAYWRIGHT_VERSION} | cut -c 2- | rev | cut -c 2- | rev) - @jq ".dependencies.apify = \"latest\" | .dependencies.crawlee = \"latest\" | .dependencies.\"playwright-webkit\" = \"${PKG_JSON_PW_VERSION}\"" ./node-playwright-webkit/package.json > ./node-playwright-webkit/package.json.tmp && mv ./node-playwright-webkit/package.json.tmp ./node-playwright-webkit/package.json - - docker buildx build --build-arg NODE_VERSION=$(NODE_VERSION) --file ./node-playwright-webkit/Dockerfile --tag apify/playwright-webkit:local --load ./node-playwright-webkit - docker run --rm -it --platform linux/amd64 apify/playwright-webkit:local - - @# Restore package.json - @git checkout ./node-playwright-webkit/package.json 1>/dev/null 2>&1 - - @# Delete docker image - docker rmi apify/playwright-webkit:local - test-bun-node-puppeteer-chrome: @echo "Building bun-puppeteer-chrome with version $(PUPPETEER_VERSION) (overwrite using PUPPETEER_VERSION=22.6.2), bun version $(BUN_VERSION), and node version $(NODE_VERSION) (overwrite using NODE_VERSION=XX)" @@ -129,46 +42,3 @@ test-bun-node-puppeteer-chrome: @# Delete docker image docker rmi imbios/bun-node-puppeteer-chrome:local - -test-puppeteer-chrome: - @echo "Building puppeteer-chrome with version $(PUPPETEER_VERSION) (overwrite using PUPPETEER_VERSION=22.6.2) and node version $(NODE_VERSION) (overwrite using NODE_VERSION=XX)" - - @# Correct package.json - @jq ".dependencies.apify = \"latest\" | .dependencies.crawlee = \"latest\" | .dependencies.puppeteer = \"${PUPPETEER_VERSION}\"" ./node-puppeteer-chrome/package.json > ./node-puppeteer-chrome/package.json.tmp && mv ./node-puppeteer-chrome/package.json.tmp ./node-puppeteer-chrome/package.json - - docker buildx build --build-arg NODE_VERSION=$(NODE_VERSION) --file ./node-puppeteer-chrome/Dockerfile --tag apify/puppeteer-chrome:local --load ./node-puppeteer-chrome - docker run --rm -it --platform 
linux/amd64 apify/puppeteer-chrome:local - - @# Restore package.json - @git checkout ./node-puppeteer-chrome/package.json 1>/dev/null 2>&1 - - @# Delete docker image - docker rmi apify/puppeteer-chrome:local - -test-python: - @echo "Building python with version $(PYTHON_VERSION) (overwrite using PYTHON_VERSION=XX)" - - docker buildx build --build-arg PYTHON_VERSION=$(PYTHON_VERSION) --build-arg APIFY_VERSION=$(PYTHON_APIFY_VERSION) --file ./python/Dockerfile -t apify/python:local --load ./python - docker run --rm -it --platform linux/amd64 apify/python:local - - @# Delete docker image - docker rmi apify/python:local - -test-python-playwright: - @echo "Building python-playwright with version $(PYTHON_VERSION) (overwrite using PYTHON_VERSION=XX)" - - docker buildx build --build-arg PYTHON_VERSION=$(PYTHON_VERSION) --build-arg APIFY_VERSION=$(PYTHON_APIFY_VERSION) --build-arg PLAYWRIGHT_VERSION=$(PYTHON_PLAYWRIGHT_VERSION) --file ./python-playwright/Dockerfile -t apify/python-playwright:local --load ./python-playwright - docker run --rm -it --platform linux/amd64 apify/python-playwright:local - - @# Delete docker image - docker rmi apify/python-playwright:local - -test-python-selenium: - @echo "Building python-selenium with version $(PYTHON_VERSION) (overwrite using PYTHON_VERSION=XX)" - - docker buildx build --build-arg PYTHON_VERSION=$(PYTHON_VERSION) --build-arg APIFY_VERSION=$(PYTHON_APIFY_VERSION) --build-arg SELENIUM_VERSION=$(PYTHON_SELENIUM_VERSION) --file ./python-selenium/Dockerfile -t apify/python-selenium:local --load ./python-selenium - docker run --rm -it --platform linux/amd64 apify/python-selenium:local - - @# Delete docker image - docker rmi apify/python-selenium:local - diff --git a/README.md b/README.md index 2424b52..a4d3c29 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,8 @@ -# Apify base Docker images - -Public Docker images for Apify Actor serverless platform (https://docs.apify.com/actor) +# Fork Apify actor for bun-node Docker images The 
sources for the images are present in subdirectories that are named as the corresponding -Docker image. For example, the `node` directory corresponds to the -[apify/actor-node](https://hub.docker.com/r/apify/actor-node/) Docker image. +Docker image. For example, the `bun-node` directory corresponds to the +[imbios/actor-bun-node-puppeteer-chrome](https://hub.docker.com/r/imbios/actor-bun-node-puppeteer-chrome/) Docker image. The images are using the following tags: @@ -19,9 +17,9 @@ In order to build and publish a new version of the Docker images, open the Actions tab and find the Release Images workflow. You can then run the workflow by providing the following inputs: -- A tag, which will be used to tag the image in DockerHub. Typically beta or latest. -- A version of the `apify` package that should be pre-installed in the images. -- A version of the `puppeteer` package that should be pre-installed in the images that use Puppeteer. +- A tag, which will be used to tag the image in DockerHub. Typically beta or latest. +- A version of the `apify` package that should be pre-installed in the images. +- A version of the `puppeteer` package that should be pre-installed in the images that use Puppeteer. 
### Adding a new actor image diff --git a/node-phantomjs/.dockerignore b/node-phantomjs/.dockerignore deleted file mode 100644 index b543243..0000000 --- a/node-phantomjs/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -npm-debug.log -yarn.lock -node_modules -.gitignore \ No newline at end of file diff --git a/node-phantomjs/Dockerfile b/node-phantomjs/Dockerfile deleted file mode 100644 index e88f45c..0000000 --- a/node-phantomjs/Dockerfile +++ /dev/null @@ -1,52 +0,0 @@ -ARG NODE_VERSION=16 -FROM node:${NODE_VERSION}-alpine - -# First, download PhantomJS and necessary libraries, these change rarely -RUN DEBIAN_FRONTEND=noninteractive apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y wget ca-certificates --no-install-recommends \ - && wget --no-verbose -O /usr/bin/phantomjs https://s3.amazonaws.com/apifier-phantomjs-builds/phantomjs-2.1.1s-apifier-ubuntu-16.04-x64 \ - && wget --no-verbose -O /lib/x86_64-linux-gnu/libicudata.so.55.1 https://s3.amazonaws.com/apifier-phantomjs-builds/libicudata.so.55.1 \ - && wget --no-verbose -O /lib/x86_64-linux-gnu/libicui18n.so.55.1 https://s3.amazonaws.com/apifier-phantomjs-builds/libicui18n.so.55.1 \ - && wget --no-verbose -O /lib/x86_64-linux-gnu/libicuuc.so.55.1 https://s3.amazonaws.com/apifier-phantomjs-builds/libicuuc.so.55.1 \ - && wget --no-verbose -O /lib/x86_64-linux-gnu/libssl.so.1.0.0 https://s3.amazonaws.com/apifier-phantomjs-builds/libssl.so.1.0.0 \ - && wget --no-verbose -O /lib/x86_64-linux-gnu/libcrypto.so.1.0.0 https://s3.amazonaws.com/apifier-phantomjs-builds/libcrypto.so.1.0.0 \ - && ln -s /lib/x86_64-linux-gnu/libicudata.so.55.1 /lib/x86_64-linux-gnu/libicudata.so.55 \ - && ln -s /lib/x86_64-linux-gnu/libicui18n.so.55.1 /lib/x86_64-linux-gnu/libicui18n.so.55 \ - && ln -s /lib/x86_64-linux-gnu/libicuuc.so.55.1 /lib/x86_64-linux-gnu/libicuuc.so.55 \ - && chmod a+x /usr/bin/phantomjs \ - && chown root:root /usr/bin/phantomjs - -# Install packages -RUN DEBIAN_FRONTEND=noninteractive apt-get 
purge --auto-remove -y wget \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y libfreetype6 libfontconfig1 procps sqlite3 --no-install-recommends \ - && rm -rf /var/lib/apt/lists/* \ - && rm -rf /src/*.deb - -# Run everything after as non-privileged user to avoid warnings -RUN groupadd -r myuser && useradd -r -g myuser -G audio,video myuser \ - && mkdir -p /home/myuser/Downloads \ - && chown -R myuser:myuser /home/myuser -USER myuser -WORKDIR /home/myuser - -# Copy source code -COPY --chown=myuser:myuser test.js /home/myuser/ - -# Tell Node.js this is a production environemnt -ENV NODE_ENV=production - -# Enable Node.js process to use a lot of memory (Actor has limit of 32GB) -# Increases default size of headers. The original limit was 80kb, but from node 10+ they decided to lower it to 8kb. -# However they did not think about all the sites there with large headers, -# so we put back the old limit of 80kb, which seems to work just fine. -ENV NODE_OPTIONS="--max_old_space_size=30000 --max-http-header-size=80000" - -# Install default dependencies, print versions of everything -RUN echo "Node.js version:" \ - && node --version \ - && echo "NPM version:" \ - && npm --version \ - && npm config --global set update-notifier false - -# We're using CMD instead of ENTRYPOINT, to allow manual overriding -CMD node test.js diff --git a/node-phantomjs/test.js b/node-phantomjs/test.js deleted file mode 100644 index 56d5040..0000000 --- a/node-phantomjs/test.js +++ /dev/null @@ -1,25 +0,0 @@ -// This code is used to test that both Node.js and PhantomJS work. 
- -const { exec } = require('child_process'); - -/* global process */ - -console.log('Testing PhantomJS...'); - -exec('phantomjs --version', (error, stdout, stderr) => { - if (error) { - console.error(`exec error: ${error}`); - process.exit(1); - } - - console.log('Version:', stdout); - - if (stdout.trim() !== '2.1.1s-apifier') { - throw new Error(`Unsupported version of PhantomJS: ${stdout}`); - } - if (stderr.trim()) { - throw new Error(`Unknown error occurred: ${stderr}`); - } - - console.log('... test PASSED'); -}); diff --git a/node-playwright-chrome/.dockerignore b/node-playwright-chrome/.dockerignore deleted file mode 100644 index b543243..0000000 --- a/node-playwright-chrome/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -npm-debug.log -yarn.lock -node_modules -.gitignore \ No newline at end of file diff --git a/node-playwright-chrome/Dockerfile b/node-playwright-chrome/Dockerfile deleted file mode 100644 index 6304cb5..0000000 --- a/node-playwright-chrome/Dockerfile +++ /dev/null @@ -1,114 +0,0 @@ -ARG NODE_VERSION=20 -# Use bookworm to be consistent across node versions. -FROM --platform=linux/amd64 node:${NODE_VERSION}-bookworm-slim - -LABEL maintainer="support@apify.com" description="Base image for Apify Actors using Chrome" -ENV DEBIAN_FRONTEND=noninteractive - -# This image was inspired by https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md#running-puppeteer-in-docker - -# Install latest Chrome dev packages and fonts to support major charsets (Chinese, Japanese, Arabic, Hebrew, Thai and a few others) -# Note: this also installs the necessary libs to make the bundled version of Chromium that Puppeteer installs, work. 
-RUN \ - # Disable chrome auto updates, based on https://support.google.com/chrome/a/answer/9052345 - mkdir -p /etc/default && echo 'repo_add_once=false' > /etc/default/google-chrome \ - && apt-get update \ - && apt-get install -y wget gnupg unzip ca-certificates xvfb xauth --no-install-recommends \ - && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ - && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \ - && sh -c 'echo "deb http://ftp.us.debian.org/debian bookworm main non-free" >> /etc/apt/sources.list.d/fonts.list' \ - && apt-get update \ - && apt-get purge --auto-remove -y wget unzip \ - && apt-get install -y \ - git \ - google-chrome-stable \ - # Found this in other images, not sure whether it's needed, it does not come from Playwright deps - procps \ - # Extras - fonts-freefont-ttf \ - fonts-kacst \ - fonts-thai-tlwg \ - fonts-wqy-zenhei \ - --no-install-recommends \ - \ - # Add user so we don't need --no-sandbox. - && groupadd -r myuser && useradd -r -g myuser -G audio,video myuser \ - && mkdir -p /home/myuser/Downloads \ - && chown -R myuser:myuser /home/myuser \ - \ - && mkdir -p /etc/opt/chrome/policies/managed \ - && echo '{ "CommandLineFlagSecurityWarningsEnabled": false }' > /etc/opt/chrome/policies/managed/managed_policies.json \ - && echo '{ "ComponentUpdatesEnabled": false }' > /etc/opt/chrome/policies/managed/component_update.json \ - \ - # Globally disable the update-notifier. - && npm config --global set update-notifier false \ - # Install all required playwright dependencies for chrome/chromium - && PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 npm_config_ignore_scripts=1 npx playwright install-deps chrome \ - # Cleanup time - && rm -rf /var/lib/apt/lists/* \ - && rm -rf /src/*.deb \ - && apt-get clean -y && apt-get autoremove -y \ - && rm -rf /root/.npm \ - # This is needed to remove an annoying error message when running headful. 
- && mkdir -p /tmp/.X11-unix && chmod 1777 /tmp/.X11-unix - -# Run everything after as non-privileged user. -USER myuser -WORKDIR /home/myuser - -ENV PLAYWRIGHT_BROWSERS_PATH=/home/myuser/pw-browsers - -# Copy source code and xvfb script -COPY --chown=myuser:myuser package.json main.js chrome_test.js start_xvfb_and_run_cmd.sh new_xvfb_run_cmd.sh /home/myuser/ - -# Sets path to Chrome executable, this is used by Apify.launchPuppeteer() -ENV APIFY_CHROME_EXECUTABLE_PATH=/usr/bin/google-chrome - -# Tell the crawlee cli that we already have browers installed, so it skips installing them -ENV CRAWLEE_SKIP_BROWSER_INSTALL=1 - -# Tell Node.js this is a production environemnt -ENV NODE_ENV=production - -# Enable Node.js process to use a lot of memory (Actor has limit of 32GB) -# Increases default size of headers. The original limit was 80kb, but from node 10+ they decided to lower it to 8kb. -# However they did not think about all the sites there with large headers, -# so we put back the old limit of 80kb, which seems to work just fine. -ENV NODE_OPTIONS="--max_old_space_size=30000 --max-http-header-size=80000" - -# Install default dependencies, print versions of everything -RUN npm --quiet set progress=false \ - && npm install --omit=dev --omit=optional --no-package-lock --prefer-online \ - && echo "Installed NPM packages:" \ - && (npm list --omit=dev --omit=optional || true) \ - && echo "Node.js version:" \ - && node --version \ - && echo "NPM version:" \ - && npm --version \ - && echo "Google Chrome version:" \ - && bash -c "$APIFY_CHROME_EXECUTABLE_PATH --version" \ - # symlink the chromium binary to the root folder in order to bypass the versioning and resulting browser launch crashes. - && ln -s ${PLAYWRIGHT_BROWSERS_PATH}/chromium-*/chrome-linux/chrome ${PLAYWRIGHT_BROWSERS_PATH}/ \ - # Playwright allows donwloading only one browser through separate package with same export. So we rename it to just playwright. 
- && mv ./node_modules/playwright-chromium ./node_modules/playwright && rm -rf ./node_modules/playwright-chromium - -ENV APIFY_DEFAULT_BROWSER_PATH=${PLAYWRIGHT_BROWSERS_PATH}/chrome - -# Prevent installing of browsers by future `npm install`. -ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD 1 - -# Maybe we can consider changing it to 1920x1080x24. However I guess it should not have any effect. - -# Set up xvfb - -# We should you the autodisplay detection as suggested here: https://github.com/microsoft/playwright/issues/2728#issuecomment-678083619 -ENV DISPLAY=:99 -ENV XVFB_WHD=1920x1080x24+32 -# Uncoment this line if you want to run browser in headfull mode by defautl. -# ENV APIFY_XVFB=1 - -# NOTEs: -# - This needs to be compatible with CLI. -# - Using CMD instead of ENTRYPOINT, to allow manual overriding -# CMD ./start_xvfb_and_run_cmd.sh && npm start --silent -CMD ./new_xvfb_run_cmd.sh npm start --silent diff --git a/node-playwright-chrome/chrome_test.js b/node-playwright-chrome/chrome_test.js deleted file mode 100644 index 746a576..0000000 --- a/node-playwright-chrome/chrome_test.js +++ /dev/null @@ -1,25 +0,0 @@ -const { launchPlaywright } = require('crawlee'); - -const testPageLoading = async (browser) => { - const page = await browser.newPage(); - await page.goto('http://www.example.com'); - const pageTitle = await page.title(); - if (pageTitle !== 'Example Domain') { - throw new Error(`Playwright+Chrome test failed - returned title "${pageTitle}"" !== "Example Domain"`); - } -}; - -const testChrome = async (launchOptions) => { - const launchContext = { useChrome: true, launchOptions }; - - console.log(`Testing Playwright with Chrome`, launchContext); - - const browser = await launchPlaywright(launchContext); - - await testPageLoading(browser); - await browser.close(); -}; - -module.exports = { - testChrome, -}; diff --git a/node-playwright-chrome/main.js b/node-playwright-chrome/main.js deleted file mode 100644 index b7354da..0000000 --- 
a/node-playwright-chrome/main.js +++ /dev/null @@ -1,39 +0,0 @@ -// This file will be replaced by the content of the Act2.sourceCode field, -// we keep this one here just for testing and clarification. - -console.log( - `If you're seeing this text, it means the actor started the default "main.js" file instead -of your own source code file. You have two options how to fix this: -1) Rename your source code file to "main.js" -2) Define custom "package.json" and/or "Dockerfile" that will run your code your way - -For more information, see https://docs.apify.com/actors/development/source-code#custom-dockerfile -`); -console.log('Testing Docker image...'); - -const { Actor } = require('apify'); -const { launchPlaywright, getMemoryInfo } = require('crawlee'); -const { testChrome } = require('./chrome_test'); - -Actor.main(async () => { - // Sanity test browsers. - // We need --no-sandbox, because even though the build is running on GitHub, the test is running in Docker. - const launchOptions = { headless: true, args: ['--no-sandbox'] }; - const launchContext = { launchOptions }; - - const browser = await launchPlaywright(launchContext); - await browser.close(); - - // Try to use full Chrome headless - await testChrome({ headless: true }); - - // Try to use full Chrome with XVFB - await testChrome({ headless: false }); - - // Try to use playwright default - await testChrome({ executablePath: undefined }); - await testChrome({ executablePath: process.env.APIFY_DEFAULT_BROWSER_PATH }); - - // Test that "ps" command is available, sometimes it was missing in official Node builds - await getMemoryInfo(); -}); diff --git a/node-playwright-chrome/new_xvfb_run_cmd.sh b/node-playwright-chrome/new_xvfb_run_cmd.sh deleted file mode 100755 index 141f76f..0000000 --- a/node-playwright-chrome/new_xvfb_run_cmd.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -# xvfb-run -a -s "-ac -screen 0 $XVFB_WHD -nolisten tcp" npm start --silent - -echo "Will run command: xvfb-run -a -s \"-ac -screen 
0 $XVFB_WHD -nolisten tcp\" $@" -xvfb-run -a -s "-ac -screen 0 $XVFB_WHD -nolisten tcp" "$@" diff --git a/node-playwright-chrome/package.json b/node-playwright-chrome/package.json deleted file mode 100644 index 35d5413..0000000 --- a/node-playwright-chrome/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "description": "Anonymous actor on the Apify platform (with Chrome)", - "version": "0.0.1", - "license": "UNLICENSED", - "main": "main.js", - "scripts": { - "start": "node main.js" - }, - "dependencies": { - "apify": "APIFY_VERSION", - "crawlee": "CRAWLEE_VERSION", - "playwright-chromium": "PLAYWRIGHT_VERSION", - "typescript": "^5.4.3" - }, - "repository": {} -} diff --git a/node-playwright-chrome/start_xvfb_and_run_cmd.sh b/node-playwright-chrome/start_xvfb_and_run_cmd.sh deleted file mode 100755 index b7e4217..0000000 --- a/node-playwright-chrome/start_xvfb_and_run_cmd.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -echo "Starting X virtual framebuffer using: Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp" -Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp & - -# Execute CMD (original CMD of this Dockerfile gets overridden in actor build) -echo "Executing main command" -exec "$@" diff --git a/node-playwright-firefox/.dockerignore b/node-playwright-firefox/.dockerignore deleted file mode 100644 index b543243..0000000 --- a/node-playwright-firefox/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -npm-debug.log -yarn.lock -node_modules -.gitignore \ No newline at end of file diff --git a/node-playwright-firefox/Dockerfile b/node-playwright-firefox/Dockerfile deleted file mode 100644 index 821c640..0000000 --- a/node-playwright-firefox/Dockerfile +++ /dev/null @@ -1,102 +0,0 @@ -ARG NODE_VERSION=20 -# Use bookworm to be consistent across node versions. 
-FROM --platform=linux/amd64 node:${NODE_VERSION}-bookworm-slim - -LABEL maintainer="support@apify.com" description="Base image for Apify Actors using Firefox" -ENV DEBIAN_FRONTEND=noninteractive - -COPY ./register_intermediate_certs.sh ./register_intermediate_certs.sh - -# Install Firefox dependencies + tools -RUN sh -c 'echo "deb http://ftp.us.debian.org/debian bookworm main non-free" >> /etc/apt/sources.list.d/fonts.list' \ - && apt-get update \ - && apt-get install -y --no-install-recommends \ - # Found this in other images, not sure whether it's needed, it does not come from Playwright deps - procps \ - # The following packages are needed for the intermediate certificates to work in Firefox. - ca-certificates \ - jq \ - wget \ - p11-kit \ - xauth \ - \ - # Register cerificates - && chmod +x ./register_intermediate_certs.sh \ - && ./register_intermediate_certs.sh \ - \ - # Add user so we don't need --no-sandbox. - && groupadd -r myuser && useradd -r -g myuser -G audio,video myuser \ - && mkdir -p /home/myuser/Downloads \ - && chown -R myuser:myuser /home/myuser \ - \ - # Globally disable the update-notifier. - && npm config --global set update-notifier false \ - \ - # Install all required playwright dependencies for firefox - && PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 npm_config_ignore_scripts=1 npx playwright install-deps firefox \ - \ - # Cleanup time - && rm -rf /var/lib/apt/lists/* \ - && rm -rf /src/*.deb \ - && apt-get clean -y && apt-get autoremove -y \ - && rm -rf /root/.npm \ - # This is needed to remove an annoying error message when running headful. - && mkdir -p /tmp/.X11-unix && chmod 1777 /tmp/.X11-unix - -# Run everything after as non-privileged user. 
-USER myuser -WORKDIR /home/myuser - -ENV PLAYWRIGHT_BROWSERS_PATH=/home/myuser/pw-browsers - -# Tell the crawlee cli that we already have browers installed, so it skips installing them -ENV CRAWLEE_SKIP_BROWSER_INSTALL=1 - -# Copy source code and xvfb script -COPY --chown=myuser:myuser package.json main.js firefox_test.js start_xvfb_and_run_cmd.sh new_xvfb_run_cmd.sh /home/myuser/ - -# Tell Node.js this is a production environemnt -ENV NODE_ENV=production - -# Enable Node.js process to use a lot of memory (Actor has limit of 32GB) -# Increases default size of headers. The original limit was 80kb, but from node 10+ they decided to lower it to 8kb. -# However they did not think about all the sites there with large headers, -# so we put back the old limit of 80kb, which seems to work just fine. -ENV NODE_OPTIONS="--max_old_space_size=30000 --max-http-header-size=80000" - -# Install default dependencies, print versions of everything -RUN npm --quiet set progress=false \ - && npm install --omit=dev --omit=optional --no-package-lock --prefer-online \ - && echo "Installed NPM packages:" \ - && (npm list --omit=dev --omit=optional || true) \ - && echo "Node.js version:" \ - && node --version \ - && echo "NPM version:" \ - && npm --version \ - \ - # symlink the firefox binary to the root folder in order to bypass the versioning and resulting browser launch crashes. - && ln -s ${PLAYWRIGHT_BROWSERS_PATH}/firefox-*/firefox/firefox ${PLAYWRIGHT_BROWSERS_PATH}/ \ - \ - # Playwright allows donwloading only one browser through separate package with same export. So we rename it to just playwright. - && mv ./node_modules/playwright-firefox ./node_modules/playwright && rm -rf ./node_modules/playwright-firefox \ - \ - # Overrides the dynamic library used by Firefox to determine trusted root certificates with p11-kit-trust.so, which loads the system certificates. 
- && rm $PLAYWRIGHT_BROWSERS_PATH/firefox-*/firefox/libnssckbi.so \ - && ln -s /usr/lib/x86_64-linux-gnu/pkcs11/p11-kit-trust.so $(ls -d $PLAYWRIGHT_BROWSERS_PATH/firefox-*)/firefox/libnssckbi.so - -ENV APIFY_DEFAULT_BROWSER_PATH=${PLAYWRIGHT_BROWSERS_PATH}/firefox - -# Prevent installing of browsers by future `npm install`. -ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD 1 - -# We should you the autodisplay detection as suggested here: https://github.com/microsoft/playwright/issues/2728#issuecomment-678083619 -ENV DISPLAY=:99 -ENV XVFB_WHD=1920x1080x24+32 -# Uncomment this line if you want to run browser in headful mode by default. -# ENV APIFY_XVFB=1 - -# NOTEs: -# - This needs to be compatible with CLI. -# - Using CMD instead of ENTRYPOINT, to allow manual overriding -# CMD ./start_xvfb_and_run_cmd.sh && npm start --silent -CMD ./new_xvfb_run_cmd.sh npm start --silent diff --git a/node-playwright-firefox/firefox_test.js b/node-playwright-firefox/firefox_test.js deleted file mode 100644 index 29c8b4a..0000000 --- a/node-playwright-firefox/firefox_test.js +++ /dev/null @@ -1,26 +0,0 @@ -const { launchPlaywright } = require('crawlee'); - -const testPageLoading = async (browser) => { - const page = await browser.newPage(); - await page.goto('http://www.example.com'); - const pageTitle = await page.title(); - if (pageTitle !== 'Example Domain') { - throw new Error(`Playwright+Firefox test failed - returned title "${pageTitle}"" !== "Example Domain"`); - } -}; - -const testFirefox = async (launchOptions) => { - const launchContext = { - launcher: require('playwright').firefox, - launchOptions, - }; - - console.log(`Testing Playwright with Firefox`, launchOptions); - - const browser = await launchPlaywright(launchContext); - - await testPageLoading(browser); - await browser.close(); -}; - -module.exports = testFirefox; diff --git a/node-playwright-firefox/main.js b/node-playwright-firefox/main.js deleted file mode 100644 index c72d103..0000000 --- 
a/node-playwright-firefox/main.js +++ /dev/null @@ -1,33 +0,0 @@ -// This file will be replaced by the content of the Act2.sourceCode field, -// we keep this one here just for testing and clarification. - -console.log( - `If you're seeing this text, it means the actor started the default "main.js" file instead -of your own source code file. You have two options how to fix this: -1) Rename your source code file to "main.js" -2) Define custom "package.json" and/or "Dockerfile" that will run your code your way - -For more information, see https://docs.apify.com/actors/development/source-code#custom-dockerfile -`); -console.log('Testing Docker image...'); - -const { Actor } = require('apify'); -const { getMemoryInfo } = require('crawlee'); -const testFirefox = require('./firefox_test'); - -Actor.main(async () => { - // Sanity test browsers. - - // Try to use full Firefox headless - await testFirefox({ headless: true }); - - // Try to use full Firefox with XVFB - await testFirefox({ headless: false }); - - // Try to use playwright default - await testFirefox({ executablePath: undefined }); - await testFirefox({ executablePath: process.env.APIFY_DEFAULT_BROWSER_PATH }); - - // Test that "ps" command is available, sometimes it was missing in official Node builds - await getMemoryInfo(); -}); diff --git a/node-playwright-firefox/new_xvfb_run_cmd.sh b/node-playwright-firefox/new_xvfb_run_cmd.sh deleted file mode 100755 index 141f76f..0000000 --- a/node-playwright-firefox/new_xvfb_run_cmd.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -# xvfb-run -a -s "-ac -screen 0 $XVFB_WHD -nolisten tcp" npm start --silent - -echo "Will run command: xvfb-run -a -s \"-ac -screen 0 $XVFB_WHD -nolisten tcp\" $@" -xvfb-run -a -s "-ac -screen 0 $XVFB_WHD -nolisten tcp" "$@" diff --git a/node-playwright-firefox/package.json b/node-playwright-firefox/package.json deleted file mode 100644 index fa006ba..0000000 --- a/node-playwright-firefox/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - 
"description": "Anonymous actor on the Apify platform (with Firefox)", - "version": "0.0.1", - "license": "UNLICENSED", - "main": "main.js", - "scripts": { - "start": "node main.js" - }, - "dependencies": { - "apify": "APIFY_VERSION", - "crawlee": "CRAWLEE_VERSION", - "playwright-firefox": "PLAYWRIGHT_VERSION", - "typescript": "^5.4.3" - }, - "repository": {} -} diff --git a/node-playwright-firefox/register_intermediate_certs.sh b/node-playwright-firefox/register_intermediate_certs.sh deleted file mode 100644 index 2ba86ae..0000000 --- a/node-playwright-firefox/register_intermediate_certs.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -FILENAMES=$(wget -O - https://firefox.settings.services.mozilla.com/v1/buckets/security-state/collections/intermediates/records | jq -r '.data[].attachment.location') - -mkdir -p /usr/local/share/ca-certificates/firefox - -wget -P "/usr/local/share/ca-certificates/firefox/" -i <(echo $FILENAMES | tr ' ' '\n' | sed -e 's/^/https:\/\/firefox-settings-attachments.cdn.mozilla.net\//g') - -for f in /usr/local/share/ca-certificates/firefox/*.pem; do - mv -- "$f" "${f%.pem}.crt" -done - -chmod 644 /usr/local/share/ca-certificates/firefox/*.crt -chmod 755 /usr/local/share/ca-certificates/firefox - -update-ca-certificates \ No newline at end of file diff --git a/node-playwright-firefox/start_xvfb_and_run_cmd.sh b/node-playwright-firefox/start_xvfb_and_run_cmd.sh deleted file mode 100755 index b7e4217..0000000 --- a/node-playwright-firefox/start_xvfb_and_run_cmd.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -echo "Starting X virtual framebuffer using: Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp" -Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp & - -# Execute CMD (original CMD of this Dockerfile gets overridden in actor build) -echo "Executing main command" -exec "$@" diff --git a/node-playwright-webkit/.dockerignore b/node-playwright-webkit/.dockerignore deleted file mode 100644 index b543243..0000000 --- 
a/node-playwright-webkit/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -npm-debug.log -yarn.lock -node_modules -.gitignore \ No newline at end of file diff --git a/node-playwright-webkit/Dockerfile b/node-playwright-webkit/Dockerfile deleted file mode 100644 index a3e22bf..0000000 --- a/node-playwright-webkit/Dockerfile +++ /dev/null @@ -1,92 +0,0 @@ -FROM --platform=linux/amd64 ubuntu:jammy -ARG NODE_VERSION=20 - -LABEL maintainer="support@apify.com" description="Base image for Apify Actors using Webkit" -ENV DEBIAN_FRONTEND=noninteractive - -# Install WebKit dependencies -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - git \ - procps \ - xvfb \ - xauth \ - # Install node - && apt-get update && apt-get install -y curl \ - && curl -sL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \ - && apt-get install -y nodejs \ - # Feature-parity with Node.js base images. - # From: https://github.com/microsoft/playwright/blob/master/utils/docker/Dockerfile.focal - && apt-get update && apt-get install -y --no-install-recommends git ssh \ - && npm install -g yarn \ - \ - # Add user so we don't need --no-sandbox. - && groupadd -r myuser && useradd -r -g myuser -G audio,video myuser \ - && mkdir -p /home/myuser/Downloads \ - && chown -R myuser:myuser /home/myuser \ - # Globally disable the update-notifier. - && npm config --global set update-notifier false \ - \ - # Install all required playwright dependencies for webkit - && PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 npm_config_ignore_scripts=1 npx playwright install-deps webkit \ - \ - # Cleanup time - && rm -rf /var/lib/apt/lists/* \ - && rm -rf /src/*.deb \ - && apt-get clean -y && apt-get autoremove -y \ - && rm -rf /root/.npm \ - \ - # This is needed to remove an annoying error message when running headful. - && mkdir -p /tmp/.X11-unix && chmod 1777 /tmp/.X11-unix - -# Run everything after as non-privileged user. 
-USER myuser -WORKDIR /home/myuser - -ENV PLAYWRIGHT_BROWSERS_PATH=/home/myuser/pw-browsers - -# Tell the crawlee cli that we already have browers installed, so it skips installing them -ENV CRAWLEE_SKIP_BROWSER_INSTALL=1 - -# Copy source code and xvfb script -COPY --chown=myuser:myuser package.json main.js webkit_test.js start_xvfb_and_run_cmd.sh new_xvfb_run_cmd.sh /home/myuser/ - -# Tell Node.js this is a production environemnt -ENV NODE_ENV=production - -# Enable Node.js process to use a lot of memory (Actor has limit of 32GB) -# Increases default size of headers. The original limit was 80kb, but from node 10+ they decided to lower it to 8kb. -# However they did not think about all the sites there with large headers, -# so we put back the old limit of 80kb, which seems to work just fine. -ENV NODE_OPTIONS="--max_old_space_size=30000 --max-http-header-size=80000" - -# Install default dependencies, print versions of everything -RUN npm --quiet set progress=false \ - && npm install --omit=dev --omit=optional --no-package-lock --prefer-online \ - && echo "Installed NPM packages:" \ - && (npm list --omit=dev --omit=optional || true) \ - && echo "Node.js version:" \ - && node --version \ - && echo "NPM version:" \ - && npm --version \ - # symlink the webkit binary to the root folder in order to bypass the versioning and resulting browser launch crashes. - && ln -s ${PLAYWRIGHT_BROWSERS_PATH}/webkit-*/minibrowser-gtk/MiniBrowser ${PLAYWRIGHT_BROWSERS_PATH}/ \ - # Playwright allows donwloading only one browser through separate package with same export. So we rename it to just playwright. - && mv ./node_modules/playwright-webkit ./node_modules/playwright && rm -rf ./node_modules/playwright-webkit - -ENV APIFY_DEFAULT_BROWSER_PATH=${PLAYWRIGHT_BROWSERS_PATH}/MiniBrowser - -# Prevent installing of browsers by future `npm install`. 
-ENV PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD 1 - -# We should you the autodisplay detection as suggested here: https://github.com/microsoft/playwright/issues/2728#issuecomment-678083619 -ENV DISPLAY=:99 -ENV XVFB_WHD=1920x1080x24+32 -# Uncoment this line if you want to run browser in headfull mode by defautl. -# ENV APIFY_XVFB=1 - -# NOTEs: -# - This needs to be compatible with CLI. -# - Using CMD instead of ENTRYPOINT, to allow manual overriding -# CMD ./start_xvfb_and_run_cmd.sh && npm start --silent -CMD ./new_xvfb_run_cmd.sh npm start --silent diff --git a/node-playwright-webkit/main.js b/node-playwright-webkit/main.js deleted file mode 100644 index 864bbec..0000000 --- a/node-playwright-webkit/main.js +++ /dev/null @@ -1,33 +0,0 @@ -// This file will be replaced by the content of the Act2.sourceCode field, -// we keep this one here just for testing and clarification. - -console.log( - `If you're seeing this text, it means the actor started the default "main.js" file instead -of your own source code file. You have two options how to fix this: -1) Rename your source code file to "main.js" -2) Define custom "package.json" and/or "Dockerfile" that will run your code your way - -For more information, see https://docs.apify.com/actors/development/source-code#custom-dockerfile -`); -console.log('Testing Docker image...'); - -const { Actor } = require('apify'); -const { getMemoryInfo } = require('crawlee'); -const testWebkit = require('./webkit_test'); - -Actor.main(async () => { - // Sanity test browsers. 
- - // Try to use full Webkit headless - await testWebkit({ headless: true }); - - // Try to use full Webkit with XVFB - await testWebkit({ headless: false }); - - // Try to use playwright default - await testWebkit({ executablePath: undefined }); - await testWebkit({ executablePath: process.env.APIFY_DEFAULT_BROWSER_PATH }); - - // Test that "ps" command is available, sometimes it was missing in official Node builds - await getMemoryInfo(); -}); diff --git a/node-playwright-webkit/new_xvfb_run_cmd.sh b/node-playwright-webkit/new_xvfb_run_cmd.sh deleted file mode 100755 index 141f76f..0000000 --- a/node-playwright-webkit/new_xvfb_run_cmd.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -# xvfb-run -a -s "-ac -screen 0 $XVFB_WHD -nolisten tcp" npm start --silent - -echo "Will run command: xvfb-run -a -s \"-ac -screen 0 $XVFB_WHD -nolisten tcp\" $@" -xvfb-run -a -s "-ac -screen 0 $XVFB_WHD -nolisten tcp" "$@" diff --git a/node-playwright-webkit/package.json b/node-playwright-webkit/package.json deleted file mode 100644 index e304879..0000000 --- a/node-playwright-webkit/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "description": "Anonymous actor on the Apify platform (with Webkit)", - "version": "0.0.1", - "license": "UNLICENSED", - "main": "main.js", - "scripts": { - "start": "node main.js" - }, - "dependencies": { - "apify": "APIFY_VERSION", - "crawlee": "CRAWLEE_VERSION", - "playwright-webkit": "PLAYWRIGHT_VERSION", - "typescript": "^5.4.3" - }, - "repository": {} -} diff --git a/node-playwright-webkit/start_xvfb_and_run_cmd.sh b/node-playwright-webkit/start_xvfb_and_run_cmd.sh deleted file mode 100755 index b7e4217..0000000 --- a/node-playwright-webkit/start_xvfb_and_run_cmd.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -echo "Starting X virtual framebuffer using: Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp" -Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp & - -# Execute CMD (original CMD of this Dockerfile gets overridden in actor build) -echo 
"Executing main command" -exec "$@" diff --git a/node-playwright-webkit/webkit_test.js b/node-playwright-webkit/webkit_test.js deleted file mode 100644 index 48db32e..0000000 --- a/node-playwright-webkit/webkit_test.js +++ /dev/null @@ -1,26 +0,0 @@ -const { launchPlaywright } = require('crawlee'); - -const testPageLoading = async (browser) => { - const page = await browser.newPage(); - await page.goto('http://www.example.com'); - const pageTitle = await page.title(); - if (pageTitle !== 'Example Domain') { - throw new Error(`Playwright+Webkit test failed - returned title "${pageTitle}"" !== "Example Domain"`); - } -}; - -const testWebkit = async (launchOptions) => { - const launchContext = { - launcher: require('playwright').webkit, - launchOptions, - }; - - console.log(`Testing Playwright with Webkit`, launchOptions); - - const browser = await launchPlaywright(launchContext); - - await testPageLoading(browser); - await browser.close(); -}; - -module.exports = testWebkit; diff --git a/node-playwright/.dockerignore b/node-playwright/.dockerignore deleted file mode 100644 index b543243..0000000 --- a/node-playwright/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -npm-debug.log -yarn.lock -node_modules -.gitignore \ No newline at end of file diff --git a/node-playwright/Dockerfile b/node-playwright/Dockerfile deleted file mode 100644 index e876111..0000000 --- a/node-playwright/Dockerfile +++ /dev/null @@ -1,109 +0,0 @@ -ARG PLAYWRIGHT_VERSION= -FROM --platform=linux/amd64 mcr.microsoft.com/playwright:${PLAYWRIGHT_VERSION}jammy - -ARG NODE_VERSION=20 - -LABEL maintainer="support@apify.com" description="Base image for Apify Actors using headless Chrome" -ENV DEBIAN_FRONTEND=noninteractive - -# Copy the script for registering intermediate certificates. 
-COPY ./register_intermediate_certs.sh ./register_intermediate_certs.sh - -# Install libs -RUN apt-get update \ - && apt-get install --fix-missing -yq --no-install-recommends procps xvfb xauth wget \ - && mkdir -p /tmp/.X11-unix && chmod 1777 /tmp/.X11-unix \ - # Uninstall system NodeJs - && apt-get purge -yq nodejs \ - # Install node - && apt-get update && apt-get install -y curl \ - && curl -sL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \ - && apt-get install -y nodejs \ - # The following packages are needed for the intermediate certificates to work in Firefox. - ca-certificates \ - jq \ - wget \ - p11-kit \ - # Register cerificates - && chmod +x ./register_intermediate_certs.sh \ - && ./register_intermediate_certs.sh \ - # Disable chrome auto updates, based on https://support.google.com/chrome/a/answer/9052345 - && mkdir -p /etc/default && echo 'repo_add_once=false' > /etc/default/google-chrome \ - # Install chrome - && wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb -nv \ - && apt install --fix-missing -yq ./google-chrome-stable_current_amd64.deb && rm ./google-chrome-stable_current_amd64.deb \ - \ - # Add user so we don't need --no-sandbox. - && groupadd -r myuser && useradd -r -g myuser -G audio,video myuser \ - && mkdir -p /home/myuser/Downloads \ - && chown -R myuser:myuser /home/myuser \ - \ - && mkdir -p /etc/opt/chrome/policies/managed \ - && echo '{ "CommandLineFlagSecurityWarningsEnabled": false }' > /etc/opt/chrome/policies/managed/managed_policies.json \ - && echo '{ "ComponentUpdatesEnabled": false }' > /etc/opt/chrome/policies/managed/component_update.json \ - \ - # Globally disable the update-notifier. - && npm config --global set update-notifier false \ - \ - # Final cleanup - # Cleanup time - && rm -rf /var/lib/apt/lists/* \ - && rm -rf /src/*.deb \ - && apt-get clean -y && apt-get autoremove -y \ - && rm -rf /root/.npm \ - # This is needed to remove an annoying error message when running headful. 
- && mkdir -p /tmp/.X11-unix && chmod 1777 /tmp/.X11-unix - -# Run everything after as non-privileged user. -USER myuser -WORKDIR /home/myuser - -# Point playwright to the preincluded browsers - moving browsers around increases the image size a *lot* -ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright - -# Tell the crawlee cli that we already have browers installed, so it skips installing them -ENV CRAWLEE_SKIP_BROWSER_INSTALL=1 - -# Copy source code and xvfb script -COPY --chown=myuser:myuser package.json main.js chrome_test.js start_xvfb_and_run_cmd.sh new_xvfb_run_cmd.sh /home/myuser/ - -# Sets path to Chrome executable, this is used by Apify.launchPuppeteer() -ENV APIFY_CHROME_EXECUTABLE_PATH=/usr/bin/google-chrome - -# Tell Node.js this is a production environemnt -ENV NODE_ENV=production - -# Enable Node.js process to use a lot of memory (Actor has limit of 32GB) -# Increases default size of headers. The original limit was 80kb, but from node 10+ they decided to lower it to 8kb. -# However they did not think about all the sites there with large headers, -# so we put back the old limit of 80kb, which seems to work just fine. -ENV NODE_OPTIONS="--max_old_space_size=30000 --max-http-header-size=80000" - -# Install default dependencies, print versions of everything -RUN npm --quiet set progress=false \ - && npm install --omit=dev --omit=optional --no-package-lock --prefer-online \ - && echo "Installed NPM packages:" \ - && (npm list --omit=dev --omit=optional || true) \ - && echo "Node.js version:" \ - && node --version \ - && echo "NPM version:" \ - && npm --version \ - && echo "Google Chrome version:" \ - && bash -c "$APIFY_CHROME_EXECUTABLE_PATH --version" \ - # Overrides the dynamic library used by Firefox to determine trusted root certificates with p11-kit-trust.so, which loads the system certificates. 
- && rm $PLAYWRIGHT_BROWSERS_PATH/firefox-*/firefox/libnssckbi.so \ - && ln -s /usr/lib/x86_64-linux-gnu/pkcs11/p11-kit-trust.so $(ls -d $PLAYWRIGHT_BROWSERS_PATH/firefox-*)/firefox/libnssckbi.so - -# Set up xvfb - -# We should you the autodisplay detection as suggested here: https://github.com/microsoft/playwright/issues/2728#issuecomment-678083619 -ENV DISPLAY=:99 -ENV XVFB_WHD=1920x1080x24+32 -# Uncoment this line if you want to run browser in headfull mode by defautl. -# ENV APIFY_XVFB=1 - -# NOTEs: -# - This needs to be compatible with CLI. -# - Using CMD instead of ENTRYPOINT, to allow manual overriding -# CMD ./start_xvfb_and_run_cmd.sh && npm start --silent -CMD ./new_xvfb_run_cmd.sh npm start --silent diff --git a/node-playwright/chrome_test.js b/node-playwright/chrome_test.js deleted file mode 100644 index d07ff03..0000000 --- a/node-playwright/chrome_test.js +++ /dev/null @@ -1,26 +0,0 @@ -const { launchPlaywright } = require('crawlee'); - -const testPageLoading = async (browser) => { - const page = await browser.newPage(); - await page.goto('http://www.example.com'); - const pageTitle = await page.title(); - if (pageTitle !== 'Example Domain') { - throw new Error(`Playwright+Chrome test failed - returned title "${pageTitle}"" !== "Example Domain"`); - } -}; - -const testChrome = async (launchOptions) => { - const launchContext = { useChrome: true, launchOptions }; - - console.log(`Testing Playwright with Chrome`, launchContext); - - const browser = await launchPlaywright(launchContext); - - await testPageLoading(browser); - await browser.close(); -}; - -module.exports = { - testChrome, - testPageLoading, -}; diff --git a/node-playwright/main.js b/node-playwright/main.js deleted file mode 100644 index 18e752f..0000000 --- a/node-playwright/main.js +++ /dev/null @@ -1,44 +0,0 @@ -// This file will be replaced by the content of the Act2.sourceCode field, -// we keep this one here just for testing and clarification. 
- -console.log( - `If you're seeing this text, it means the actor started the default "main.js" file instead -of your own source code file. You have two options how to fix this: -1) Rename your source code file to "main.js" -2) Define custom "package.json" and/or "Dockerfile" that will run your code your way - -For more information, see https://docs.apify.com/actors/development/source-code#custom-dockerfile -`); -console.log('Testing Docker image...'); - -const { Actor } = require('apify'); -const { launchPlaywright, getMemoryInfo } = require('crawlee'); -const playwright = require('playwright'); -const { testChrome, testPageLoading } = require('./chrome_test'); - -Actor.main(async () => { - const browsers = ['webkit', 'firefox', 'chromium']; - const promisesHeadless = browsers.map(async (browserName) => { - const browser = await launchPlaywright({ launcher: playwright[browserName] }); - return testPageLoading(browser); - }); - - const promisesHeadful = browsers.map(async (browserName) => { - const browser = await launchPlaywright({ launcher: playwright[browserName], launchOptions: { headless: false } }); - return testPageLoading(browser); - }); - - await Promise.all(promisesHeadless); - await Promise.all(promisesHeadful); - - // Try to use full Chrome headless - await testChrome({ headless: true }); - - // Try to use full Chrome with XVFB - await testChrome({ headless: false, args: ['--disable-gpu'] }); - - // Test that "ps" command is available, sometimes it was missing in official Node builds - await getMemoryInfo(); - - console.log('All tests passed!'); -}); diff --git a/node-playwright/new_xvfb_run_cmd.sh b/node-playwright/new_xvfb_run_cmd.sh deleted file mode 100755 index 141f76f..0000000 --- a/node-playwright/new_xvfb_run_cmd.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -# xvfb-run -a -s "-ac -screen 0 $XVFB_WHD -nolisten tcp" npm start --silent - -echo "Will run command: xvfb-run -a -s \"-ac -screen 0 $XVFB_WHD -nolisten tcp\" $@" -xvfb-run -a -s "-ac 
-screen 0 $XVFB_WHD -nolisten tcp" "$@" diff --git a/node-playwright/package.json b/node-playwright/package.json deleted file mode 100644 index c0e3fb5..0000000 --- a/node-playwright/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "description": "Anonymous actor on the Apify platform (with Chrome)", - "version": "0.0.1", - "license": "UNLICENSED", - "main": "main.js", - "scripts": { - "start": "node main.js" - }, - "dependencies": { - "apify": "APIFY_VERSION", - "crawlee": "CRAWLEE_VERSION", - "playwright": "PLAYWRIGHT_VERSION", - "typescript": "^5.4.3" - }, - "repository": {} -} diff --git a/node-playwright/register_intermediate_certs.sh b/node-playwright/register_intermediate_certs.sh deleted file mode 100644 index 2ba86ae..0000000 --- a/node-playwright/register_intermediate_certs.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -FILENAMES=$(wget -O - https://firefox.settings.services.mozilla.com/v1/buckets/security-state/collections/intermediates/records | jq -r '.data[].attachment.location') - -mkdir -p /usr/local/share/ca-certificates/firefox - -wget -P "/usr/local/share/ca-certificates/firefox/" -i <(echo $FILENAMES | tr ' ' '\n' | sed -e 's/^/https:\/\/firefox-settings-attachments.cdn.mozilla.net\//g') - -for f in /usr/local/share/ca-certificates/firefox/*.pem; do - mv -- "$f" "${f%.pem}.crt" -done - -chmod 644 /usr/local/share/ca-certificates/firefox/*.crt -chmod 755 /usr/local/share/ca-certificates/firefox - -update-ca-certificates \ No newline at end of file diff --git a/node-playwright/start_xvfb_and_run_cmd.sh b/node-playwright/start_xvfb_and_run_cmd.sh deleted file mode 100755 index b7e4217..0000000 --- a/node-playwright/start_xvfb_and_run_cmd.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -echo "Starting X virtual framebuffer using: Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp" -Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp & - -# Execute CMD (original CMD of this Dockerfile gets overridden in actor build) -echo "Executing main command" 
-exec "$@" diff --git a/node-puppeteer-chrome/.dockerignore b/node-puppeteer-chrome/.dockerignore deleted file mode 100644 index b543243..0000000 --- a/node-puppeteer-chrome/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -npm-debug.log -yarn.lock -node_modules -.gitignore \ No newline at end of file diff --git a/node-puppeteer-chrome/Dockerfile b/node-puppeteer-chrome/Dockerfile deleted file mode 100644 index bb7d015..0000000 --- a/node-puppeteer-chrome/Dockerfile +++ /dev/null @@ -1,107 +0,0 @@ -ARG NODE_VERSION=20 -# Use bookworm to be consistent across node versions. -FROM --platform=linux/amd64 node:${NODE_VERSION}-bookworm-slim - -LABEL maintainer="support@apify.com" description="Base image for Apify Actors using headless Chrome" -ENV DEBIAN_FRONTEND=noninteractive - -# This image was inspired by https://github.com/GoogleChrome/puppeteer/blob/master/docs/troubleshooting.md#running-puppeteer-in-docker - -# Install latest Chrome dev packages and fonts to support major charsets (Chinese, Japanese, Arabic, Hebrew, Thai and a few others) -# Note: this also installs the necessary libs to make the bundled version of Chromium that Puppeteer installs work. 
-RUN apt-get update \ - && apt-get install -y wget gnupg unzip ca-certificates --no-install-recommends \ - && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ - && sh -c 'echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \ - && sh -c 'echo "deb http://ftp.us.debian.org/debian bookworm main non-free" >> /etc/apt/sources.list.d/fonts.list' \ - && apt-get update \ - && apt-get purge --auto-remove -y unzip \ - && apt-get install -y \ - fonts-freefont-ttf \ - fonts-ipafont-gothic \ - fonts-kacst \ - fonts-liberation \ - fonts-thai-tlwg \ - fonts-wqy-zenhei \ - git \ - libxss1 \ - lsb-release \ - procps \ - xdg-utils \ - xvfb \ - xauth \ - --no-install-recommends \ - # Disable chrome auto updates, based on https://support.google.com/chrome/a/answer/9052345 - && mkdir -p /etc/default && echo 'repo_add_once=false' > /etc/default/google-chrome \ - \ - # Install chrome - && wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb -nv \ - && apt install --fix-missing -yq ./google-chrome-stable_current_amd64.deb && rm ./google-chrome-stable_current_amd64.deb \ - \ - # Add user so we don't need --no-sandbox. - && groupadd -r myuser && useradd -r -g myuser -G audio,video myuser \ - && mkdir -p /home/myuser/Downloads \ - && chown -R myuser:myuser /home/myuser \ - \ - && mkdir -p /etc/opt/chrome/policies/managed \ - && echo '{ "CommandLineFlagSecurityWarningsEnabled": false }' > /etc/opt/chrome/policies/managed/managed_policies.json \ - && echo '{ "ComponentUpdatesEnabled": false }' > /etc/opt/chrome/policies/managed/component_update.json \ - \ - # Globally disable the update-notifier. - && npm config --global set update-notifier false \ - # Cleanup - && rm -rf /var/lib/apt/lists/* \ - && rm -rf /src/*.deb \ - && apt-get clean -y && apt-get autoremove -y \ - && rm -rf /root/.npm \ - # This is needed to remove an annoying error message when running headful. 
- && mkdir -p /tmp/.X11-unix && chmod 1777 /tmp/.X11-unix - -# Run everything after as non-privileged user. -USER myuser -WORKDIR /home/myuser - -# Copy source code and xvfb script -COPY --chown=myuser:myuser package.json main.js puppeteer_*.js start_xvfb_and_run_cmd.sh new_xvfb_run_cmd.sh /home/myuser/ - -# Uncomment to skip the chromium download when installing puppeteer. If you do, -# you'll need to launch puppeteer with: -# browser.launch({executablePath: 'google-chrome'}) -# ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true - -# Sets path to Chrome executable, this is used by Apify.launchPuppeteer() -ENV APIFY_CHROME_EXECUTABLE_PATH=/usr/bin/google-chrome - -# Tell the crawlee cli that we already have browers installed, so it skips installing them -ENV CRAWLEE_SKIP_BROWSER_INSTALL=1 - -# Tell Node.js this is a production environemnt -ENV NODE_ENV=production - -# Enable Node.js process to use a lot of memory (Actor has limit of 32GB) -# Increases default size of headers. The original limit was 80kb, but from node 10+ they decided to lower it to 8kb. -# However they did not think about all the sites there with large headers, -# so we put back the old limit of 80kb, which seems to work just fine. -ENV NODE_OPTIONS="--max_old_space_size=30000 --max-http-header-size=80000" - -# Install default dependencies, print versions of everything -RUN npm --quiet set progress=false \ - && npm install --omit=dev --omit=optional --no-package-lock --prefer-online \ - && echo "Installed NPM packages:" \ - && (npm list --omit=dev --omit=optional || true) \ - && echo "Node.js version:" \ - && node --version \ - && echo "NPM version:" \ - && npm --version \ - && echo "Google Chrome version:" \ - && bash -c "$APIFY_CHROME_EXECUTABLE_PATH --version" - -# Set up xvfb -ENV DISPLAY=:99 -ENV XVFB_WHD=1920x1080x24+32 - -# NOTEs: -# - This needs to be compatible with CLI. 
-# - Using CMD instead of ENTRYPOINT, to allow manual overriding -# CMD ./start_xvfb_and_run_cmd.sh && npm start --silent -CMD ./new_xvfb_run_cmd.sh npm start --silent diff --git a/node-puppeteer-chrome/main.js b/node-puppeteer-chrome/main.js deleted file mode 100644 index e9eab3e..0000000 --- a/node-puppeteer-chrome/main.js +++ /dev/null @@ -1,39 +0,0 @@ -// This file will be replaced by the content of the Act2.sourceCode field, -// we keep this one here just for testing and clarification. - -console.log( - `If you're seeing this text, it means the actor started the default "main.js" file instead -of your own source code file. You have two options how to fix this: -1) Rename your source code file to "main.js" -2) Define custom "package.json" and/or "Dockerfile" that will run your code your way - -For more information, see https://docs.apify.com/actors/development/source-code#custom-dockerfile -`); -console.log('Testing Docker image...'); - -const { Actor } = require('apify'); -const { launchPuppeteer, getMemoryInfo } = require('crawlee'); -const testPuppeteerChrome = require('./puppeteer_chrome_test'); - -Actor.main(async () => { - // First, try to open Chromium to see all dependencies are correctly installed - console.log('Testing Puppeteer with Chromium'); - // We need --no-sandbox, because even though the build is running on GitHub, the test is running in Docker. 
- const launchOptions = { headless: true, args: ['--no-sandbox'] }; - const browser1 = await launchPuppeteer({ launchOptions }); - const page1 = await browser1.newPage(); - await page1.goto('http://www.example.com'); - const pageTitle1 = await page1.title(); - if (pageTitle1 !== 'Example Domain') { - throw new Error(`Puppeteer+Chromium test failed - returned title "${pageTitle1}"" !== "Example Domain"`); - } - await browser1.close(); - - // Second, try to use full Chrome - await testPuppeteerChrome(); - - // Test that "ps" command is available, sometimes it was missing in official Node builds - await getMemoryInfo(); - - console.log('... test PASSED'); -}); diff --git a/node-puppeteer-chrome/new_xvfb_run_cmd.sh b/node-puppeteer-chrome/new_xvfb_run_cmd.sh deleted file mode 100755 index 141f76f..0000000 --- a/node-puppeteer-chrome/new_xvfb_run_cmd.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -# xvfb-run -a -s "-ac -screen 0 $XVFB_WHD -nolisten tcp" npm start --silent - -echo "Will run command: xvfb-run -a -s \"-ac -screen 0 $XVFB_WHD -nolisten tcp\" $@" -xvfb-run -a -s "-ac -screen 0 $XVFB_WHD -nolisten tcp" "$@" diff --git a/node-puppeteer-chrome/package.json b/node-puppeteer-chrome/package.json deleted file mode 100644 index 33963a8..0000000 --- a/node-puppeteer-chrome/package.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "description": "Anonymous actor on the Apify platform (with Chrome)", - "version": "0.0.1", - "license": "UNLICENSED", - "main": "main.js", - "scripts": { - "start": "node main.js" - }, - "dependencies": { - "apify": "APIFY_VERSION", - "crawlee": "CRAWLEE_VERSION", - "puppeteer": "PUPPETEER_VERSION", - "typescript": "^5.4.3" - }, - "repository": {} -} diff --git a/node-puppeteer-chrome/puppeteer_chrome_test.js b/node-puppeteer-chrome/puppeteer_chrome_test.js deleted file mode 100644 index 55122a1..0000000 --- a/node-puppeteer-chrome/puppeteer_chrome_test.js +++ /dev/null @@ -1,26 +0,0 @@ -const { launchPuppeteer } = require('crawlee'); - -const 
testPageLoading = async (browser) => { - const page = await browser.newPage(); - await page.goto('http://www.example.com'); - const pageTitle = await page.title(); - if (pageTitle !== 'Example Domain') { - throw new Error(`Puppeteer+Chrome test failed - returned title "${pageTitle}"" !== "Example Domain"`); - } -}; - -const testPuppeteerChrome = async () => { - console.log('Testing Puppeteer with full Chrome'); - // We need --no-sandbox, because even though the build is running on GitHub, the test is running in Docker. - const launchOptions = { headless: true, args: ['--no-sandbox'] }; - const launchContext = { useChrome: true, launchOptions }; - - const browser = await launchPuppeteer(launchContext); - try { - await testPageLoading(browser); - } finally { - await browser.close(); - } -}; - -module.exports = testPuppeteerChrome; diff --git a/node-puppeteer-chrome/start_xvfb_and_run_cmd.sh b/node-puppeteer-chrome/start_xvfb_and_run_cmd.sh deleted file mode 100755 index b7e4217..0000000 --- a/node-puppeteer-chrome/start_xvfb_and_run_cmd.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -echo "Starting X virtual framebuffer using: Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp" -Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp & - -# Execute CMD (original CMD of this Dockerfile gets overridden in actor build) -echo "Executing main command" -exec "$@" diff --git a/node/.dockerignore b/node/.dockerignore deleted file mode 100644 index b543243..0000000 --- a/node/.dockerignore +++ /dev/null @@ -1,4 +0,0 @@ -npm-debug.log -yarn.lock -node_modules -.gitignore \ No newline at end of file diff --git a/node/Dockerfile b/node/Dockerfile deleted file mode 100644 index 07a02a2..0000000 --- a/node/Dockerfile +++ /dev/null @@ -1,39 +0,0 @@ -ARG NODE_VERSION=20 -FROM --platform=linux/amd64 node:${NODE_VERSION}-alpine - -LABEL maintainer="support@apify.com" description="Base image for simple Apify Actors" - -# Globally disable the update-notifier. 
-RUN npm config --global set update-notifier false - -# Create app directory -RUN mkdir -p /usr/src/app -WORKDIR /usr/src/app - -# Copy source code -COPY package.json main.js /usr/src/app/ - -# Install default dependencies, print versions of everything -RUN npm --quiet set progress=false \ - && npm config --global set update-notifier false \ - && npm install --omit=dev --omit=optional --no-package-lock --prefer-online \ - && echo "Installed NPM packages:" \ - && (npm list --omit=dev --omit=optional || true) \ - && echo "Node.js version:" \ - && node --version \ - && echo "NPM version:" \ - && npm --version - -# Tell Node.js this is a production environemnt -ENV NODE_ENV=production - -# Enable Node.js process to use a lot of memory (Actor has limit of 32GB) -# Increases default size of headers. The original limit was 80kb, but from node 10+ they decided to lower it to 8kb. -# However they did not think about all the sites there with large headers, -# so we put back the old limit of 80kb, which seems to work just fine. -ENV NODE_OPTIONS="--max_old_space_size=30000 --max-http-header-size=80000" - -# NOTEs: -# - This needs to be compatible with CLI. -# - Using CMD instead of ENTRYPOINT, to allow manual overriding -CMD npm start --silent diff --git a/node/main.js b/node/main.js deleted file mode 100644 index bf32837..0000000 --- a/node/main.js +++ /dev/null @@ -1,22 +0,0 @@ -// This file will be replaced by the content of the Act2.sourceCode field, -// we keep this one here just for testing and clarification. - -console.log( - `If you're seeing this text, it means the actor started the default "main.js" file instead -of your own source code file. 
You have two options how to fix this: -1) Rename your source code file to "main.js" -2) Define custom "package.json" and/or "Dockerfile" that will run your code your way - -For more information, see https://docs.apify.com/actors/development/source-code#custom-dockerfile -`); -console.log('Testing Docker image...'); - -const { Actor } = require('apify'); -const { getMemoryInfo } = require('crawlee'); - -Actor.main(async () => { - // Test that "ps" command is available, sometimes it was missing in official Node builds - await getMemoryInfo(); - - console.log('... test PASSED'); -}); diff --git a/node/package.json b/node/package.json deleted file mode 100644 index 76ceb47..0000000 --- a/node/package.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "description": "Anonymous actor on the Apify platform", - "version": "0.0.1", - "license": "UNLICENSED", - "main": "main.js", - "scripts": { - "start": "node main.js" - }, - "dependencies": { - "apify": "APIFY_VERSION", - "crawlee": "CRAWLEE_VERSION", - "typescript": "^5.4.3" - }, - "repository": {} -} diff --git a/python-playwright/.dockerignore b/python-playwright/.dockerignore deleted file mode 100644 index 6c7b69a..0000000 --- a/python-playwright/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -.gitignore diff --git a/python-playwright/Dockerfile b/python-playwright/Dockerfile deleted file mode 100644 index 0f98aee..0000000 --- a/python-playwright/Dockerfile +++ /dev/null @@ -1,64 +0,0 @@ -# Get the Python version provided as a build argument -ARG PYTHON_VERSION - -# Extend from the latest Debian and its slim version to keep the image as small as possible -FROM python:${PYTHON_VERSION}-slim-bookworm - -# Add labels to the image to identify it as an Apify Actor -LABEL maintainer="support@apify.com" \ - description="Base image for Apify Actors written in Python using Playwright" - -# Set the shell to use /bin/bash with specific options (see Hadolint DL4006) -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -# Get the Apify Python SDK 
version provided as a build argument -ARG APIFY_VERSION - -# Get the Playwright version provided as a build argument -ARG PLAYWRIGHT_VERSION - -# Don't store bytecode, the Python app will be only run once -ENV PYTHONDONTWRITEBYTECODE=1 - -# Don't buffer output and flush it straight away -ENV PYTHONUNBUFFERED=1 - -# Don't use a cache dir -ENV PIP_NO_CACHE_DIR=1 - -# Disable warnings about outdated pip -ENV PIP_DISABLE_PIP_VERSION_CHECK=1 - -# Disable warnings about running pip as root -ENV PIP_ROOT_USER_ACTION=ignore - -# Set up XVFB -# We should use the autodisplay detection as suggested here: https://github.com/microsoft/playwright/issues/2728#issuecomment-678083619 -ENV DISPLAY=:99 -ENV XVFB_WHD=1920x1080x24+32 - -# This instruction: -# - Upgrades pip to the latest version -# - Preinstalls the latest versions of setuptools and wheel to improve package installation speed -# - Installs the specified version of the Apify Python SDK and Playwright -RUN pip install --upgrade \ - pip \ - setuptools \ - wheel \ - apify~=${APIFY_VERSION} \ - playwright~=${PLAYWRIGHT_VERSION} - -# Install Playwright and its dependencies -RUN playwright install-deps && \ - playwright install - -# Create app directory -RUN mkdir -p /usr/src/app -WORKDIR /usr/src/app - -# Copy the dummy source code to the image -COPY . . 
- -# NOTE: This needs to be compatible with how Apify CLI launches Actors -ENTRYPOINT ["./start_xvfb_and_run_cmd.sh"] -CMD ["python3", "-m", "src"] diff --git a/python-playwright/src/__main__.py b/python-playwright/src/__main__.py deleted file mode 100644 index ab79144..0000000 --- a/python-playwright/src/__main__.py +++ /dev/null @@ -1,9 +0,0 @@ -import asyncio - -from .main import main - -try: - asyncio.run(main()) -except Exception: - print('Test failed!') - raise diff --git a/python-playwright/src/main.py b/python-playwright/src/main.py deleted file mode 100644 index 4c1cff9..0000000 --- a/python-playwright/src/main.py +++ /dev/null @@ -1,20 +0,0 @@ -from playwright.async_api import async_playwright - - -async def run_test(launcher, headless=True): - print(f'Testing {launcher.name} with {headless=}') - browser = await launcher.launch(headless=headless) - page = await browser.new_page() - await page.goto('http://example.com') - if 'Example Domain' != await page.title(): - raise Exception(f'Playwright failed to load! 
({launcher.name}, {headless=})') - await browser.close() - - -async def main(): - async with async_playwright() as playwright: - print('Testing docker image by opening browsers...') - for launcher in [playwright.firefox, playwright.chromium, playwright.webkit]: - await run_test(launcher, headless=True) - await run_test(launcher, headless=False) - print('Testing finished successfully.') diff --git a/python-playwright/start_xvfb_and_run_cmd.sh b/python-playwright/start_xvfb_and_run_cmd.sh deleted file mode 100755 index e7f3bdd..0000000 --- a/python-playwright/start_xvfb_and_run_cmd.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -set -o errexit - -echo "Starting X virtual framebuffer using: Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp" -Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp & - -# Execute CMD (original CMD of this Dockerfile gets overridden in actor build) -echo "Executing main command" -exec "$@" diff --git a/python-selenium/.dockerignore b/python-selenium/.dockerignore deleted file mode 100644 index 6c7b69a..0000000 --- a/python-selenium/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -.gitignore diff --git a/python-selenium/Dockerfile b/python-selenium/Dockerfile deleted file mode 100644 index 214c568..0000000 --- a/python-selenium/Dockerfile +++ /dev/null @@ -1,124 +0,0 @@ -# Get the Python version provided as a build argument -ARG PYTHON_VERSION - -# Extend from the latest Debian and its slim version to keep the image as small as possible -FROM python:${PYTHON_VERSION}-slim-bookworm - -# Add labels to the image to identify it as an Apify Actor -LABEL maintainer="support@apify.com" \ - description="Base image for Apify Actors written in Python using Selenium" - -# Set the shell to use /bin/bash with specific options (see Hadolint DL4006) -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -# Get the Apify Python SDK version provided as a build argument -ARG APIFY_VERSION - -# Get the Selenium version provided as a build argument -ARG SELENIUM_VERSION - 
-# Don't store bytecode, the Python app will be only run once -ENV PYTHONDONTWRITEBYTECODE=1 - -# Don't buffer output and flush it straight away -ENV PYTHONUNBUFFERED=1 - -# Don't use a cache dir -ENV PIP_NO_CACHE_DIR=1 - -# Disable warnings about outdated pip -ENV PIP_DISABLE_PIP_VERSION_CHECK=1 - -# Disable warnings about running pip as root -ENV PIP_ROOT_USER_ACTION=ignore - -# Set up XVFB -# We should use the autodisplay detection as suggested here: https://github.com/microsoft/playwright/issues/2728#issuecomment-678083619 -ENV DISPLAY=:99 -ENV XVFB_WHD=1920x1080x24+32 - -# Install curl, firefox, jq, unzip, xvfb and dependencies of Chrome and its driver -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - ca-certificates \ - curl \ - firefox-esr \ - fonts-liberation \ - jq \ - libappindicator3-1 \ - libasound2 \ - libatk-bridge2.0-0 \ - libgbm-dev \ - libglib2.0-0 \ - libgtk-3-0 \ - libnspr4 \ - libnss3 \ - libx11-6 \ - libx11-xcb1 \ - libxcomposite1 \ - libxcursor1 \ - libxdamage1 \ - libxext6 \ - libxfixes3 \ - libxi6 \ - libxkbcommon0 \ - libxrandr2 \ - libxrender1 \ - libxslt1.1 \ - libxss1 \ - libxt6 \ - libxtst6 \ - unzip \ - xdg-utils \ - xvfb && \ - apt-get autoremove -yqq --purge && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* /var/log/* - -# Download and install Geckodriver -RUN GECKO_DRIVER_URL="https://github.com/mozilla/geckodriver/releases/download/v0.33.0/geckodriver-v0.33.0-linux64.tar.gz" && \ - curl --silent --show-error --location --output /tmp/geckodriver.tar.gz "$GECKO_DRIVER_URL" && \ - tar --gzip --extract --file=/tmp/geckodriver.tar.gz --directory=/usr/local/bin && \ - rm -f /tmp/geckodriver.tar.gz - -# Download and install Google Chrome -RUN CHROME_URL="$( \ - curl --silent --show-error --location https://googlechromelabs.github.io/chrome-for-testing/last-known-good-versions-with-downloads.json | \ - jq -r '.channels.Stable.downloads.chrome[] | select(.platform=="linux64") | .url' \ - )" && \ - curl --silent 
--show-error --location --output /tmp/chrome-linux64.zip "$CHROME_URL" && \ - unzip /tmp/chrome-linux64.zip -d /opt/ && \ - ln -s /opt/chrome-linux64/chrome /usr/bin/google-chrome && \ - ln -s /opt/chrome-linux64/chrome /usr/bin/google-chrome-stable && \ - rm -f /tmp/chrome-linux64.zip - -# Download and install Google Chrome driver -RUN CHROME_DRIVER_URL="$( \ - curl --silent --show-error --location https://googlechromelabs.github.io/chrome-for-testing/last-known-good-versions-with-downloads.json | \ - jq -r '.channels.Stable.downloads.chromedriver[] | select(.platform=="linux64") | .url' \ - )" && \ - curl --silent --show-error --location --output /tmp/chromedriver-linux64.zip "$CHROME_DRIVER_URL" && \ - unzip /tmp/chromedriver-linux64.zip -d /usr/local/bin/ && \ - rm -f /tmp/chromedriver-linux64.zip - -# This instruction: -# - Upgrades pip to the latest version -# - Preinstalls the latest versions of setuptools and wheel to improve package installation speed -# - Installs the specified version of the Apify Python SDK and Selenium -RUN pip install --upgrade \ - pip \ - setuptools \ - wheel \ - apify~=${APIFY_VERSION} \ - selenium~=${SELENIUM_VERSION} - -# Create app directory -RUN mkdir -p /usr/src/app -WORKDIR /usr/src/app - -# Copy the dummy source code to the image -COPY . . 
- -# NOTE: This needs to be compatible with how Apify CLI launches Actors -ENTRYPOINT ["./start_xvfb_and_run_cmd.sh"] -CMD ["python3", "-m", "src"] diff --git a/python-selenium/src/__main__.py b/python-selenium/src/__main__.py deleted file mode 100644 index 24f0d9f..0000000 --- a/python-selenium/src/__main__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .main import main - -try: - main() -except Exception: - print('Test failed!') - raise diff --git a/python-selenium/src/main.py b/python-selenium/src/main.py deleted file mode 100644 index 507f495..0000000 --- a/python-selenium/src/main.py +++ /dev/null @@ -1,25 +0,0 @@ -from selenium.webdriver.chrome.options import Options as ChromeOptions -from selenium.webdriver.firefox.options import Options as FirefoxOptions -from selenium import webdriver - -def main(): - print('Testing Docker image...') - - for (browser_name, driver_class, options_class) in [('Chrome', webdriver.Chrome, ChromeOptions), ('Firefox', webdriver.Firefox, FirefoxOptions)]: - for headless in [True, False]: - print(f'Testing {browser_name}, {headless=}...') - - options = options_class() - options.add_argument('--no-sandbox') - options.add_argument('--disable-dev-shm-usage') - if headless: - options.add_argument('--headless') - - driver = driver_class(options=options) - - driver.get('http://www.example.com') - assert driver.title == 'Example Domain' - - driver.quit() - - print('Tests succeeded!') diff --git a/python-selenium/start_xvfb_and_run_cmd.sh b/python-selenium/start_xvfb_and_run_cmd.sh deleted file mode 100755 index e7f3bdd..0000000 --- a/python-selenium/start_xvfb_and_run_cmd.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -set -o errexit - -echo "Starting X virtual framebuffer using: Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp" -Xvfb $DISPLAY -ac -screen 0 $XVFB_WHD -nolisten tcp & - -# Execute CMD (original CMD of this Dockerfile gets overridden in actor build) -echo "Executing main command" -exec "$@" diff --git a/python/.dockerignore 
b/python/.dockerignore deleted file mode 100644 index 6c7b69a..0000000 --- a/python/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -.gitignore diff --git a/python/Dockerfile b/python/Dockerfile deleted file mode 100644 index a56dbdc..0000000 --- a/python/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# Get the Python version provided as a build argument -ARG PYTHON_VERSION - -# Extend from the latest Debian and its slim version to keep the image as small as possible -FROM python:${PYTHON_VERSION}-slim-bookworm - -# Add labels to the image to identify it as an Apify Actor -LABEL maintainer="support@apify.com" \ - description="Base image for simple Apify Actors written in Python" - -# Set the shell to use /bin/bash with specific options (see Hadolint DL4006) -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -# Get the Apify Python SDK version provided as a build argument -ARG APIFY_VERSION - -# Don't store bytecode, the Python app will be only run once -ENV PYTHONDONTWRITEBYTECODE=1 - -# Don't buffer output and flush it straight away -ENV PYTHONUNBUFFERED=1 - -# Don't use a cache dir -ENV PIP_NO_CACHE_DIR=1 - -# Disable warnings about outdated pip -ENV PIP_DISABLE_PIP_VERSION_CHECK=1 - -# Disable warnings about running pip as root -ENV PIP_ROOT_USER_ACTION=ignore - -# This instruction: -# - Upgrades pip to the latest version -# - Preinstalls the latest versions of setuptools and wheel to improve package installation speed -# - Installs the specified version of the Apify Python SDK -RUN pip install --upgrade \ - pip \ - setuptools \ - wheel \ - apify~=${APIFY_VERSION} - -# Create app directory -RUN mkdir -p /usr/src/app -WORKDIR /usr/src/app - -# Copy the dummy source code to the image -COPY . . 
- -# Set default startup command, using CMD instead of ENTRYPOINT, to allow manual overriding -CMD ["python3", "-m", "src"] diff --git a/python/src/__main__.py b/python/src/__main__.py deleted file mode 100644 index 159e0d5..0000000 --- a/python/src/__main__.py +++ /dev/null @@ -1,21 +0,0 @@ -import asyncio -import logging - -from apify.log import ActorLogFormatter - -from .main import main - -print('Testing Docker image...') - -handler = logging.StreamHandler() -handler.setFormatter(ActorLogFormatter()) - -apify_client_logger = logging.getLogger('apify_client') -apify_client_logger.setLevel(logging.INFO) -apify_client_logger.addHandler(handler) - -apify_logger = logging.getLogger('apify') -apify_logger.setLevel(logging.DEBUG) -apify_logger.addHandler(handler) - -asyncio.run(main()) diff --git a/python/src/main.py b/python/src/main.py deleted file mode 100644 index 4a4c2b2..0000000 --- a/python/src/main.py +++ /dev/null @@ -1,19 +0,0 @@ -# This file will be replaced by the actual actor source code, -# we keep this one here just for testing and clarification. - -from apify import Actor - - -async def main(): - async with Actor: - print('Testing Docker image...') - try: - assert Actor.config.is_at_home is False - - apify_user = await Actor.apify_client.user('apify').get() - assert apify_user is not None - assert apify_user.get('username') == 'apify' - print('Test successful') - except: - print('Test failed') - raise