Skip to content

Unit Tests with 1 shards and 0 replicas #3534

Unit Tests with 1 shards and 0 replicas

Unit Tests with 1 shards and 0 replicas #3534

# Unit-test workflow. Runs the unit tests on an AMD64 machine. It can be
# started by a push or pull request against mainline / releases/*, manually
# (workflow_dispatch), or from another workflow (workflow_call).
name: unit_test_200gb_CI
run-name: Unit Tests with ${{ inputs.number_of_shards || 1 }} shards and ${{ inputs.number_of_replicas || 0 }} replicas

on:
  # The two input sets below are intentionally identical so that a caller
  # workflow and a manual run are described the same way.
  workflow_call:
    inputs:
      number_of_shards:
        type: number
        description: 'Number of shards (content nodes per group in Vespa). Minimum of 1.'
        required: true
        default: 1
      number_of_replicas:
        type: number
        description: 'Number of replicas (groups in Vespa minus 1). Minimum of 0.'
        required: true
        default: 0
  workflow_dispatch:
    inputs:
      number_of_shards:
        type: number
        description: 'Number of shards (content nodes per group in Vespa). Minimum of 1.'
        required: true
        default: 1
      number_of_replicas:
        type: number
        description: 'Number of replicas (groups in Vespa minus 1). Minimum of 0.'
        required: true
        default: 0
  push:
    branches:
      - mainline
      - releases/*
    # Pushes that only touch Markdown files do not trigger the workflow
    paths-ignore:
      - '**.md'
  pull_request:
    branches:
      - mainline
      - releases/*

# One active run per ref and shard/replica combination; a newer run cancels
# the one in progress. The inputs are empty for push and pull_request events.
concurrency:
  group: unit-tests-${{ github.ref }}-${{ inputs.number_of_shards }}-${{ inputs.number_of_replicas }}
  cancel-in-progress: true

permissions:
  contents: read
jobs:
Check-Changes:
  # Publishes `doc_only`: 'true' when every changed file ends in .md,
  # 'false' otherwise. The other jobs use it to decide whether to run.
  runs-on: ubuntu-latest
  outputs:
    doc_only: ${{ steps.set-output.outputs.doc_only }}
  steps:
    - name: Checkout marqo repo
      uses: actions/checkout@v3
      with:
        # Full history, so that both ends of the diff below can be resolved
        fetch-depth: 0
        path: marqo
    - name: Check for Documentation-Only Changes
      id: set-output
      env:
        # Event data is handed over as environment variables instead of being
        # pasted into the script text. Values are empty for events that do
        # not carry them.
        PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        PUSH_BEFORE_SHA: ${{ github.event.before }}
        PUSH_HEAD_SHA: ${{ github.sha }}
      run: |
        cd marqo
        set -x

        # Record the verdict in the job environment and as a step output
        set_doc_only() {
          echo "doc_only=$1" >> "$GITHUB_ENV"
          echo "doc_only=$1" >> "$GITHUB_OUTPUT"
        }

        # Determine BASE_COMMIT and HEAD_COMMIT based on the event type
        if [[ "${GITHUB_EVENT_NAME}" == "pull_request" || "${GITHUB_EVENT_NAME}" == "pull_request_review" ]]; then
          BASE_COMMIT="$PR_BASE_SHA"
          HEAD_COMMIT="$PR_HEAD_SHA"
        elif [[ "${GITHUB_EVENT_NAME}" == "push" ]]; then
          BASE_COMMIT="$PUSH_BEFORE_SHA"
          HEAD_COMMIT="$PUSH_HEAD_SHA"
        elif [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]]; then
          # For manual dispatch, explicitly set doc_only
          set_doc_only false
          exit 0
        else
          echo "Unsupported event: ${GITHUB_EVENT_NAME}"
          exit 1
        fi

        # Debug: Print base and head commits
        echo "Base commit: $BASE_COMMIT"
        echo "Head commit: $HEAD_COMMIT"

        # If the diff itself fails (for example the base commit is the
        # all-zero SHA of a newly created branch, or is not present in this
        # clone), do not mistake "no output" for "documentation only": run
        # the tests.
        if ! CHANGED_FILES=$(git diff --name-only "$BASE_COMMIT" "$HEAD_COMMIT" --); then
          echo "Could not diff $BASE_COMMIT and $HEAD_COMMIT; treating the change as non-documentation."
          set_doc_only false
          exit 0
        fi

        # grep prints (and succeeds on) any changed file that is not Markdown.
        # An empty change list counts as documentation-only.
        if [[ -n "$CHANGED_FILES" ]] && grep -vE '\.(md)$' <<< "$CHANGED_FILES"; then
          set_doc_only false
        else
          set_doc_only true
        fi
Determine-Vespa-Setup:
  # Derives two outputs from the shard/replica inputs:
  #   VESPA_MULTINODE_SETUP - 'true' when shards > 1 or replicas > 0
  #   MULTINODE_TEST_ARGS   - extra pytest arguments for the chosen setup
  # Run only if there are non-documentation changes.
  if: ${{ needs.Check-Changes.outputs.doc_only == 'false' }}
  needs: [Check-Changes]
  runs-on: ubuntu-latest
  outputs:
    VESPA_MULTINODE_SETUP: ${{ steps.set_var.outputs.VESPA_MULTINODE_SETUP }}
    MULTINODE_TEST_ARGS: ${{ steps.set_var.outputs.MULTINODE_TEST_ARGS }}
  steps:
    - name: Determine VESPA_MULTINODE_SETUP
      id: set_var
      run: |
        # Single-node values are written first; the multi-node branch
        # further down writes the same two keys again.
        {
          echo "VESPA_MULTINODE_SETUP=false"
          # The coverage threshold is only enforced for the single-node run
          echo "MULTINODE_TEST_ARGS=--cov-fail-under=69"
        } >> $GITHUB_OUTPUT
        echo "First assuming single node Vespa setup."

        # Inputs are absent for push / pull_request runs: fall back to
        # 1 shard and 0 replicas.
        shards_raw="${{ inputs.number_of_shards || 1 }}"
        replicas_raw="${{ inputs.number_of_replicas || 0 }}"

        # Truncate both values to integers
        shard_count=$(awk '{print int($0)}' <<< "$shards_raw")
        replica_count=$(awk '{print int($0)}' <<< "$replicas_raw")

        if [[ "$shard_count" -gt 1 ]] || [[ "$replica_count" -gt 0 ]]; then
          echo "Now using multi-node Vespa setup. Shards are $shard_count and replicas are $replica_count."
          echo "VESPA_MULTINODE_SETUP=true" >> $GITHUB_OUTPUT

          # On multi-node Vespa, unrelated tests are ignored to save time
          # and prevent errors.
          multinode_args="--multinode"
          for ignored_path in \
            tests/integ_tests/core/index_management/test_index_management.py \
            tests/integ_tests/core/inference \
            tests/integ_tests/processing \
            tests/integ_tests/s2_inference; do
            multinode_args+=" --ignore=${ignored_path}"
          done
          echo "MULTINODE_TEST_ARGS=${multinode_args}" >> $GITHUB_OUTPUT
        fi
Start-Runner:
  # Launches the EC2 instance that the test job runs on and publishes the
  # runner label and instance id for the jobs that follow.
  name: Start self-hosted EC2 runner
  # Run only if there are non-documentation changes
  if: ${{ needs.Check-Changes.outputs.doc_only == 'false' }}
  needs: [Determine-Vespa-Setup, Check-Changes]
  runs-on: ubuntu-latest
  outputs:
    label: ${{ steps.start-ec2-runner.outputs.label }}
    ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v1
      with:
        aws-region: us-east-1
        aws-access-key-id: ${{ secrets.MARQO_WORKFLOW_TESTS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.MARQO_WORKFLOW_TESTS_SECRET_ACCESS_KEY }}
    - name: Start EC2 runner
      id: start-ec2-runner
      uses: machulav/ec2-github-runner@v2
      with:
        mode: start
        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
        ec2-image-id: ${{ vars.MARQO_CPU_AMD64_TESTS_INSTANCE_AMI }}
        # Instance size follows the Vespa layout:
        # multi-node -> m6i.2xlarge, single node -> m6i.xlarge
        ec2-instance-type: ${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP == 'true' && 'm6i.2xlarge' || 'm6i.xlarge' }}
        subnet-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SUBNET_ID }}
        security-group-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SECURITY_GROUP_ID }}
        # Optional; tagging requires additional permissions.
        # NOTE(review): the tag key "WorlflowURL" looks like a misspelling of
        # "WorkflowURL"; kept as is in case something reads the tag.
        aws-resource-tags: >
          [
            {"Key": "Name", "Value": "marqo-github-runner-${{ github.run_id }}"},
            {"Key": "GitHubRepo", "Value": "${{ github.repository }}"},
            {"Key": "WorkflowName", "Value": "${{ github.workflow }}"},
            {"Key": "WorkflowRunId", "Value": "${{ github.run_id }}"},
            {"Key": "WorlflowURL", "Value": "${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}"},
            {"Key": "PoloRole", "Value": "testing"}
          ]
Test-Marqo:
  # Builds and starts Vespa on the EC2 runner, runs the test suite under
  # tests/integ_tests/ with coverage, checks coverage of the changed lines on
  # pull requests, and uploads the HTML coverage report.
  name: Run Unit Tests
  needs:
    - Check-Changes  # provides doc_only for the condition below
    - Start-Runner  # required to get output from the start-runner job
    - Determine-Vespa-Setup  # provides the two env values below
  if: ${{ needs.Check-Changes.outputs.doc_only == 'false' }}  # Run only if there are non-documentation changes
  runs-on: ${{ needs.Start-Runner.outputs.label }}  # run the job on the newly created runner
  environment: marqo-test-suite
  env:
    VESPA_MULTINODE_SETUP: ${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP }}
    MULTINODE_TEST_ARGS: ${{ needs.Determine-Vespa-Setup.outputs.MULTINODE_TEST_ARGS }}
  steps:
    - name: Checkout marqo repo
      uses: actions/checkout@v3
      with:
        path: marqo
    - name: Set up Python 3.9
      uses: actions/setup-python@v3
      with:
        python-version: "3.9"
        cache: "pip"
    - name: Checkout marqo-base for requirements
      uses: actions/checkout@v3
      with:
        repository: marqo-ai/marqo-base
        path: marqo-base
    - name: Install dependencies
      run: |
        pip install -r marqo-base/requirements/amd64-gpu-requirements.txt
        # override base requirements with marqo requirements, if needed:
        pip install -r marqo/requirements.dev.txt
    - name: Download nltk data
      run: |
        python -m nltk.downloader punkt_tab
    - name: Build Vespa
      run: |
        # Remove unattended-upgrades before the installs below.
        # Every package command in this step goes through sudo.
        sudo systemctl stop unattended-upgrades
        sudo apt-get remove -y unattended-upgrades

        # Function to wait for the dpkg lock to be released
        function wait_for_dpkg_lock() {
          while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; do
            echo "Waiting for the dpkg lock to be released..."
            sleep 5
          done
        }

        # Wait for the dpkg lock before updating and installing
        wait_for_dpkg_lock
        echo "Updating package list"
        sudo apt-get update -y

        # Build Vespa components
        echo "Installing jdk 17"
        sudo apt-get install openjdk-17-jdk -y
        echo "Installing maven"
        sudo apt-get install maven -y
        echo "Building Vespa components"
        cd marqo/vespa
        mvn clean package
    - name: Start Vespa
      run: python marqo/scripts/vespa_local/vespa_local.py full-start --Shards ${{ inputs.number_of_shards || 1 }} --Replicas ${{ inputs.number_of_replicas || 0 }}
    - name: Run Unit Tests
      env:
        # Secrets reach the tests as environment variables of the step; they
        # are not pasted into the script text, so the shell never parses
        # their contents.
        PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID: ${{ secrets.PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID }}
        PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY: ${{ secrets.PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY }}
        PRIVATE_MODEL_TESTS_HF_TOKEN: ${{ secrets.PRIVATE_MODEL_TESTS_HF_TOKEN }}
      run: |
        # Define these for use by marqo
        export VESPA_CONFIG_URL=http://localhost:19071
        export VESPA_DOCUMENT_URL=http://localhost:8080
        export VESPA_QUERY_URL=http://localhost:8080

        cd marqo
        export PYTHONPATH="./src:."

        # Without pipefail the exit status of the pipeline would be tee's,
        # and a failing pytest would go unnoticed.
        set -o pipefail
        pytest ${{ env.MULTINODE_TEST_ARGS }} --ignore=tests/integ_tests/test_documentation.py \
          --durations=100 --cov=src --cov-branch --cov-context=test \
          --cov-report=html:cov_html --cov-report=xml:cov.xml --cov-report term:skip-covered \
          --md-report --md-report-flavor gfm --md-report-output pytest_result_summary.md \
          tests/integ_tests/ | tee pytest_output.txt
    - name: Check Test Coverage of New Code
      id: check_test_coverage
      # A failure of this step does not fail the job
      continue-on-error: true
      env:
        # Empty for events other than pull_request
        BASE_BRANCH: ${{ github.event.pull_request.base.ref }}
      run: |
        if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
          cd marqo
          echo "Running diff-cover against branch $BASE_BRANCH"
          git fetch origin "$BASE_BRANCH:$BASE_BRANCH"
          diff-cover cov.xml --html-report diff_cov.html --markdown-report diff_cov.md \
            --compare-branch "$BASE_BRANCH" --fail-under=95
        else
          # Placeholder reports are written so both files exist on every run
          echo "Skipping diff-cover on Push events"
          echo "Skipped diff-cover on Push events" > marqo/diff_cov.md
          touch marqo/diff_cov.html
        fi
    - name: Upload Test Report
      uses: actions/upload-artifact@v4
      with:
        name: marqo-test-report
        path: marqo/cov_html/
Stop-Runner:
  # Stops the EC2 runner that Start-Runner launched. The condition uses
  # always() so this job is evaluated whatever the result of the test job,
  # and it runs only when Start-Runner itself succeeded.
  name: Stop self-hosted EC2 runner
  needs:
    - Check-Changes
    - Start-Runner  # required to get output from the start-runner job
    - Test-Marqo  # required to wait when the main job is done
  runs-on: ubuntu-latest
  if: ${{ always() && needs.Start-Runner.result == 'success' }}
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v1
      with:
        aws-access-key-id: ${{ secrets.MARQO_WORKFLOW_TESTS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.MARQO_WORKFLOW_TESTS_SECRET_ACCESS_KEY }}
        aws-region: us-east-1
    - name: Stop EC2 runner
      uses: machulav/ec2-github-runner@v2
      with:
        mode: stop
        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
        # Both values are referenced through the job id exactly as declared
        label: ${{ needs.Start-Runner.outputs.label }}
        ec2-instance-id: ${{ needs.Start-Runner.outputs.ec2-instance-id }}