Unit Tests with 1 shards and 0 replicas #3534
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: unit_test_200gb_CI | |
run-name: Unit Tests with ${{ inputs.number_of_shards || 1 }} shards and ${{ inputs.number_of_replicas || 0 }} replicas | |
# runs unit tests on AMD64 machine | |
on: | |
workflow_call: | |
inputs: | |
number_of_shards: | |
type: number | |
description: 'Number of shards (content nodes per group in Vespa). Minimum of 1.' | |
required: true | |
default: 1 | |
number_of_replicas: | |
type: number | |
description: 'Number of replicas (groups in Vespa minus 1). Minimum of 0.' | |
required: true | |
default: 0 | |
workflow_dispatch: | |
inputs: | |
number_of_shards: | |
type: number | |
description: 'Number of shards (content nodes per group in Vespa). Minimum of 1.' | |
required: true | |
default: 1 | |
number_of_replicas: | |
type: number | |
description: 'Number of replicas (groups in Vespa - 1)' | |
required: true | |
default: 0 | |
push: | |
branches: | |
- mainline | |
- releases/* | |
paths-ignore: | |
- '**.md' | |
pull_request: | |
branches: | |
- mainline | |
- releases/* | |
concurrency: | |
group: unit-tests-${{ github.ref }}-${{ inputs.number_of_shards }}-${{ inputs.number_of_replicas }} | |
cancel-in-progress: true | |
permissions: | |
contents: read | |
jobs: | |
Check-Changes: | |
runs-on: ubuntu-latest | |
outputs: | |
doc_only: ${{ steps.set-output.outputs.doc_only }} | |
steps: | |
- name: Checkout marqo repo | |
uses: actions/checkout@v3 | |
with: | |
fetch-depth: 0 | |
path: marqo | |
- name: Check for Documentation-Only Changes | |
id: set-output | |
run: | | |
cd marqo | |
set -x | |
# Determine BASE_COMMIT and HEAD_COMMIT based on the event type | |
if [[ "${GITHUB_EVENT_NAME}" == "pull_request" || "${GITHUB_EVENT_NAME}" == "pull_request_review" ]]; then | |
BASE_COMMIT=${{ github.event.pull_request.base.sha }} | |
HEAD_COMMIT=${{ github.event.pull_request.head.sha }} | |
elif [[ "${GITHUB_EVENT_NAME}" == "push" ]]; then | |
BASE_COMMIT=${{ github.event.before }} | |
HEAD_COMMIT=${{ github.sha }} | |
elif [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]]; then | |
# For manual dispatch, explicitly set doc_only | |
echo "doc_only=false" >> $GITHUB_ENV | |
echo "doc_only=false" >> $GITHUB_OUTPUT | |
exit 0 | |
else | |
echo "Unsupported event: ${GITHUB_EVENT_NAME}" | |
exit 1 | |
fi | |
# Debug: Print base and head commits | |
echo "Base commit: $BASE_COMMIT" | |
echo "Head commit: $HEAD_COMMIT" | |
# Perform the diff to check for non-documentation changes | |
if git diff --name-only $BASE_COMMIT $HEAD_COMMIT -- | grep -vE '\.(md)$'; then | |
echo "doc_only=false" >> $GITHUB_ENV | |
echo "doc_only=false" >> $GITHUB_OUTPUT | |
else | |
echo "doc_only=true" >> $GITHUB_ENV | |
echo "doc_only=true" >> $GITHUB_OUTPUT | |
fi | |
Determine-Vespa-Setup: | |
needs: | |
- Check-Changes | |
runs-on: ubuntu-latest | |
if: ${{ needs.Check-Changes.outputs.doc_only == 'false' }} # Run only if there are non-documentation changes | |
outputs: | |
VESPA_MULTINODE_SETUP: ${{ steps.set_var.outputs.VESPA_MULTINODE_SETUP }} | |
MULTINODE_TEST_ARGS: ${{ steps.set_var.outputs.MULTINODE_TEST_ARGS }} | |
steps: | |
- name: Determine VESPA_MULTINODE_SETUP | |
id: set_var | |
run: | | |
# For single node, initialize as false | |
echo "VESPA_MULTINODE_SETUP=false" >> $GITHUB_OUTPUT | |
# Only enforce coverage check if single node | |
echo "MULTINODE_TEST_ARGS=--cov-fail-under=69" >> $GITHUB_OUTPUT | |
echo "First assuming single node Vespa setup." | |
# Extract inputs safely, defaulting to 1 (for shards), 0 (for replicas) if not present | |
NUMBER_OF_SHARDS="${{ inputs.number_of_shards || 1 }}" | |
NUMBER_OF_REPLICAS="${{ inputs.number_of_replicas || 0 }}" | |
# Convert inputs to integers | |
NUMBER_OF_SHARDS_INT=$(echo "$NUMBER_OF_SHARDS" | awk '{print int($0)}') | |
NUMBER_OF_REPLICAS_INT=$(echo "$NUMBER_OF_REPLICAS" | awk '{print int($0)}') | |
# Evaluate the conditions | |
if [[ "$NUMBER_OF_SHARDS_INT" -gt 1 || "$NUMBER_OF_REPLICAS_INT" -gt 0 ]]; then | |
echo "Now using multi-node Vespa setup. Shards are $NUMBER_OF_SHARDS_INT and replicas are $NUMBER_OF_REPLICAS_INT." | |
echo "VESPA_MULTINODE_SETUP=true" >> $GITHUB_OUTPUT | |
# If multinode vespa, ignore unrelated tests to save time and prevent errors | |
echo "MULTINODE_TEST_ARGS=--multinode --ignore=tests/integ_tests/core/index_management/test_index_management.py --ignore=tests/integ_tests/core/inference --ignore=tests/integ_tests/processing --ignore=tests/integ_tests/s2_inference" >> $GITHUB_OUTPUT | |
fi | |
Start-Runner: | |
needs: | |
- Determine-Vespa-Setup | |
- Check-Changes | |
name: Start self-hosted EC2 runner | |
runs-on: ubuntu-latest | |
if: ${{ needs.Check-Changes.outputs.doc_only == 'false' }} # Run only if there are non-documentation changes | |
outputs: | |
label: ${{ steps.start-ec2-runner.outputs.label }} | |
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} | |
steps: | |
- name: Configure AWS credentials | |
uses: aws-actions/configure-aws-credentials@v1 | |
with: | |
aws-access-key-id: ${{ secrets.MARQO_WORKFLOW_TESTS_ACCESS_KEY_ID }} | |
aws-secret-access-key: ${{ secrets.MARQO_WORKFLOW_TESTS_SECRET_ACCESS_KEY }} | |
aws-region: us-east-1 | |
- name: Start EC2 runner | |
id: start-ec2-runner | |
uses: machulav/ec2-github-runner@v2 | |
with: | |
mode: start | |
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} | |
ec2-image-id: ${{ vars.MARQO_CPU_AMD64_TESTS_INSTANCE_AMI }} | |
# m6i.xlarge if single node vespa, but m6i.2xlarge if multinode vespa | |
ec2-instance-type: ${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP == 'true' && 'm6i.2xlarge' || 'm6i.xlarge' }} | |
subnet-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SUBNET_ID }} | |
security-group-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SECURITY_GROUP_ID }} | |
aws-resource-tags: > # optional, requires additional permissions | |
[ | |
{"Key": "Name", "Value": "marqo-github-runner-${{ github.run_id }}"}, | |
{"Key": "GitHubRepo", "Value": "${{ github.repository }}"}, | |
{"Key": "WorkflowName", "Value": "${{ github.workflow }}"}, | |
{"Key": "WorkflowRunId", "Value": "${{ github.run_id }}"}, | |
{"Key": "WorlflowURL", "Value": "${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}"}, | |
{"Key": "PoloRole", "Value": "testing"} | |
] | |
Test-Marqo: | |
name: Run Unit Tests | |
needs: | |
- Check-Changes # required to start the main job when the runner is ready | |
- Start-Runner # required to get output from the start-runner job | |
- Determine-Vespa-Setup | |
if: ${{ needs.Check-Changes.outputs.doc_only == 'false' }} # Run only if there are non-documentation changes | |
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner | |
environment: marqo-test-suite | |
env: | |
VESPA_MULTINODE_SETUP: ${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP }} | |
MULTINODE_TEST_ARGS: ${{ needs.Determine-Vespa-Setup.outputs.MULTINODE_TEST_ARGS }} | |
steps: | |
- name: Checkout marqo repo | |
uses: actions/checkout@v3 | |
with: | |
path: marqo | |
- name: Set up Python 3.9 | |
uses: actions/setup-python@v3 | |
with: | |
python-version: "3.9" | |
cache: "pip" | |
- name: Checkout marqo-base for requirements | |
uses: actions/checkout@v3 | |
with: | |
repository: marqo-ai/marqo-base | |
path: marqo-base | |
- name: Install dependencies | |
run: | | |
pip install -r marqo-base/requirements/amd64-gpu-requirements.txt | |
# override base requirements with marqo requirements, if needed: | |
pip install -r marqo/requirements.dev.txt | |
- name: Download nltk data | |
run: | | |
python -m nltk.downloader punkt_tab | |
- name: Build Vespa | |
run: | | |
systemctl stop unattended-upgrades | |
apt-get remove -y unattended-upgrades | |
# Function to wait for the dpkg lock to be released | |
function wait_for_dpkg_lock() { | |
while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; do | |
echo "Waiting for the dpkg lock to be released..." | |
sleep 5 | |
done | |
} | |
# Wait for the dpkg lock before updating and installing | |
wait_for_dpkg_lock | |
echo "Updating package list" | |
apt-get update -y | |
# Build Vespa components | |
echo "Installing jdk 17" | |
sudo apt-get install openjdk-17-jdk -y | |
echo "Installing maven" | |
sudo apt-get install maven -y | |
echo "Building Vespa components" | |
cd marqo/vespa | |
mvn clean package | |
- name: Start Vespa | |
run: python marqo/scripts/vespa_local/vespa_local.py full-start --Shards ${{ inputs.number_of_shards || 1 }} --Replicas ${{ inputs.number_of_replicas || 0 }} | |
- name: Run Unit Tests | |
run: | | |
# Define these for use by marqo | |
export VESPA_CONFIG_URL=http://localhost:19071 | |
export VESPA_DOCUMENT_URL=http://localhost:8080 | |
export VESPA_QUERY_URL=http://localhost:8080 | |
export PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID=${{ secrets.PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID }} | |
export PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY=${{ secrets.PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY }} | |
export PRIVATE_MODEL_TESTS_HF_TOKEN=${{ secrets.PRIVATE_MODEL_TESTS_HF_TOKEN }} | |
cd marqo | |
export PYTHONPATH="./src:." | |
set -o pipefail | |
pytest ${{ env.MULTINODE_TEST_ARGS }} --ignore=tests/integ_tests/test_documentation.py \ | |
--durations=100 --cov=src --cov-branch --cov-context=test \ | |
--cov-report=html:cov_html --cov-report=xml:cov.xml --cov-report term:skip-covered \ | |
--md-report --md-report-flavor gfm --md-report-output pytest_result_summary.md \ | |
tests/integ_tests/ | tee pytest_output.txt | |
- name: Check Test Coverage of New Code | |
id: check_test_coverage | |
continue-on-error: true | |
run: | | |
if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then | |
export BASE_BRANCH="${{ github.event.pull_request.base.ref }}" | |
cd marqo | |
echo "Running diff-cover against branch $BASE_BRANCH" | |
git fetch origin $BASE_BRANCH:$BASE_BRANCH | |
diff-cover cov.xml --html-report diff_cov.html --markdown-report diff_cov.md \ | |
--compare-branch $BASE_BRANCH --fail-under=95 | |
else | |
echo "Skipping diff-cover on Push events" | |
echo "Skipped diff-cover on Push events" > marqo/diff_cov.md | |
touch marqo/diff_cov.html | |
fi | |
- name: Upload Test Report | |
uses: actions/upload-artifact@v4 | |
with: | |
name: marqo-test-report | |
path: marqo/cov_html/ | |
Stop-Runner: | |
name: Stop self-hosted EC2 runner | |
needs: | |
- Check-Changes # required to start the main job when the runner is ready | |
- Start-Runner # required to get output from the start-runner job | |
- Test-Marqo # required to wait when the main job is done | |
runs-on: ubuntu-latest | |
if: | | |
always() && needs.Start-Runner.result == 'success' | |
steps: | |
- name: Configure AWS credentials | |
uses: aws-actions/configure-aws-credentials@v1 | |
with: | |
aws-access-key-id: ${{ secrets.MARQO_WORKFLOW_TESTS_ACCESS_KEY_ID }} | |
aws-secret-access-key: ${{ secrets.MARQO_WORKFLOW_TESTS_SECRET_ACCESS_KEY }} | |
aws-region: us-east-1 | |
- name: Stop EC2 runner | |
uses: machulav/ec2-github-runner@v2 | |
with: | |
mode: stop | |
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} | |
label: ${{ needs.start-runner.outputs.label }} | |
ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} |