Unit Tests with 2 shards and 1 replicas #3297

Workflow file for this run

.github/workflows/unit_test_200gb_CI.yml at 05384c0

	# Workflow identifier shown in the Actions sidebar.
	name: unit_test_200gb_CI
	# Per-run title. `inputs` is empty for push / pull_request runs, so the `||`
	# fallbacks (1 shard, 0 replicas) supply the numbers in that case.
	run-name: Unit Tests with ${{ inputs.number_of_shards || 1 }} shards and ${{ inputs.number_of_replicas || 0 }} replicas
	# runs unit tests on AMD64 machine

	# Triggers: pushes and pull requests against mainline / releases/*, manual
	# dispatch, and calls from other workflows. The two input-taking triggers
	# declare the same pair of numeric inputs.
	on:
	  push:
	    branches: [mainline, "releases/*"]
	  pull_request:
	    branches: [mainline, "releases/*"]

	  workflow_call:
	    inputs:
	      number_of_shards:
	        description: "Number of shards (content nodes per group in Vespa). Minimum of 1."
	        type: number
	        required: true
	        default: 1
	      number_of_replicas:
	        description: "Number of replicas (groups in Vespa minus 1). Minimum of 0."
	        type: number
	        required: true
	        default: 0

	  workflow_dispatch:
	    inputs:
	      number_of_shards:
	        description: "Number of shards (content nodes per group in Vespa). Minimum of 1."
	        type: number
	        required: true
	        default: 1
	      number_of_replicas:
	        description: "Number of replicas (groups in Vespa - 1)"
	        type: number
	        required: true
	        default: 0

	# One active run per ref and shard/replica combination; a newer run in the
	# same group cancels the one still in progress.
	concurrency:
	  cancel-in-progress: true
	  group: 'unit-tests-${{ github.ref }}-${{ inputs.number_of_shards }}-${{ inputs.number_of_replicas }}'

	# The workflow token is limited to reading repository contents.
	permissions:
	  contents: read

	jobs:
	# Decides whether this run needs a multi-node Vespa cluster and publishes the
	# decision, plus the matching extra pytest arguments, as job outputs.
	#   VESPA_MULTINODE_SETUP: "true" when shards > 1 or replicas > 0, else "false"
	#   MULTINODE_TEST_ARGS:   extra pytest flags for multi-node runs, else empty
	Determine-Vespa-Setup:
	  runs-on: ubuntu-latest
	  outputs:
	    VESPA_MULTINODE_SETUP: ${{ steps.set_var.outputs.VESPA_MULTINODE_SETUP }}
	    MULTINODE_TEST_ARGS: ${{ steps.set_var.outputs.MULTINODE_TEST_ARGS }}
	  steps:
	    - name: Determine VESPA_MULTINODE_SETUP
	      id: set_var
	      env:
	        # `inputs` is empty on push / pull_request, so the fallbacks (1 shard,
	        # 0 replicas) apply there and the run stays single-node.
	        NUMBER_OF_SHARDS: ${{ inputs.number_of_shards || 1 }}
	        NUMBER_OF_REPLICAS: ${{ inputs.number_of_replicas || 0 }}
	      run: |
	        # github.event_name is deliberately not consulted: inside a reusable
	        # workflow it reports the caller's event (push, schedule, ...), never
	        # "workflow_call", so gating on it would ignore the caller's inputs.

	        # Convert inputs to integers
	        NUMBER_OF_SHARDS_INT=$(echo "$NUMBER_OF_SHARDS" | awk '{print int($0)}')
	        NUMBER_OF_REPLICAS_INT=$(echo "$NUMBER_OF_REPLICAS" | awk '{print int($0)}')

	        # Single-node unless the requested topology says otherwise
	        VESPA_MULTINODE_SETUP=false
	        MULTINODE_TEST_ARGS=""

	        if [[ "$NUMBER_OF_SHARDS_INT" -gt 1 || "$NUMBER_OF_REPLICAS_INT" -gt 0 ]]; then
	          echo "Using multi-node Vespa setup. Shards are $NUMBER_OF_SHARDS_INT and replicas are $NUMBER_OF_REPLICAS_INT."
	          VESPA_MULTINODE_SETUP=true
	          # If multinode vespa, ignore unrelated tests to save time
	          MULTINODE_TEST_ARGS="--multinode --ignore=tests/core/index_management/test_index_management.py --ignore=tests/core/inference --ignore=tests/processing --ignore=tests/s2_inference"
	        fi

	        # Each output is written exactly once, with its final value
	        echo "VESPA_MULTINODE_SETUP=$VESPA_MULTINODE_SETUP" >> "$GITHUB_OUTPUT"
	        echo "MULTINODE_TEST_ARGS=$MULTINODE_TEST_ARGS" >> "$GITHUB_OUTPUT"

	# Starts a self-hosted EC2 runner for the test job and exposes the runner
	# label and EC2 instance id as outputs for the jobs that use and stop it.
	Start-Runner:
	  needs:
	    - Determine-Vespa-Setup
	  name: Start self-hosted EC2 runner
	  runs-on: ubuntu-latest
	  outputs:
	    label: ${{ steps.start-ec2-runner.outputs.label }}
	    ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
	  steps:
	    - name: Configure AWS credentials
	      uses: aws-actions/configure-aws-credentials@v1
	      with:
	        aws-access-key-id: ${{ secrets.MARQO_WORKFLOW_TESTS_ACCESS_KEY_ID }}
	        aws-secret-access-key: ${{ secrets.MARQO_WORKFLOW_TESTS_SECRET_ACCESS_KEY }}
	        aws-region: us-east-1
	    - name: Start EC2 runner
	      id: start-ec2-runner
	      uses: machulav/ec2-github-runner@v2
	      with:
	        mode: start
	        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
	        ec2-image-id: ${{ vars.MARQO_CPU_AMD64_TESTS_INSTANCE_AMI }}
	        # m6i.xlarge if single node vespa, but m6i.2xlarge if multinode vespa
	        ec2-instance-type: ${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP == 'true' && 'm6i.2xlarge' || 'm6i.xlarge' }}
	        subnet-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SUBNET_ID }}
	        security-group-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SECURITY_GROUP_ID }}
	        # NOTE(review): the "WorlflowURL" tag key below looks like a typo of
	        # "WorkflowURL"; left unchanged in case tooling matches on the existing
	        # key - confirm before renaming.
	        aws-resource-tags: > # optional, requires additional permissions
	          [
	            {"Key": "Name", "Value": "marqo-github-runner-${{ github.run_id }}"},
	            {"Key": "GitHubRepo", "Value": "${{ github.repository }}"},
	            {"Key": "WorkflowName", "Value": "${{ github.workflow }}"},
	            {"Key": "WorkflowRunId", "Value": "${{ github.run_id }}"},
	            {"Key": "WorlflowURL", "Value": "${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}"},
	            {"Key": "PoloRole", "Value": "testing"}
	          ]

	# Runs the unit tests on the runner created by Start-Runner. The two values
	# computed by Determine-Vespa-Setup are exported to every step as environment
	# variables.
	Test-Marqo:
	  name: Run Unit Tests
	  needs:
	    - Start-Runner
	    - Determine-Vespa-Setup
	  # Job id written exactly as declared (Start-Runner), matching the
	  # Determine-Vespa-Setup references below.
	  runs-on: ${{ needs.Start-Runner.outputs.label }} # run the job on the newly created runner
	  environment: marqo-test-suite
	  env:
	    VESPA_MULTINODE_SETUP: ${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP }}
	    MULTINODE_TEST_ARGS: ${{ needs.Determine-Vespa-Setup.outputs.MULTINODE_TEST_ARGS }}
	  steps:
	# Check out this repository into ./marqo; later steps `cd marqo`.
	- name: Checkout marqo repo
	  uses: actions/checkout@v3
	  with:
	    path: marqo
	    # presumably full history is wanted for the later branch comparison in
	    # the diff-cover step - verify
	    fetch-depth: 0

	# Install the Python interpreter used by the dependency and test steps,
	# with the action's pip caching enabled.
	- name: Set up Python 3.9
	  uses: actions/setup-python@v3
	  with:
	    cache: 'pip'
	    # kept quoted so the version stays a string rather than a float
	    python-version: '3.9'

	# Check out marqo-ai/marqo-base into ./marqo-base; the install step reads
	# its requirements file from there.
	- name: Checkout marqo-base for requirements
	  uses: actions/checkout@v3
	  with:
	    path: marqo-base
	    repository: marqo-ai/marqo-base

	# Install the base requirements from marqo-base, then the marqo dev
	# requirements on top of them.
	- name: Install dependencies
	  run: |
	    pip install -r marqo-base/requirements/amd64-gpu-requirements.txt
	    # override base requirements with marqo requirements, if needed:
	    pip install -r marqo/requirements.dev.txt

	# Fetch the NLTK "punkt_tab" resource with the nltk downloader module.
	- name: Download nltk data
	  run: python -m nltk.downloader punkt_tab

	# Installs JDK 17 and Maven, then builds the components under marqo/vespa.
	# unattended-upgrades is stopped and removed first; the step treats it as the
	# holder of the dpkg lock it waits on.
	- name: Build Vespa
	  run: |
	    # Function to wait for the dpkg lock to be released
	    function wait_for_dpkg_lock() {
	      while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; do
	        echo "Waiting for the dpkg lock to be released..."
	        sleep 5
	      done
	    }

	    # Stop the service before waiting, so the wait covers any run already in
	    # flight; only then touch apt. Every privileged command goes through sudo,
	    # matching the install commands below.
	    sudo systemctl stop unattended-upgrades
	    wait_for_dpkg_lock
	    sudo apt-get remove -y unattended-upgrades

	    echo "Updating package list"
	    sudo apt-get update -y

	    # Build Vespa components
	    echo "Installing jdk 17"
	    sudo apt-get install openjdk-17-jdk -y
	    echo "Installing maven"
	    sudo apt-get install maven -y
	    echo "Building Vespa components"
	    cd marqo/vespa
	    mvn clean package

	# Starts local Vespa via scripts/vespa_local/vespa_local.py with the requested
	# shard / replica counts, deploys the application package to the config
	# server, and blocks until the config server reports convergence (10 min cap).
	- name: Start Vespa
	  run: |
	    # Define these for checking if Vespa is ready
	    export VESPA_CONFIG_URL=http://localhost:19071
	    export VESPA_DOCUMENT_URL=http://localhost:8080
	    export VESPA_QUERY_URL=http://localhost:8080

	    cd marqo/scripts/vespa_local
	    set -x
	    # Output is captured and not used afterwards, which keeps it out of the log.
	    START_SCRIPT_OUTPUT=$(python vespa_local.py start --Shards ${{ inputs.number_of_shards || 1 }} --Replicas ${{ inputs.number_of_replicas || 0 }})
	    set +x

	    echo "Waiting for Vespa to start"
	    for i in {1..20}; do
	      echo -ne "Waiting... $i seconds\r"
	      sleep 1
	    done
	    echo -e "\nDone waiting."

	    # Zip up schemas, hosts, and services
	    # Deploy application with test schema, don't keep zip file
	    sudo apt-get install zip -y
	    zip -r - . -x README.md .env .gitignore "*.yml" | curl --header "Content-Type:application/zip" --data-binary @- http://localhost:19071/application/v2/tenant/default/prepareandactivate

	    # Check for config node convergence
	    # VESPA_CONFIG_URL is exported above, so the single-quoted inner shell sees it.
	    timeout 10m bash -c 'until [ "$(curl -f -X GET $VESPA_CONFIG_URL/application/v2/tenant/default/application/default/environment/prod/region/default/instance/default/serviceconverge | jq -r ".converged")" = "true" ]; do echo " Waiting for Vespa convergence to be true..."; sleep 10; done;' || \
	      (echo "Vespa did not converge in time" && exit 1)
	    echo "Vespa application has converged. Vespa setup complete!"

	# Runs pytest with coverage and a markdown report, mirroring the console
	# output into marqo/pytest_output.txt. continue-on-error lets the reporting
	# steps run after a failure; a later step inspects this step's outcome and
	# fails the job.
	- name: Run Unit Tests
	  id: run_unit_tests
	  continue-on-error: true
	  env:
	    # Secrets arrive as environment variables rather than being pasted into
	    # the script text, so their contents are never parsed by the shell.
	    PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID: ${{ secrets.PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID }}
	    PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY: ${{ secrets.PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY }}
	    PRIVATE_MODEL_TESTS_HF_TOKEN: ${{ secrets.PRIVATE_MODEL_TESTS_HF_TOKEN }}
	  run: |
	    # Define these for use by marqo
	    export VESPA_CONFIG_URL=http://localhost:19071
	    export VESPA_DOCUMENT_URL=http://localhost:8080
	    export VESPA_QUERY_URL=http://localhost:8080
	    export MARQO_ENABLE_THROTTLING=FALSE

	    cd marqo
	    export PYTHONPATH="./tests:./src:."
	    # With pipefail the pipeline reports pytest's exit status, not tee's
	    set -o pipefail
	    pytest ${{ env.MULTINODE_TEST_ARGS }} --ignore=tests/test_documentation.py --ignore=tests/compatibility_tests \
	      --durations=100 --cov=src --cov-branch --cov-context=test \
	      --cov-report=html:cov_html --cov-report=xml:cov.xml --cov-report term:skip-covered \
	      --md-report --md-report-flavor gfm --md-report-output pytest_result_summary.md \
	      tests | tee pytest_output.txt

	# On pull requests, runs diff-cover on cov.xml against the PR base branch
	# and writes HTML and markdown reports. On other events it writes
	# placeholder report files so the upload and summary steps still find them.
	- name: Check Test Coverage of New Code
	  id: check_test_coverage
	  continue-on-error: true
	  env:
	    # Supplied through the environment instead of being interpolated into
	    # the script; empty when the event is not a pull request.
	    BASE_BRANCH: ${{ github.event.pull_request.base.ref }}
	  run: |
	    if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
	      cd marqo
	      echo "Running diff-cover against branch $BASE_BRANCH"
	      git fetch origin "$BASE_BRANCH:$BASE_BRANCH"
	      diff-cover cov.xml --html-report diff_cov.html --markdown-report diff_cov.md \
	        --compare-branch "$BASE_BRANCH"
	    else
	      echo "Skipping diff-cover on Push events"
	      echo "Skipped diff-cover on Push events" > marqo/diff_cov.md
	      touch marqo/diff_cov.html
	    fi

	# Publishes the HTML coverage report directory and the diff-cover HTML
	# report as the "marqo-test-report" artifact; a failed upload does not fail
	# the job.
	- name: Upload Test Report
	  uses: actions/upload-artifact@v4
	  continue-on-error: true
	  with:
	    name: marqo-test-report
	    path: |
	      marqo/cov_html/
	      marqo/diff_cov.html

	# Appends three sections to the run's step summary: the pytest markdown
	# report, the tail of the pytest console output starting at the coverage
	# banner, and the diff-cover markdown report.
	- name: Test and coverage report summary
	  continue-on-error: true
	  run: |
	    echo "# Test Summary" >> "$GITHUB_STEP_SUMMARY"
	    cat marqo/pytest_result_summary.md >> "$GITHUB_STEP_SUMMARY"

	    echo "# Coverage Summary, Slow Tests and Failed Tests" >> "$GITHUB_STEP_SUMMARY"
	    echo '```text' >> "$GITHUB_STEP_SUMMARY"
	    # Print every line from the first "---------- coverage:" line onwards
	    awk '/---------- coverage:/ {flag=1} flag' marqo/pytest_output.txt >> "$GITHUB_STEP_SUMMARY"
	    echo '```' >> "$GITHUB_STEP_SUMMARY"

	    cat marqo/diff_cov.md >> "$GITHUB_STEP_SUMMARY"

	# The test and coverage steps use continue-on-error, so the job would
	# otherwise pass; this step turns a failed outcome of either one into a job
	# failure.
	- name: Fail Job If Tests or Coverage Failed
	  if: ${{ steps.run_unit_tests.outcome == 'failure' || steps.check_test_coverage.outcome == 'failure' }}
	  run: |
	    echo "Tests or coverage checks failed. Marking job as failed."
	    exit 1
	  shell: bash

	# Stops the EC2 runner created by Start-Runner. `always()` makes it run even
	# when earlier jobs failed.
	Stop-Runner:
	  name: Stop self-hosted EC2 runner
	  needs:
	    - Start-Runner # required to get output from the start-runner job
	    - Test-Marqo # required to wait when the main job is done
	  runs-on: ubuntu-latest
	  if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
	  steps:
	    - name: Configure AWS credentials
	      uses: aws-actions/configure-aws-credentials@v1
	      with:
	        aws-access-key-id: ${{ secrets.MARQO_WORKFLOW_TESTS_ACCESS_KEY_ID }}
	        aws-secret-access-key: ${{ secrets.MARQO_WORKFLOW_TESTS_SECRET_ACCESS_KEY }}
	        aws-region: us-east-1
	    - name: Stop EC2 runner
	      uses: machulav/ec2-github-runner@v2
	      with:
	        mode: stop
	        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
	        # Job id written exactly as declared in `needs` above (Start-Runner)
	        label: ${{ needs.Start-Runner.outputs.label }}
	        ec2-instance-id: ${{ needs.Start-Runner.outputs.ec2-instance-id }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Unit Tests with 2 shards and 1 replicas #3297

Workflow file

Unit Tests with 2 shards and 1 replicas #3297

Jobs

Run details

Workflow file for this run