# .github/workflows/unit_test_200gb_CI.yml

# Runs the unit tests on an AMD64 machine.
name: unit_test_200gb_CI

on:
  # Callable from other workflows and startable by hand.
  workflow_call:
  workflow_dispatch:
  # Pushes to the long-lived branches, except pushes that only touch Markdown.
  push:
    branches:
      - 'mainline'
      - 'releases/*'
    paths-ignore:
      - '**.md'
  # Pull requests that target the long-lived branches.
  pull_request:
    branches:
      - 'mainline'
      - 'releases/*'

# One active run per ref: a newer run cancels the one still in progress.
concurrency:
  group: unit-tests-${{ github.ref }}
  cancel-in-progress: true

# The workflow token only gets read access to the repository contents.
permissions:
  contents: read

jobs:
  Check-Changes:
    # Decides whether the downstream test jobs need to run by checking if the
    # triggering change touches anything other than Markdown (*.md) files.
    # Output `doc_only` is the string 'true' or 'false'.
    runs-on: ubuntu-latest
    outputs:
      doc_only: ${{ steps.set-output.outputs.doc_only }}
    steps:
      - name: Checkout marqo repo
        uses: actions/checkout@v3
        with:
          fetch-depth: 0 # full history, so both ends of the diff are present
          path: marqo

      - name: Check for Documentation-Only Changes
        id: set-output
        run: |
          cd marqo
          set -x

          # Publish the decision both as an env var and as the step output
          set_doc_only() {
            echo "doc_only=$1" >> "$GITHUB_ENV"
            echo "doc_only=$1" >> "$GITHUB_OUTPUT"
          }

          # Determine BASE_COMMIT and HEAD_COMMIT based on the event type
          if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
            BASE_COMMIT="${{ github.event.pull_request.base.sha }}"
            HEAD_COMMIT="${{ github.event.pull_request.head.sha }}"
          elif [[ "${GITHUB_EVENT_NAME}" == "push" ]]; then
            BASE_COMMIT="${{ github.event.before }}"
            HEAD_COMMIT="${{ github.sha }}"
          else
            # workflow_dispatch has no commit range to inspect. The same is true
            # when this workflow is invoked through workflow_call by a caller
            # whose own event is neither push nor pull_request (the event name
            # seen here is the caller's). Always run the tests in those cases.
            echo "No commit range for event '${GITHUB_EVENT_NAME}'; not treating as doc-only"
            set_doc_only false
            exit 0
          fi

          # Debug: Print base and head commits
          echo "Base commit: $BASE_COMMIT"
          echo "Head commit: $HEAD_COMMIT"

          # Collect the changed files in a separate command so that a failing
          # `git diff` (e.g. an all-zero "before" SHA on the first push of a
          # branch) is detected. Piping it straight into grep would hide the
          # failure and wrongly report a documentation-only change.
          if ! CHANGED_FILES=$(git diff --name-only "$BASE_COMMIT" "$HEAD_COMMIT" --); then
            echo "git diff failed; assuming non-documentation changes"
            set_doc_only false
            exit 0
          fi

          # grep -v succeeds (and prints the paths) when at least one changed
          # file is not a Markdown file. An empty change list counts as doc-only.
          if [[ -n "$CHANGED_FILES" ]] && printf '%s\n' "$CHANGED_FILES" | grep -vE '\.(md)$'; then
            set_doc_only false
          else
            set_doc_only true
          fi

  Start-Runner:
    # Launches an EC2 instance and registers it as a self-hosted runner.
    # Outputs the runner label and the instance id for the jobs that follow.
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    needs:
      - Check-Changes
    if: ${{ needs.Check-Changes.outputs.doc_only == 'false' }} # Run only if there are non-documentation changes
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.MARQO_WORKFLOW_TESTS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.MARQO_WORKFLOW_TESTS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ${{ vars.MARQO_CPU_AMD64_TESTS_INSTANCE_AMI }}
          ec2-instance-type: m6i.xlarge
          subnet-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SUBNET_ID }}
          security-group-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SECURITY_GROUP_ID }}
          # Tags identify the instance and link it back to this workflow run.
          # NOTE(review): the URL tag key was previously misspelled
          # "WorlflowURL"; confirm nothing filters on the old spelling.
          aws-resource-tags: > # optional, requires additional permissions
            [
              {"Key": "Name", "Value": "marqo-github-runner-${{ github.run_id }}"},
              {"Key": "GitHubRepo", "Value": "${{ github.repository }}"},
              {"Key": "WorkflowName", "Value": "${{ github.workflow }}"},
              {"Key": "WorkflowRunId", "Value": "${{ github.run_id }}"},
              {"Key": "WorkflowURL", "Value": "${{ github.event.repository.html_url }}/actions/runs/${{ github.run_id }}"},
              {"Key": "PoloRole", "Value": "testing"}
            ]

  Test-Marqo:
    # Runs the unit test suite on the runner started by Start-Runner:
    # installs dependencies, builds and starts a local Vespa, runs pytest with
    # coverage, checks coverage of new code on pull requests, publishes the
    # reports, and finally fails the job if the tests or coverage check failed.
    name: Run Unit Tests
    needs:
      - Check-Changes # required to start the main job when the runner is ready
      - Start-Runner # required to get output from the start-runner job
    if: ${{ needs.Check-Changes.outputs.doc_only == 'false' }} # Run only if there are non-documentation changes
    runs-on: ${{ needs.Start-Runner.outputs.label }} # run the job on the newly created runner
    environment: marqo-test-suite
    steps:
      - name: Checkout marqo repo
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          path: marqo

      - name: Set up Python 3.9
        uses: actions/setup-python@v3
        with:
          python-version: "3.9"
          cache: "pip"

      - name: Checkout marqo-base for requirements
        uses: actions/checkout@v3
        with:
          repository: marqo-ai/marqo-base
          path: marqo-base

      - name: Install dependencies
        run: |
          pip install -r marqo-base/requirements/amd64-gpu-requirements.txt
          # override base requirements with marqo requirements, if needed:
          pip install -r marqo/requirements.dev.txt

      - name: Download nltk data
        run: |
          python -m nltk.downloader punkt_tab

      - name: Build Vespa
        run: |
          # Stop and remove unattended-upgrades before using apt below
          systemctl stop unattended-upgrades
          apt-get remove -y unattended-upgrades

          # Function to wait for the dpkg lock to be released
          function wait_for_dpkg_lock() {
            while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; do
              echo "Waiting for the dpkg lock to be released..."
              sleep 5
            done
          }

          # Wait for the dpkg lock before updating and installing
          wait_for_dpkg_lock

          echo "Updating package list"
          apt-get update -y

          # Build Vespa components
          echo "Installing jdk 17"
          sudo apt-get install openjdk-17-jdk -y
          echo "Installing maven"
          sudo apt-get install maven -y
          echo "Building Vespa components"
          cd marqo/vespa
          mvn clean package

      - name: Start Vespa
        run: |
          # Define these for checking if Vespa is ready
          export VESPA_CONFIG_URL=http://localhost:19071
          export VESPA_DOCUMENT_URL=http://localhost:8080
          export VESPA_QUERY_URL=http://localhost:8080

          cd marqo/scripts/vespa_local
          set -x
          python vespa_local.py start
          set +x

          echo "Waiting for Vespa to start"
          for i in {1..20}; do
              echo -ne "Waiting... $i seconds\r"
              sleep 1
          done
          echo -e "\nDone waiting."

          # Zip up schemas and services
          sudo apt-get install zip -y
          zip -r vespa_tester_app.zip services.xml schemas

          # Deploy application with test schema
          curl --header "Content-Type:application/zip" --data-binary @vespa_tester_app.zip http://localhost:19071/application/v2/tenant/default/prepareandactivate

          # wait for vespa to start (document url):
          timeout 10m bash -c 'until curl -f -X GET $VESPA_DOCUMENT_URL >/dev/null 2>&1; do echo "  Waiting for Vespa document API to be available..."; sleep 10; done;' || \
            (echo "Vespa (Document URL) did not start in time" && exit 1)

          echo "Vespa document API is available. Local Vespa setup complete."

          # Delete the zip file
          rm vespa_tester_app.zip
          echo "Deleted vespa_tester_app.zip"

      - name: Run Unit Tests
        id: run_unit_tests
        # A failure here is recorded in the step outcome and acted on by the
        # final step, so that the reports below are still produced.
        continue-on-error: true
        env:
          # Secrets are passed through the environment rather than being
          # interpolated into the script text, so that special characters in a
          # value cannot alter the shell script.
          PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID: ${{ secrets.PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID }}
          PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY: ${{ secrets.PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY }}
          PRIVATE_MODEL_TESTS_HF_TOKEN: ${{ secrets.PRIVATE_MODEL_TESTS_HF_TOKEN }}
        run: |
          # Define these for use by marqo
          export VESPA_CONFIG_URL=http://localhost:19071
          export VESPA_DOCUMENT_URL=http://localhost:8080
          export VESPA_QUERY_URL=http://localhost:8080
          export MARQO_ENABLE_THROTTLING=FALSE

          cd marqo
          export PYTHONPATH="./tests:./src:."
          # pipefail makes the step report pytest's exit status, not tee's
          set -o pipefail
          pytest --ignore=tests/test_documentation.py --ignore=tests/compatibility_tests \
            --durations=100 --cov=src --cov-branch --cov-context=test --cov-fail-under=69 \
            --cov-report=html:cov_html --cov-report=xml:cov.xml --cov-report term:skip-covered \
            --md-report --md-report-flavor gfm --md-report-output pytest_result_summary.md \
            tests | tee pytest_output.txt

      - name: Check Test Coverage of New Code
        id: check_test_coverage
        continue-on-error: true
        run: |
          if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
            export BASE_BRANCH="${{ github.event.pull_request.base.ref }}"
            cd marqo
            echo "Running diff-cover against branch $BASE_BRANCH"
            git fetch origin $BASE_BRANCH:$BASE_BRANCH
            diff-cover cov.xml --html-report diff_cov.html --markdown-report diff_cov.md \
              --compare-branch $BASE_BRANCH --fail-under=95
          else
            # Create placeholder reports so the upload and summary steps
            # still find the files they expect.
            echo "Skipping diff-cover on Push events"
            echo "Skipped diff-cover on Push events" > marqo/diff_cov.md
            touch marqo/diff_cov.html
          fi

      - name: Upload Test Report
        uses: actions/upload-artifact@v4
        continue-on-error: true
        with:
          name: marqo-test-report
          path: |
            marqo/cov_html/
            marqo/diff_cov.html

      - name: Test and coverage report summary
        continue-on-error: true
        run: |
          echo "# Test Summary" >> $GITHUB_STEP_SUMMARY
          cat marqo/pytest_result_summary.md >> $GITHUB_STEP_SUMMARY

          echo "# Coverage Summary, Slow Tests and Failed Tests" >> $GITHUB_STEP_SUMMARY
          echo '```text' >> $GITHUB_STEP_SUMMARY
          # Copy everything from the coverage header line to the end of the pytest output
          awk '/---------- coverage:/ {flag=1} flag' marqo/pytest_output.txt >> $GITHUB_STEP_SUMMARY
          echo '```' >> $GITHUB_STEP_SUMMARY

          cat marqo/diff_cov.md >> $GITHUB_STEP_SUMMARY

      - name: Fail Job If Tests or Coverage Failed
        if: ${{ steps.run_unit_tests.outcome == 'failure' || steps.check_test_coverage.outcome == 'failure' }}
        run: |
          echo "Tests or coverage checks failed. Marking job as failed."
          exit 1
        shell: bash

  Stop-Runner:
    # Stops the EC2 runner that Start-Runner created.
    name: Stop self-hosted EC2 runner
    needs:
      - Start-Runner # required to get output from the start-runner job
      - Test-Marqo # required to wait when the main job is done
    runs-on: ubuntu-latest
    # always() is required: without a status-check function the job gets an
    # implicit success() and would be skipped whenever Test-Marqo fails or the
    # run is cancelled, leaving the EC2 instance running. The label check
    # still skips this job when no runner was started.
    if: ${{ always() && needs.Start-Runner.outputs.label != '' }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.MARQO_WORKFLOW_TESTS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.MARQO_WORKFLOW_TESTS_SECRET_ACCESS_KEY }}
          aws-region: us-east-1
      - name: Stop EC2 runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.Start-Runner.outputs.label }}
          ec2-instance-id: ${{ needs.Start-Runner.outputs.ec2-instance-id }}