Np new testing framework #107
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Test ReblockGVCF

# Triggers for this workflow: pull requests into the listed branches that
# touch ReblockGVCF-related files, and manual runs from the Actions tab.
on:
  pull_request:
    branches:
      - "develop"
      - "staging"
      - "master"
    # The workflow is skipped unless at least one changed file matches a
    # pattern below.
    ####################################
    # SET PIPELINE SPECIFIC PATHS HERE #
    ####################################
    paths:
      - "pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/**"
      - "tasks/broad/GermlineVariantDiscovery.wdl"
      - "tasks/broad/Qc.wdl"
      - "tasks/broad/Utilities.wdl"
      - "verification/VerifyGvcf.wdl"
      - "verification/VerifyTasks.wdl"
      - "verification/test-wdls/TestReblockGVCF.wdl"
      - "tasks/broad/TerraCopyFilesFromCloudToCloud.wdl"
      - ".github/workflows/test_reblockGVCF.yml"
  # Manual trigger; the inputs below are read by the job via
  # github.event.inputs.*.
  workflow_dispatch:
    inputs:
      useCallCache:
        description: "Use call cache (default: true)"
        required: false
        default: "true"
      updateTruth:
        description: "Update truth files (default: false)"
        required: false
        default: "false"
      testType:
        description: "Specify the type of test (Plumbing or Scientific)"
        required: false
        type: choice
        options:
          - Plumbing
          - Scientific
      truthBranch:
        description: "Specify the branch for truth files (default: master)"
        required: false
        default: "master"
# Workflow-level environment, visible to every step of every job.
env:
  # pipeline configuration
  PIPELINE_NAME: TestReblockGVCF
  DOCKSTORE_PIPELINE_NAME: ReblockGVCF
  PIPELINE_DIR: "pipelines/broad/dna_seq/germline/joint_genotyping/reblocking"

  # workspace configuration
  TESTING_WORKSPACE: "WARP Tests"
  WORKSPACE_NAMESPACE: warp-pipelines

  # service account configuration
  SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
  # NOTE(review): the value below is an e-mail-obfuscation placeholder left by
  # the tool this file was copied through, not a real address. It is quoted so
  # that it parses as a string (unquoted, the leading "[" makes it a YAML flow
  # sequence). Restore the actual service-account e-mail that is passed to the
  # firecloud_api.py scripts as --user before running this workflow.
  USER: "[email protected]"
jobs:
  # Runs the ReblockGVCF test WDL in Terra for the branch under test:
  # creates a method configuration, waits for Dockstore to pick up the commit,
  # submits one Terra job per test-input JSON, reports the workflow statuses in
  # the step summary, and finally deletes the method configuration.
  TestReblockGVCF:
    runs-on: ubuntu-latest
    # Add "id-token" with the intended permissions.
    permissions:
      contents: 'read'
      id-token: 'write'
      actions: 'write'
    steps:
      # Step 1: Checkout code
      # Purpose: Clones the repository code at the specified reference
      - uses: actions/checkout@v3
        with:
          ref: ${{ github.ref }}

      # Step 2: Setup Python
      # Purpose: Installs Python 3.11 for running pipeline scripts
      - name: Set up python
        id: setup-python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      # Step 3: Install Dependencies
      # Purpose: Installs required Python packages for the pipeline
      - name: Install dependencies
        run: |
          cd scripts/firecloud_api/
          pip install -r requirements.txt

      # Step 4: Set Branch Name
      # Purpose: Exports BRANCH_NAME for later steps. Pull requests use the
      # source branch (GITHUB_HEAD_REF); other events use GITHUB_REF_NAME.
      # The values are read from the runner's default environment variables
      # rather than expanded into the script text, so a crafted branch name
      # cannot inject shell commands, and GITHUB_REF_NAME keeps any "/" in the
      # branch name (the previous "${GITHUB_REF##*/}" kept only the last
      # path segment).
      - name: Set Branch Name
        id: set_branch
        run: |
          if [ -z "$GITHUB_HEAD_REF" ]; then
            echo "Branch name is missing, using $GITHUB_REF_NAME"
            echo "BRANCH_NAME=$GITHUB_REF_NAME" >> "$GITHUB_ENV"
          else
            echo "Branch name from PR: $GITHUB_HEAD_REF"
            echo "BRANCH_NAME=$GITHUB_HEAD_REF" >> "$GITHUB_ENV"
          fi

      # Step 5: Set Test Type
      # Purpose: Exports testType. Pull requests into master run Scientific
      # tests, pull requests into any other branch run Plumbing tests, and
      # manual runs use the testType input.
      - name: Set Test Type
        id: set_test_type
        env:
          # Passed through the environment so the input is never spliced into
          # the script text.
          DISPATCH_TEST_TYPE: ${{ github.event.inputs.testType }}
        run: |
          if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
            # For PRs, set based on target branch
            if [ "$GITHUB_BASE_REF" == "master" ]; then
              # If PR is targeting master branch, run Scientific tests
              echo "testType=Scientific" >> "$GITHUB_ENV"
              echo "testType=Scientific"
            else
              # If PR targets any other branch (develop, staging), run Plumbing tests
              echo "testType=Plumbing" >> "$GITHUB_ENV"
              echo "testType=Plumbing"
            fi
          else
            # For manual workflow runs (workflow_dispatch)
            echo "testType=$DISPATCH_TEST_TYPE" >> "$GITHUB_ENV"
            echo "testType=$DISPATCH_TEST_TYPE"
          fi

      # Step 6: Create Method Configuration
      # Purpose: Sets up the testing configuration in Terra workspace and
      # exports the script's stdout as METHOD_CONFIG_NAME
      - name: Create new method configuration
        run: |
          echo "Creating new method configuration for branch: $BRANCH_NAME"
          METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \
            create_new_method_config \
            --workspace-namespace "$WORKSPACE_NAMESPACE" \
            --workspace-name "$TESTING_WORKSPACE" \
            --pipeline_name "$PIPELINE_NAME" \
            --branch_name "$BRANCH_NAME" \
            --test_type "$testType" \
            --sa-json-b64 "$SA_JSON_B64" \
            --user "$USER")
          echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> "$GITHUB_ENV"

      # Step 7: Cancel Previous Runs
      # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type)
      # to avoid running multiple tests at the same time
      # NOTE(review): the action reference had been replaced by an
      # e-mail-obfuscation placeholder. The action name is restored from its
      # inputs (access_token / all_but_latest / ignore_sha); the version tag
      # is a best-effort reconstruction - confirm it against the repository
      # history before merging.
      - name: Cancel Previous GHA Runs
        uses: styfle/cancel-workflow-action@0.11.0
        with:
          access_token: ${{ github.token }}
          all_but_latest: true
          ignore_sha: true

      # Step 8: Cancel Previous Terra Submissions
      # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time
      # Will not abort a Terra submission if it is a scientific test
      # All values are taken from the step environment (workflow-level env and
      # GITHUB_ENV) instead of being expanded into the script text.
      - name: Cancel Previous Terra Submissions
        if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }}
        run: |
          python3 scripts/firecloud_api/firecloud_api.py \
            --workspace-namespace "$WORKSPACE_NAMESPACE" \
            --workspace-name "$TESTING_WORKSPACE" \
            --pipeline_name "$PIPELINE_NAME" \
            --branch_name "$BRANCH_NAME" \
            --sa-json-b64 "$SA_JSON_B64" \
            --user "$USER" \
            --test_type "$testType" \
            cancel_old_submissions

      # Step 9: Handle Git Commit Hash
      # Purpose: Gets the correct Github commit hash for version tracking.
      # Manual runs use the triggering commit; pull requests use the head
      # commit of the PR branch. Any other event fails the step.
      - name: Determine Github Commit Hash
        id: determine_github_commit_hash
        env:
          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        run: |
          if [ "$GITHUB_EVENT_NAME" == "workflow_dispatch" ]; then
            echo "Using github.sha for manually triggered workflow."
            echo "GITHUB_COMMIT_HASH=$GITHUB_SHA" >> "$GITHUB_ENV"
          elif [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
            echo "Using github.event.pull_request.head.sha for PR-triggered workflow."
            echo "GITHUB_COMMIT_HASH=$PR_HEAD_SHA" >> "$GITHUB_ENV"
          else
            echo "Unsupported event type: $GITHUB_EVENT_NAME"
            exit 1
          fi

      # Step 10: Compare Hashes
      # Purpose: Compares the Dockstore and Github commit hashes to ensure they match.
      # Sleeps 330 seconds up front, then re-checks once a minute until the
      # hashes match or 15 minutes of retry waiting have accumulated.
      - name: Compare Dockstore and Github Commit Hashes with Retry
        id: compare_hashes
        run: |
          # Wait 5.5 minutes for Dockstore to update
          sleep 330
          MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds
          WAIT_INTERVAL=60 # 1 minute in seconds
          TOTAL_WAITED=0
          echo "Starting hash comparison with retry mechanism..."
          while [ "$TOTAL_WAITED" -lt "$MAX_WAIT_TIME" ]; do
            echo "Fetching Dockstore Commit Hash..."
            DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \
              "$DOCKSTORE_TOKEN" \
              "$DOCKSTORE_PIPELINE_NAME" \
              "$BRANCH_NAME")
            echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH"
            echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH"
            if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then
              echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash."
              exit 0
            else
              echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH"
              echo "Retrying in $WAIT_INTERVAL seconds..."
              sleep "$WAIT_INTERVAL"
              TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL))
            fi
          done
          echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!"
          exit 1
        env:
          GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }}
          DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }}

      # Step 11: Run Tests
      # Purpose: Main testing step - runs the pipeline and collects results.
      # For every *.json under $PIPELINE_DIR/test_inputs/<testType> it rewrites
      # the inputs, uploads them, and submits a Terra job; it then polls each
      # submission, writes the statuses to the step summary, and fails if any
      # workflow ended as Failed, Aborted or Aborting.
      - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs
        env:
          # Manual-run inputs with their fallbacks for PR-triggered runs.
          # Passed through the environment rather than expanded into the script.
          UPDATE_TRUTH: ${{ github.event.inputs.updateTruth || 'false' }}
          USE_CALL_CACHE: ${{ github.event.inputs.useCallCache || 'true' }}
          TRUTH_BRANCH: ${{ github.event.inputs.truthBranch || 'master' }}
        run: |
          CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
          MAX_RETRIES=2
          RETRY_DELAY=300 # 300 seconds = 5 minutes

          # Aggregated workflow outputs.
          # NOTE(review): this is appended to below but never read - confirm
          # whether it should be published or can be dropped.
          ALL_OUTPUTS=""

          # Initialize arrays to track submission and workflow statuses
          declare -a SUBMISSION_IDS
          declare -A WORKFLOW_STATUSES
          OVERALL_SUCCESS=true

          # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false)
          if [ "$UPDATE_TRUTH" = "true" ]; then
            UPDATE_TRUTH_BOOL=true
          else
            UPDATE_TRUTH_BOOL=false
          fi

          if [ "$USE_CALL_CACHE" == "true" ]; then
            USE_CALL_CACHE_BOOL=true
          else
            USE_CALL_CACHE_BOOL=false
          fi

          TEST_TYPE="$testType"
          INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
          echo "Running tests with test type: $TEST_TYPE"

          TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
          echo "Truth path: $TRUTH_PATH"
          RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME"

          # Collect the input files up front so that an empty or missing
          # directory is reported clearly instead of looping once over the
          # unexpanded "*.json" pattern.
          shopt -s nullglob
          INPUT_FILES=("$INPUTS_DIR"/*.json)
          shopt -u nullglob
          if [ "${#INPUT_FILES[@]}" -eq 0 ]; then
            echo "Error: no test input JSON files found in $INPUTS_DIR"
            exit 1
          fi

          # 1. Submit all jobs first and store their submission IDs
          for input_file in "${INPUT_FILES[@]}"; do
            test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
              --results_path "$RESULTS_PATH" \
              --inputs_json "$input_file" \
              --update_truth "$UPDATE_TRUTH_BOOL" \
              --branch_name "$BRANCH_NAME")
            echo "Uploading the test input file: $test_input_file"

            # Create the submission_data.json file for this input_file
            input_file_filename=$(basename "$input_file")
            SUBMISSION_DATA_FILE="submission_data.json"
            printf '{
              "methodConfigurationNamespace": "%s",
              "methodConfigurationName": "%s_%s_%s",
              "useCallCache": %s,
              "deleteIntermediateOutputFiles": false,
              "useReferenceDisks": true,
              "memoryRetryMultiplier": 1.2,
              "workflowFailureMode": "NoNewCalls",
              "userComment": "%s",
              "ignoreEmptyOutputs": false
            }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE"
            echo "Created submission data file: $SUBMISSION_DATA_FILE"
            cat "$SUBMISSION_DATA_FILE"

            # Upload test input file
            python3 scripts/firecloud_api/firecloud_api.py \
              upload_test_inputs \
              --workspace-namespace "$WORKSPACE_NAMESPACE" \
              --workspace-name "$TESTING_WORKSPACE" \
              --pipeline_name "$PIPELINE_NAME" \
              --test_input_file "$test_input_file" \
              --branch_name "$BRANCH_NAME" \
              --sa-json-b64 "$SA_JSON_B64" \
              --test_type "$TEST_TYPE" \
              --user "$USER"

            attempt=1
            while [ "$attempt" -le "$MAX_RETRIES" ]; do
              SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \
                --workspace-namespace "$WORKSPACE_NAMESPACE" \
                --workspace-name "$TESTING_WORKSPACE" \
                --sa-json-b64 "$SA_JSON_B64" \
                --user "$USER" \
                --submission_data_file "$SUBMISSION_DATA_FILE")
              echo "Submission ID: $SUBMISSION_ID"

              # NOTE(review): the substring test also matches a valid ID that
              # happens to contain "404" - consider matching the script's
              # actual error output instead.
              if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then
                echo "Error in submission, retrying in $RETRY_DELAY seconds..."
                attempt=$((attempt + 1))
                if [ "$attempt" -gt "$MAX_RETRIES" ]; then
                  echo "Max retries reached. Exiting..."
                  exit 1
                fi
                sleep "$RETRY_DELAY"
                continue
              fi

              echo "Submission successful. Submission ID: $SUBMISSION_ID"
              SUBMISSION_IDS+=("$SUBMISSION_ID")
              break
            done
          done

          echo "All jobs have been submitted. Starting to poll for statuses..."

          # 2. After all submissions are done, start polling for statuses of all jobs
          for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
            attempt=1
            while [ "$attempt" -le "$MAX_RETRIES" ]; do
              echo "Polling for Submission ID: $SUBMISSION_ID"
              RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \
                --submission_id "$SUBMISSION_ID" \
                --sa-json-b64 "$SA_JSON_B64" \
                --user "$USER" \
                --workspace-namespace "$WORKSPACE_NAMESPACE" \
                --workspace-name "$TESTING_WORKSPACE")

              if [ -z "$RESPONSE" ]; then
                # An empty response is retried. It does not mark the run as
                # failed by itself: a later successful poll decides the result,
                # and running out of retries exits with an error right here.
                echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
                attempt=$((attempt + 1))
                if [ "$attempt" -gt "$MAX_RETRIES" ]; then
                  echo "Max retries reached. Exiting..."
                  exit 1
                fi
                sleep "$RETRY_DELAY"
                continue
              fi

              # RESPONSE is parsed as a JSON object of workflow ID -> status
              WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
              WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION"

              # Check if any workflow failed or errored
              FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key')
              if [ -n "$FAILED_WORKFLOWS" ]; then
                echo "Failed workflows detected:"
                echo "$FAILED_WORKFLOWS"
                OVERALL_SUCCESS=false
              fi

              # retrieve workflow outputs
              echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
              for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
                WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \
                  --user "$USER" \
                  --sa-json-b64 "$SA_JSON_B64" \
                  --submission_id "$SUBMISSION_ID" \
                  --workspace-namespace "$WORKSPACE_NAMESPACE" \
                  --workspace-name "$TESTING_WORKSPACE" \
                  --workflow_id "$WORKFLOW_ID" \
                  --pipeline_name "$PIPELINE_NAME")
                ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
              done
              break
            done
          done

          # Generate the final summary after all processing is complete
          FINAL_SUMMARY="## Combined Workflow Statuses\n\n"

          # The workspace name appears in the Terra URL with spaces encoded
          WORKSPACE_URL_NAME="${TESTING_WORKSPACE// /%20}"

          # Add all workflow statuses to the summary
          for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
            # Generate the Terra URL for the submission
            SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/$WORKSPACE_URL_NAME/job_history/$SUBMISSION_ID"

            # Add the Submission ID as a hyperlink
            FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n"

            # Add the workflows and statuses for this submission
            FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n"
          done

          # Write the complete summary once at the end
          echo -e "$FINAL_SUMMARY" >> "$GITHUB_STEP_SUMMARY"

          # Exit with error if any workflows failed
          if [ "$OVERALL_SUCCESS" = false ]; then
            echo ""
            echo ""
            echo "****************************************************************************************"
            echo "****************************************************************************************"
            echo ""
            echo "One or more workflows failed in Terra. Check the workflow status summary for details."
            echo ""
            echo "****************************************************************************************"
            echo "****************************************************************************************"
            echo ""
            echo ""
            exit 1
          fi

      # Step 12: Cleanup
      # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome.
      # The step fails unless the delete script prints exactly "True".
      - name: Delete Method Configuration
        if: always()  # Ensures it runs regardless of success or failure
        run: |
          echo "Deleting method configuration for branch: $BRANCH_NAME"
          DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \
            --workspace-namespace "$WORKSPACE_NAMESPACE" \
            --workspace-name "$TESTING_WORKSPACE" \
            --pipeline_name "$PIPELINE_NAME" \
            --branch_name "$BRANCH_NAME" \
            --test_type "$testType" \
            --sa-json-b64 "$SA_JSON_B64" \
            --user "$USER" \
            --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}")
          echo "Delete response: $DELETE_RESPONSE"
          if [ "$DELETE_RESPONSE" == "True" ]; then
            echo "Method configuration deleted successfully."
          else
            echo "Error: Method configuration deletion failed."
            exit 1
          fi

      # Step 13: Print Summary on Success
      # Purpose: Prints the final summary of the pipeline execution in case of success
      - name: Print Summary on Success
        if: success()
        run: |
          echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> "$GITHUB_STEP_SUMMARY"

      # Step 14: Print Summary on Failure
      # Purpose: Prints the final summary of the pipeline execution in case of failure
      - name: Print Summary on Failure
        if: failure()
        run: |
          echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> "$GITHUB_STEP_SUMMARY"