Np new testing framework #107

Workflow file for this run

.github/workflows/test_reblockGVCF.yml at bcaeb26

	name: Test ReblockGVCF

	# Controls when the workflow will run.
	on:
	  # Run on pull requests that target one of the protected branches, but only
	  # when a file relevant to the ReblockGVCF pipeline changed.
	  pull_request:
	    branches: ["develop", "staging", "master"]
	    # Only run if files in these paths changed:
	    ####################################
	    # SET PIPELINE SPECIFIC PATHS HERE #
	    ####################################
	    paths:
	      - 'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/**'
	      - 'tasks/broad/GermlineVariantDiscovery.wdl'
	      - 'tasks/broad/Qc.wdl'
	      - 'tasks/broad/Utilities.wdl'
	      - 'verification/VerifyGvcf.wdl'
	      - 'verification/VerifyTasks.wdl'
	      - 'verification/test-wdls/TestReblockGVCF.wdl'
	      - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl'
	      - '.github/workflows/test_reblockGVCF.yml'

	  # Allows you to run this workflow manually from the Actions tab.
	  # All inputs arrive as strings; the test step falls back to the defaults
	  # below when the workflow is triggered by a pull request instead.
	  workflow_dispatch:
	    inputs:
	      useCallCache:
	        description: 'Use call cache (default: true)'
	        required: false
	        default: "true"
	      updateTruth:
	        description: 'Update truth files (default: false)'
	        required: false
	        default: "false"
	      testType:
	        description: 'Specify the type of test (Plumbing or Scientific)'
	        required: false
	        type: choice
	        options:
	          - Plumbing
	          - Scientific
	      truthBranch:
	        description: 'Specify the branch for truth files (default: master)'
	        required: false
	        default: "master"


	# Workflow-level environment shared by every step.
	env:
	  # pipeline configuration
	  PIPELINE_NAME: TestReblockGVCF
	  DOCKSTORE_PIPELINE_NAME: ReblockGVCF
	  PIPELINE_DIR: "pipelines/broad/dna_seq/germline/joint_genotyping/reblocking"

	  # workspace configuration
	  TESTING_WORKSPACE: WARP Tests
	  WORKSPACE_NAMESPACE: warp-pipelines

	  # service account configuration
	  SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
	  # NOTE(review): the address below was redacted by the page scrape this file
	  # came from; restore the real service-account e-mail before use. It is
	  # quoted because an unquoted value starting with "[" parses as a YAML flow
	  # sequence rather than a string.
	  USER: "[email protected]"


	jobs:
	  TestReblockGVCF:
	    runs-on: ubuntu-latest
	    # Add "id-token" with the intended permissions.
	    # "actions: write" is granted so the job can cancel earlier workflow runs.
	    permissions:
	      contents: read
	      id-token: write
	      actions: write

	    steps:
	# Step 1: Checkout code
	# Purpose: Clones the repository code at the specified reference
	- uses: actions/checkout@v3
	with:
	ref: ${{ github.ref }}

	# Step 2: Setup Python
	# Purpose: Installs Python 3.11 for running pipeline scripts
	- name: Set up python
	id: setup-python
	uses: actions/setup-python@v4
	with:
	python-version: '3.11'

	# Step 3: Install Dependencies
	# Purpose: Installs required Python packages for the pipeline
	- name: Install dependencies
	run: \|
	cd scripts/firecloud_api/
	pip install -r requirements.txt

	# Step 4: Set Branch Name
	# Purpose: Determines and sets the correct branch name for either PR or direct commits
	- name: Set Branch Name
	id: set_branch
	run: \|
	if [ -z "${{ github.head_ref }}" ]; then
	echo "Branch name is missing, using ${GITHUB_REF##*/}"
	echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV
	else
	echo "Branch name from PR: ${{ github.head_ref }}"
	echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV
	fi

	# Step 5: Set Test Type
	# Purpose: Determines and sets the correct test type based on the branch name
	- name: Set Test Type
	id: set_test_type
	run: \|
	if [ "${{ github.event_name }}" == "pull_request" ]; then
	# For PRs, set based on target branch
	if [ "${{ github.base_ref }}" == "master" ]; then
	# If PR is targeting master branch, run Scientific tests
	echo "testType=Scientific" >> $GITHUB_ENV
	echo "testType=Scientific"
	else
	# If PR targets any other branch (develop, staging), run Plumbing tests
	echo "testType=Plumbing" >> $GITHUB_ENV
	echo "testType=Plumbing"
	fi
	else
	# For manual workflow runs (workflow_dispatch)
	echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV
	echo "testType=${{ github.event.inputs.testType }}"
	fi

	# Step 6: Create Method Configuration
	# Purpose: Sets up the testing configuration in Terra workspace
	- name: Create new method configuration
	run: \|
	echo "Creating new method configuration for branch: $BRANCH_NAME"

	METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \
	create_new_method_config \
	--workspace-namespace $WORKSPACE_NAMESPACE \
	--workspace-name "$TESTING_WORKSPACE" \
	--pipeline_name "$PIPELINE_NAME" \
	--branch_name "$BRANCH_NAME" \
	--test_type "$testType" \
	--sa-json-b64 "$SA_JSON_B64" \
	--user "$USER")

	echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV

	# Step 7: Cancel Previous Runs
	# Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type)
	# to avoid running multiple tests at the same time
	- name: Cancel Previous GHA Runs
	uses: styfle/[email protected]
	with:
	access_token: ${{ github.token }}
	all_but_latest: true
	ignore_sha: true

	# Step 8: Cancel Previous Terra Submissions
	# Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time
	# Will not abort a Terra submission if it is a scientific test
	- name: Cancel Previous Terra Submissions
	if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }}
	run: \|
	python3 scripts/firecloud_api/firecloud_api.py \
	--workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \
	--workspace-name "${{ env.TESTING_WORKSPACE }}" \
	--pipeline_name "${{ env.PIPELINE_NAME }}" \
	--branch_name "${{ env.BRANCH_NAME }}" \
	--sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \
	--user "${{ env.USER }}" \
	--test_type "$testType" \
	cancel_old_submissions

	# Step 9: Handle Git Commit Hash
	# Purpose: Gets the correct Github commit hash for version tracking
	- name: Determine Github Commit Hash
	id: determine_github_commit_hash
	run: \|
	if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
	echo "Using github.sha for manually triggered workflow."
	echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV
	elif [ "${{ github.event_name }}" == "pull_request" ]; then
	echo "Using github.event.pull_request.head.sha for PR-triggered workflow."
	echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
	else
	echo "Unsupported event type: ${{ github.event_name }}"
	exit 1
	fi

	# Step 10: Compare Hashes
	# Purpose: Compares the Dockstore and Github commit hashes to ensure they match
	- name: Compare Dockstore and Github Commit Hashes with Retry
	id: compare_hashes
	run: \|

	# Wait 5.5 minutes for Dockstore to update
	sleep 330

	MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds
	WAIT_INTERVAL=60 # 1 minute in seconds
	TOTAL_WAITED=0

	echo "Starting hash comparison with retry mechanism..."

	while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do
	echo "Fetching Dockstore Commit Hash..."
	DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \
	$DOCKSTORE_TOKEN \
	$DOCKSTORE_PIPELINE_NAME \
	$BRANCH_NAME)
	echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH"

	echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH"

	if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then
	echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash."
	exit 0
	else
	echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH"
	echo "Retrying in $WAIT_INTERVAL seconds..."
	sleep $WAIT_INTERVAL
	TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL))
	fi
	done

	echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!"
	exit 1

	env:
	GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }}
	DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }}

	# Step 11: Run Tests
	# Purpose: Main testing step - runs the pipeline and collects results
	- name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs
	run: \|
	UPDATE_TRUTH="${{ github.event.inputs.updateTruth \|\| 'false' }}"
	USE_CALL_CACHE="${{ github.event.inputs.useCallCache \|\| 'true' }}"
	TRUTH_BRANCH="${{ github.event.inputs.truthBranch \|\| 'master' }}"
	CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
	MAX_RETRIES=2
	RETRY_DELAY=300 # 300 seconds = 5 minutes
	# Initialize variables to aggregate statuses and outputs
	ALL_WORKFLOW_STATUSES="Workflow ID \| Status"$'\n'"--- \| ---"
	ALL_OUTPUTS=""
	# Initialize arrays to track submission and workflow statuses
	declare -a SUBMISSION_IDS
	declare -A WORKFLOW_STATUSES
	OVERALL_SUCCESS=true

	# Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false)
	if [ "$UPDATE_TRUTH" = "true" ]; then
	UPDATE_TRUTH_BOOL=true
	else
	UPDATE_TRUTH_BOOL=false
	fi

	if [ "$USE_CALL_CACHE" == "true" ]; then
	USE_CALL_CACHE_BOOL=true
	else
	USE_CALL_CACHE_BOOL=false
	fi

	TEST_TYPE="${{ env.testType }}"
	INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
	echo "Running tests with test type: $TEST_TYPE"

	TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" \| tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
	echo "Truth path: $TRUTH_PATH"
	RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME"



	# 1. Submit all jobs first and store their submission IDs
	for input_file in "$INPUTS_DIR"/*.json; do
	test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
	--results_path "$RESULTS_PATH" \
	--inputs_json "$input_file" \
	--update_truth "$UPDATE_TRUTH_BOOL" \
	--branch_name "$BRANCH_NAME" )
	echo "Uploading the test input file: $test_input_file"

	# Create the submission_data.json file for this input_file
	input_file_filename=$(basename $input_file)
	SUBMISSION_DATA_FILE="submission_data.json"
	printf '{
	"methodConfigurationNamespace": "%s",
	"methodConfigurationName": "%s_%s_%s",
	"useCallCache": %s,
	"deleteIntermediateOutputFiles": false,
	"useReferenceDisks": true,
	"memoryRetryMultiplier": 1.2,
	"workflowFailureMode": "NoNewCalls",
	"userComment": "%s",
	"ignoreEmptyOutputs": false
	}' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE"

	echo "Created submission data file: $SUBMISSION_DATA_FILE"
	cat "$SUBMISSION_DATA_FILE"

	# Upload test input file
	python3 scripts/firecloud_api/firecloud_api.py \
	upload_test_inputs \
	--workspace-namespace $WORKSPACE_NAMESPACE \
	--workspace-name "$TESTING_WORKSPACE" \
	--pipeline_name "$PIPELINE_NAME" \
	--test_input_file "$test_input_file" \
	--branch_name "$BRANCH_NAME" \
	--sa-json-b64 "$SA_JSON_B64" \
	--test_type "$TEST_TYPE" \
	--user "$USER"

	attempt=1
	while [ $attempt -le $MAX_RETRIES ]; do
	SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \
	--workspace-namespace "$WORKSPACE_NAMESPACE" \
	--workspace-name "$TESTING_WORKSPACE" \
	--sa-json-b64 "$SA_JSON_B64" \
	--user "$USER" \
	--submission_data_file "$SUBMISSION_DATA_FILE")

	echo "Submission ID: $SUBMISSION_ID"

	if [[ "$SUBMISSION_ID" == "404" \|\| -z "$SUBMISSION_ID" ]]; then
	echo "Error in submission, retrying in $RETRY_DELAY seconds..."
	((attempt++))
	if [ $attempt -gt $MAX_RETRIES ]; then
	echo "Max retries reached. Exiting..."
	exit 1
	fi
	sleep $RETRY_DELAY
	continue
	fi

	echo "Submission successful. Submission ID: $SUBMISSION_ID"
	SUBMISSION_IDS+=("$SUBMISSION_ID")
	break
	done
	done

	echo "All jobs have been submitted. Starting to poll for statuses..."

	# Continue with polling and output retrieval...

	# 2. After all submissions are done, start polling for statuses of all jobs
	for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
	attempt=1
	while [ $attempt -le $MAX_RETRIES ]; do
	echo "Polling for Submission ID: $SUBMISSION_ID"
	RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \
	--submission_id "$SUBMISSION_ID" \
	--sa-json-b64 "$SA_JSON_B64" \
	--user "$USER" \
	--workspace-namespace "$WORKSPACE_NAMESPACE" \
	--workspace-name "$TESTING_WORKSPACE")

	if [ -z "$RESPONSE" ]; then
	echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
	OVERALL_SUCCESS=false
	((attempt++))
	if [ $attempt -gt $MAX_RETRIES ]; then
	echo "Max retries reached. Exiting..."
	exit 1
	fi
	sleep $RETRY_DELAY
	continue
	fi

	WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" \| jq -r 'to_entries \| map(.key + " \| " + .value) \| .[]')
	WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION"

	# Check if any workflow failed or errored
	FAILED_WORKFLOWS=$(echo "$RESPONSE" \| jq -r 'to_entries \| .[] \| select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") \| .key')
	if [ ! -z "$FAILED_WORKFLOWS" ]; then
	echo "Failed workflows detected:"
	echo "$FAILED_WORKFLOWS"
	OVERALL_SUCCESS=false
	fi

	# retrieve workflow outputs
	echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
	for WORKFLOW_ID in $(echo "$RESPONSE" \| jq -r 'keys[]'); do
	WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \
	--user "$USER" \
	--sa-json-b64 "$SA_JSON_B64" \
	--submission_id "$SUBMISSION_ID" \
	--workspace-namespace $WORKSPACE_NAMESPACE \
	--workspace-name "$TESTING_WORKSPACE" \
	--workflow_id "$WORKFLOW_ID" \
	--pipeline_name "$PIPELINE_NAME")
	ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
	done
	break
	done
	done

	# Generate the final summary after all processing is complete
	FINAL_SUMMARY="## Combined Workflow Statuses\n\n"

	# Add all workflow statuses to the summary
	for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
	# Generate the Terra URL for the submission
	SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID"

	# Add the Submission ID as a hyperlink
	FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n"

	# Add the workflows and statuses for this submission
	FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n"
	done

	# Write the complete summary once at the end
	echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY

	# Exit with error if any workflows failed
	if [ "$OVERALL_SUCCESS" = false ]; then
	echo ""
	echo ""
	echo "****************************************************************************************"
	echo "****************************************************************************************"
	echo ""
	echo "One or more workflows failed in Terra. Check the workflow status summary for details."
	echo ""
	echo "****************************************************************************************"
	echo "****************************************************************************************"
	echo ""
	echo ""
	exit 1
	fi



	# Step 12: Cleanup
	# Purpose: Ensures cleanup of Terra method configurations regardless of test outcome
	- name: Delete Method Configuration
	if: always() # Ensures it runs regardless of success or failure
	run: \|
	echo "Deleting method configuration for branch: $BRANCH_NAME"
	DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \
	--workspace-namespace $WORKSPACE_NAMESPACE \
	--workspace-name "$TESTING_WORKSPACE" \
	--pipeline_name "$PIPELINE_NAME" \
	--branch_name "$BRANCH_NAME" \
	--test_type "$testType" \
	--sa-json-b64 "$SA_JSON_B64" \
	--user "$USER" \
	--method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}")
	echo "Delete response: $DELETE_RESPONSE"
	if [ "$DELETE_RESPONSE" == "True" ]; then
	echo "Method configuration deleted successfully."
	else
	echo "Error: Method configuration deletion failed."
	exit 1
	fi


	# Step 13: Print Summary on Success
	# Purpose: Prints the final summary of the pipeline execution in case of success
	- name: Print Summary on Success
	if: success()
	run: \|
	echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY

	# Step 14: Print Summary on Failure
	# Purpose: Prints the final summary of the pipeline execution in case of failure
	- name: Print Summary on Failure
	if: failure()
	run: \|
	echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Np new testing framework #107

Workflow file

Np new testing framework #107

Jobs

Run details

Workflow file for this run