diff --git a/.dockstore.yml b/.dockstore.yml index 366840f7d4..9ab1966238 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -19,14 +19,14 @@ workflows: subclass: WDL primaryDescriptorPath: /pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl - - name: Smartseq2_Single_Nucleus_Multisample - subclass: WDL - primaryDescriptorPath: /pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl - - name: Smartseq2_Single_Nucleus subclass: WDL primaryDescriptorPath: /pipelines/skylab/smartseq2_single_nucleus/SmartSeq2SingleNucleus.wdl + - name: Smartseq2_Single_Nucleus_Multisample + subclass: WDL + primaryDescriptorPath: /pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl + - name: IlluminaGenotypingArray subclass: WDL primaryDescriptorPath: /pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl @@ -43,10 +43,18 @@ workflows: subclass: WDL primaryDescriptorPath: /pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl + - name: ExomeReprocessing + subclass: WDL + primaryDescriptorPath: /pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl + - name: WholeGenomeGermlineSingleSample subclass: WDL primaryDescriptorPath: /pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl + - name: WholeGenomeReprocessing + subclass: WDL + primaryDescriptorPath: /pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl + - name: OptimusHcaAdapter subclass: WDL primaryDescriptorPath: /projects/optimus/CreateOptimusAdapterMetadata.wdl @@ -59,6 +67,10 @@ workflows: subclass: WDL primaryDescriptorPath: /pipelines/cemba/cemba_methylcseq/CEMBA.wdl + - name: CramToUnmappedBams + subclass: WDL + primaryDescriptorPath: /pipelines/broad/reprocessing/cram_to_unmapped_bams/CramToUnmappedBams.wdl + - name: ReblockGVCF subclass: WDL primaryDescriptorPath: /pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl @@ 
-123,10 +135,86 @@ workflows: subclass: WDL primaryDescriptorPath: /pipelines/skylab/atac/atac.wdl + - name: TestCramToUnmappedBams + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestCramToUnmappedBams.wdl + + - name: TestExomeGermlineSingleSample + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestExomeGermlineSingleSample.wdl + + - name: TestExomeReprocessing + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestExomeReprocessing.wdl + - name: TestIlluminaGenotypingArray subclass: WDL primaryDescriptorPath: /verification/test-wdls/TestIlluminaGenotypingArray.wdl + + - name: TestImputation + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestImputation.wdl + + - name: TestJointGenotyping + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestJointGenotyping.wdl + + - name: TestPairedTag + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestPairedTag.wdl + - name: TestOptimus + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestOptimus.wdl + + - name: TestMultiSampleSmartSeq2SingleNucleus + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl + + - name: TestMultiome + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestMultiome.wdl + + - name: TestSlideSeq + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestSlideSeq.wdl + + - name: TestReblockGVCF + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestReblockGVCF.wdl + + - name: TestRNAWithUMIsPipeline + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestRNAWithUMIsPipeline.wdl + + - name: Testsnm3C + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/Testsnm3C.wdl + + - name: TestUltimaGenomicsJointGenotyping + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl + + - name: TestUltimaGenomicsWholeGenomeGermline + subclass: WDL + 
primaryDescriptorPath: /verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl + + - name: TestUltimaGenomicsWholeGenomeCramOnly + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl + + - name: TestVariantCalling + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestVariantCalling.wdl + + - name: TestWholeGenomeGermlineSingleSample + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl + + - name: TestWholeGenomeReprocessing + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestWholeGenomeReprocessing.wdl + - name: VariantCalling subclass: WDL primaryDescriptorPath: /pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl @@ -134,3 +222,7 @@ workflows: - name: SlideTags subclass: WDL primaryDescriptorPath: /beta-pipelines/skylab/slidetags/SlideTags.wdl + + - name: TestATAC + subclass: WDL + primaryDescriptorPath: /verification/test-wdls/TestATAC.wdl diff --git a/.github/workflows/test_atac.yml b/.github/workflows/test_atac.yml new file mode 100644 index 0000000000..6714e6b8cf --- /dev/null +++ b/.github/workflows/test_atac.yml @@ -0,0 +1,462 @@ +name: Test ATAC + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/skylab/atac/**' + - 'tasks/skylab/MergeSortBam.wdl' + - 'tasks/skylab/FastqProcessing.wdl' + - 'tasks/skylab/PairedTagUtils.wdl' + - 'tasks/broad/Utilities.wdl' + - 'verification/VerifyATAC.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/test-wdls/TestATAC.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_atac.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + 
useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +env: + # pipeline configuration + PIPELINE_NAME: TestATAC + DOCKSTORE_PIPELINE_NAME: atac + PIPELINE_DIR: "pipelines/skylab/atac" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestATAC: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. + permissions: + contents: 'read' + id-token: 'write' + actions: write + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # 
Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel 
Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." 
+ DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" + exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" 
+ INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + 
--workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! 
-z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." + for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_cram_to_unmapped_bams.yml b/.github/workflows/test_cram_to_unmapped_bams.yml new file mode 100644 index 0000000000..9a829d46f7 --- /dev/null +++ b/.github/workflows/test_cram_to_unmapped_bams.yml @@ -0,0 +1,464 @@ +name: Test CramToUnmappedBams + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/reprocessing/cram_to_unmapped_bams/**' + - 'verification/VerifyCramToUnmappedBams.wdl' + - 'verification/test-wdls/TestCramToUnmappedBams.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_cram_to_unmapped_bams.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: 
master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestCramToUnmappedBams + DOCKSTORE_PIPELINE_NAME: CramToUnmappedBams + PIPELINE_DIR: "pipelines/broad/reprocessing/cram_to_unmapped_bams" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestCramToUnmappedBams: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. + permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # 
If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ 
env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." 
+ sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" + exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_exome_germline_single_sample.yml b/.github/workflows/test_exome_germline_single_sample.yml new file mode 100644 index 0000000000..40d544e984 --- /dev/null +++ b/.github/workflows/test_exome_germline_single_sample.yml @@ -0,0 +1,477 @@ +name: Test ExomeGermlineSingleSample + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/dna_seq/germline/single_sample/exome/**' + - 'tasks/broad/UnmappedBamToAlignedBam.wdl' + - 'tasks/broad/AggregatedBamQC.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/BamProcessing.wdl' + - 'tasks/broad/BamToCram.wdl' + - 'structs/dna_seq/DNASeqStructs.wdl' + - 'pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/DragenTasks.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/Utilities.wdl' + - 'verification/VerifyGermlineSingleSample.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/test-wdls/TestExomeGermlineSingleSample.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - 
'.github/workflows/test_exome_germline_single_sample.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestExomeGermlineSingleSample + DOCKSTORE_PIPELINE_NAME: ExomeGermlineSingleSample + PIPELINE_DIR: "pipelines/broad/dna_seq/germline/single_sample/exome" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestExomeGermlineSingleSample: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. 
+ permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new 
method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." 
+ echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" 
+ exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_exome_reprocessing.yml b/.github/workflows/test_exome_reprocessing.yml new file mode 100644 index 0000000000..06d456cba8 --- /dev/null +++ b/.github/workflows/test_exome_reprocessing.yml @@ -0,0 +1,480 @@ +name: Test ExomeReprocessing + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/reprocessing/exome/**' + - 'pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl' + - 'pipelines/broad/reprocessing/cram_to_unmapped_bams/CramToUnmappedBams.wdl' + - 'tasks/broad/UnmappedBamToAlignedBam.wdl' + - 'tasks/broad/AggregatedBamQC.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/BamProcessing.wdl' + - 'tasks/broad/BamToCram.wdl' + - 'structs/dna_seq/DNASeqStructs.wdl' + - 'pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/DragenTasks.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/Utilities.wdl' + - 'verification/VerifyExomeReprocessing.wdl' + - 'verification/VerifyGermlineSingleSample.wdl' + - 'verification/test-wdls/TestCramToUnmappedBams.wdl' + - 
'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_exome_reprocessing.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestExomeReprocessing + DOCKSTORE_PIPELINE_NAME: ExomeReprocessing + PIPELINE_DIR: "pipelines/broad/reprocessing/exome" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestExomeReprocessing: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. 
+ permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new 
method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." 
+ echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" 
+ exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_illumina_genotyping_array.yml b/.github/workflows/test_illumina_genotyping_array.yml index e1774240bb..2c63e50e80 100644 --- a/.github/workflows/test_illumina_genotyping_array.yml +++ b/.github/workflows/test_illumina_genotyping_array.yml @@ -1,20 +1,24 @@ - name: Test Illumina Genotyping Array # Controls when the workflow will run on: - #run on push to feature branch "kp_GHA_Terra_auth_PD-2682" - REMOVE WHEN DONE TESTING - # push: - # branches: - # - kp_GHA_Terra_auth_PD-2682 pull_request: branches: [ "develop", "staging", "master" ] - # Only run if files in these paths changed: pipelines/broad/genotyping/illumina, tasks, verification, .github/workflows/test_illumina_genotyping_array.yml + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### paths: - 'pipelines/broad/genotyping/illumina/**' - - 'tasks/**' - - 'verification/**' + - 'tasks/broad/IlluminaGenotypingArrayTasks.wdl' + - 'tasks/broad/Qc.wdl' + - 'verification/VerifyIlluminaGenotypingArray.wdl' + - 'verification/test-wdls/TestIlluminaGenotypingArray.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' - '.github/workflows/test_illumina_genotyping_array.yml' + + # Allows you to run this workflow manually from the Actions tab workflow_dispatch: 
inputs: @@ -22,170 +26,440 @@ on: description: 'Use call cache (default: true)' required: false default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + env: - PROJECT_NAME: WARP - # Github repo name - REPOSITORY_NAME: ${{ github.event.repository.name }} + # pipeline configuration + PIPELINE_NAME: TestIlluminaGenotypingArray + DOCKSTORE_PIPELINE_NAME: IlluminaGenotypingArray + PIPELINE_DIR: "pipelines/broad/genotyping/illumina" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + jobs: - run_pipeline: + TestIlluminaGenotypingArray: runs-on: ubuntu-latest # Add "id-token" with the intended permissions. permissions: contents: 'read' id-token: 'write' + actions: write steps: - # actions/checkout MUST come before auth - - uses: 'actions/checkout@v3' - - - id: 'auth' - name: 'Authenticate to Google Cloud' - uses: 'google-github-actions/auth@v2' - with: - token_format: 'access_token' - # Centralized in dsp-tools-k8s; ask in #dsp-devops-champions for help troubleshooting - # This is provided by the DevOps team - do not change! - workload_identity_provider: 'projects/1038484894585/locations/global/workloadIdentityPools/github-wi-pool/providers/github-wi-provider' - # This is our tester service account - service_account: 'pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com' - access_token_lifetime: '3600' #seconds, default is 3600 - access_token_scopes: 'profile, email, openid' - - # ... 
further steps are automatically authenticated - - name: Check working directory - run: | - echo "Current directory:" - pwd - ls -lht - - - name: Submit job, poll status, and get outputs - id: pipeline_run - run: | - # Set these environment variables - TOKEN="${{ steps.auth.outputs.access_token }}" - NAMESPACE="warp-pipelines" - WORKSPACE="WARP Tests" - PIPELINE_NAME="IlluminaGenotypingArray" - USE_CALL_CACHE="${{ github.event.inputs.useCallCache }}" - - # Function to call the Firecloud API using the firecloud_api.py script - firecloud_action() { - python3 scripts/firecloud_api/firecloud_api.py --token "$TOKEN" --namespace "$NAMESPACE" --workspace "$WORKSPACE" --action "$1" "${@:2}" - } - - # Create the submission_data.json file - SUBMISSION_DATA_FILE="submission_data.json" - # Convert USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) - if [ "$USE_CALL_CACHE" = "true" ]; then + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test 
type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ 
!contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." 
+ DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" + exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then USE_CALL_CACHE_BOOL=true else USE_CALL_CACHE_BOOL=false fi - # Use a heredoc to generate the JSON file 
content dynamically - cat < "$SUBMISSION_DATA_FILE" - { - "methodConfigurationNamespace": "warp-pipelines", - "methodConfigurationName": "$PIPELINE_NAME", - "useCallCache": $USE_CALL_CACHE_BOOL, - "deleteIntermediateOutputFiles": true, - "useReferenceDisks": true, - "memoryRetryMultiplier": 1.2, - "workflowFailureMode": "NoNewCalls", - "userComment": "Automated submission", - "ignoreEmptyOutputs": false - } - EOF - - echo "Created submission data file: $SUBMISSION_DATA_FILE" + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + - # 1. Submit a new workflow using the generated submission_data.json - SUBMISSION_ID=$(firecloud_action submit --submission_data_file "$SUBMISSION_DATA_FILE") + + # 1. Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" - # Check if submission was successful - if [ -z "$SUBMISSION_ID" ]; then - echo "Submission failed." 
# Log failure to stdout - echo "submission_id=" >> $GITHUB_OUTPUT # Set empty submission id - exit 1 - fi + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" - echo "Submission ID: $SUBMISSION_ID" - echo "submission_id=$SUBMISSION_ID" >> $GITHUB_OUTPUT # Write the submission ID to GITHUB_OUTPUT + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" - # 2. Poll submission status and get workflow IDs and statuses - echo "Polling submission status..." 
- RESPONSE=$(firecloud_action poll_status --submission_id "$SUBMISSION_ID") + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") - # Parse the JSON response to get the workflow ID and statuses - echo "Workflows and their statuses:" - echo "$RESPONSE" | jq + echo "Submission ID: $SUBMISSION_ID" - # Check if RESPONSE is empty - if [ -z "$RESPONSE" ]; then - echo "Failed to retrieve Workflow IDs." # Log failure to stdout - exit 1 - fi + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi - # Extract workflows and their statuses - WORKFLOW_STATUSES=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + ": " + .value) | .[]') - echo "workflow_statuses=$WORKFLOW_STATUSES" >> $GITHUB_OUTPUT # Write workflow statuses to GITHUB_OUTPUT + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done - # Generate markdown summary table for workflows and statuses - WORKFLOW_TABLE=$(echo "$RESPONSE" | jq -r 'to_entries | ["Workflow ID | Status", "--- | ---"] + map(.key + " | " + .value) | .[]') + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. 
After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" - # Print workflow table to stdout - echo "$WORKFLOW_TABLE" - - # 3. Iterate over the Workflow IDs to get outputs - OUTPUTS="" - echo "Retrieving workflow outputs..." - for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do - WORKFLOW_OUTPUT=$(firecloud_action get_outputs --submission_id "$SUBMISSION_ID" --workflow_id "$WORKFLOW_ID" --pipeline_name "$PIPELINE_NAME") - OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done done - echo "Workflow outputs retrieved successfully." - echo "Raw output before jq:" - echo "$OUTPUTS" - echo "outputs=$OUTPUTS" >> $GITHUB_OUTPUT # Write the outputs to GITHUB_OUTPUT - - # Handle null values, strings, and numbers in the outputs by converting everything to a string and replacing null with '-' - OUTPUTS_TABLE=$(echo "$OUTPUTS" | jq -r 'to_entries | ["Output | Value", "--- | ---"] + map(.key + " | " + (if .value == null then "-" else (.value | tostring) end)) | .[]') - #print outputs table to stdout - echo "$OUTPUTS_TABLE" - - - name: Print Summary on Success - if: success() - run: | + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo 
"****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY - echo "- **Pipeline Name**: IlluminaGenotypingArray" >> $GITHUB_STEP_SUMMARY - echo "- **Submission ID**: ${{ steps.pipeline_run.outputs.submission_id }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - echo "## Workflows and their statuses" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - echo "${{ steps.pipeline_run.outputs.workflow_statuses }}" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - - echo "## Workflow Outputs" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - echo "${{ steps.pipeline_run.outputs.outputs }}" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - echo " :shipit: " >> $GITHUB_STEP_SUMMARY - - - name: Print Summary on Failure - if: failure() - run: | - echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY - echo "- **Pipeline Name**: IlluminaGenotypingArray" >> $GITHUB_STEP_SUMMARY - echo "- **Submission ID**: ${{ steps.pipeline_run.outputs.submission_id }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - echo "## Workflows and their statuses (if available)" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - echo "${{ steps.pipeline_run.outputs.workflow_statuses }}" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - - echo "## Workflow Outputs (if available)" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - echo "${{ steps.pipeline_run.outputs.outputs }}" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY \ No newline at end of file + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print 
Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_imputation.yml b/.github/workflows/test_imputation.yml new file mode 100644 index 0000000000..0352a07db4 --- /dev/null +++ b/.github/workflows/test_imputation.yml @@ -0,0 +1,465 @@ +name: Test Imputation + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/arrays/imputation/**' + - 'structs/imputation/ImputationStructs.wdl' + - 'tasks/broad/ImputationTasks.wdl' + - 'verification/VerifyImputation.wdl' + - 'verification/test-wdls/TestImputation.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_imputation.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +env: + # pipeline configuration + PIPELINE_NAME: TestImputation + DOCKSTORE_PIPELINE_NAME: Imputation + PIPELINE_DIR: "pipelines/broad/arrays/imputation" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: 
pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestImputation: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. + permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ 
github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == 
"workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" 
+ exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_joint_genotyping.yml b/.github/workflows/test_joint_genotyping.yml new file mode 100644 index 0000000000..f846ee81bf --- /dev/null +++ b/.github/workflows/test_joint_genotyping.yml @@ -0,0 +1,468 @@ +name: Test JointGenotyping + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/dna_seq/germline/joint_genotyping/**' + - 'tasks/broad/JointGenotypingTasks.wdl' + - 'verification/VerifyJointGenotyping.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyGermlineSingleSample.wdl' + - 'verification/VerifyNA12878.wdl' + - 'verification/test-wdls/TestJointGenotyping.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_joint_genotyping.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test 
(Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +env: + # pipeline configuration + PIPELINE_NAME: TestJointGenotyping + DOCKSTORE_PIPELINE_NAME: JointGenotyping + PIPELINE_DIR: "pipelines/broad/dna_seq/germline/joint_genotyping" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestJointGenotyping: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. + permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + 
run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 
scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." 
+ exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" + exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_multiome.yml b/.github/workflows/test_multiome.yml new file mode 100644 index 0000000000..a479ef4a7a --- /dev/null +++ b/.github/workflows/test_multiome.yml @@ -0,0 +1,473 @@ +name: Test Multiome + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/skylab/multiome/**' + - 'tasks/skylab/MergeSortBam.wdl' + - 'tasks/skylab/FastqProcessing.wdl' + - 'tasks/skylab/PairedTagUtils.wdl' + - 'pipelines/skylab/optimus/Optimus.wdl' + - 'tasks/skylab/StarAlign.wdl' + - 'tasks/skylab/Metrics.wdl' + - 'tasks/skylab/RunEmptyDrops.wdl' + - 'tasks/skylab/CheckInputs.wdl' + - 'tasks/skylab/H5adUtils.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_multiome.yml' + - 'verification/VerifyMultiome.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/test-wdls/TestMultiome.wdl' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + 
updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +env: + # pipeline configuration + PIPELINE_NAME: TestMultiome + DOCKSTORE_PIPELINE_NAME: Multiome + PIPELINE_DIR: "pipelines/skylab/multiome" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestMultiome: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. + permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: 
Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous 
Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." 
+ DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" + exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" 
+ INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + 
--workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! 
-z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." + for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_multisamplesmartseq2singlenucleus.yml b/.github/workflows/test_multisamplesmartseq2singlenucleus.yml new file mode 100644 index 0000000000..b9c513c5b7 --- /dev/null +++ b/.github/workflows/test_multisamplesmartseq2singlenucleus.yml @@ -0,0 +1,473 @@ +name: Test Multi Sample Smart Seq 2 Single Nucleus +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/skylab/smartseq2_single_nucleus_multisample/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/skylab/CheckInputs.wdl' + - 'tasks/skylab/TrimAdapters.wdl' + - 'tasks/skylab/StarAlign.wdl' + - 'tasks/skylab/Picard.wdl' + - 'tasks/skylab/FeatureCounts.wdl' + - 'tasks/skylab/H5adUtils.wdl' + - 'tasks/broad/Utilities.wdl' + # verification WDL and its dependencies + - 'verification/VerifyMultiSampleSmartSeq2SingleNucleus.wdl' + - 'verification/VerifyTasks.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file + - '.github/workflows/test_multisamplesmartseq2singlenucleus.yml' + + # Allows you to run this workflow manually from the 
Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestMultiSampleSmartSeq2SingleNucleus + DOCKSTORE_PIPELINE_NAME: Smartseq2_Single_Nucleus_Multisample + PIPELINE_DIR: "pipelines/skylab/smartseq2_single_nucleus_multisample" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestMultiSampleSmartSeq2SingleNucleus: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. 
+ permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new 
method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." 
+ echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" 
+ exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/MultiSampleSmartSeq2SingleNucleus/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/MultiSampleSmartSeq2SingleNucleus/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_optimus.yml b/.github/workflows/test_optimus.yml new file mode 100644 index 0000000000..55fc315837 --- /dev/null +++ b/.github/workflows/test_optimus.yml @@ -0,0 +1,472 @@ +name: Test Optimus + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/skylab/optimus/**' + - 'tasks/skylab/FastqProcessing.wdl' + - 'tasks/skylab/StarAlign.wdl' + - 'tasks/skylab/Metrics.wdl' + - 'tasks/skylab/RunEmptyDrops.wdl' + - 'tasks/skylab/CheckInputs.wdl' + - 'tasks/skylab/MergeSortBam.wdl' + - 'tasks/skylab/H5adUtils.wdl' + - 'tasks/broad/Utilities.wdl' + - 'verification/VerifyOptimus.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/test-wdls/TestOptimus.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_optimus.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify
the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestOptimus + DOCKSTORE_PIPELINE_NAME: Optimus + PIPELINE_DIR: "pipelines/skylab/optimus" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestOptimus: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. + permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ 
github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 
scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." 
+ exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" + exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_pairedtag.yml b/.github/workflows/test_pairedtag.yml new file mode 100644 index 0000000000..43cdd02d13 --- /dev/null +++ b/.github/workflows/test_pairedtag.yml @@ -0,0 +1,475 @@ +name: Test PairedTag + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/skylab/paired_tag/**' + - 'pipelines/skylab/optimus/Optimus.wdl' + - 'tasks/skylab/H5adUtils.wdl' + - 'tasks/skylab/PairedTagUtils.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/skylab/FastqProcessing.wdl' + - 'tasks/skylab/StarAlign.wdl' + - 'tasks/skylab/Metrics.wdl' + - 'tasks/skylab/RunEmptyDrops.wdl' + - 'tasks/skylab/CheckInputs.wdl' + - 'tasks/skylab/MergeSortBam.wdl' + - 'verification/VerifyPairedTag.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/test-wdls/TestPairedTag.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_pairedtag.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth 
files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestPairedTag + DOCKSTORE_PIPELINE_NAME: PairedTag + PIPELINE_DIR: "pipelines/skylab/paired_tag" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestPairedTag: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. + permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test 
type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ 
!contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." 
+ DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" + exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" 
+ INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + 
--workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! 
-z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." + for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_reblockGVCF.yml b/.github/workflows/test_reblockGVCF.yml new file mode 100644 index 0000000000..5194e48423 --- /dev/null +++ b/.github/workflows/test_reblockGVCF.yml @@ -0,0 +1,467 @@ +name: Test ReblockGVCF + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/**' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/Utilities.wdl' + - 'verification/VerifyGvcf.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/test-wdls/TestReblockGVCF.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_reblockGVCF.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 
'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestReblockGVCF + DOCKSTORE_PIPELINE_NAME: ReblockGVCF + PIPELINE_DIR: "pipelines/broad/dna_seq/germline/joint_genotyping/reblocking" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestReblockGVCF: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. + permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ 
github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ 
env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." 
+ exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" + exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_rna_with_umis.yml b/.github/workflows/test_rna_with_umis.yml new file mode 100644 index 0000000000..3682af62c5 --- /dev/null +++ b/.github/workflows/test_rna_with_umis.yml @@ -0,0 +1,468 @@ +name: Test RNA with UMIs + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/rna_seq/**' + - 'tasks/broad/UMIAwareDuplicateMarking.wdl' + - 'tasks/broad/RNAWithUMIsTasks.wdl' + - 'tasks/broad/Utilities.wdl' + - 'verification/VerifyRNAWithUMIs.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/test-wdls/TestRNAWithUMIsPipeline.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_rna_with_umis.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - 
Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestRNAWithUMIsPipeline + DOCKSTORE_PIPELINE_NAME: RNAWithUMIsPipeline + PIPELINE_DIR: "pipelines/broad/rna_seq" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestRNAWithUMIsPipeline: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. + permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target 
branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + 
--workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." 
+ exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" + exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_slideseq.yml b/.github/workflows/test_slideseq.yml new file mode 100644 index 0000000000..bcef3776cc --- /dev/null +++ b/.github/workflows/test_slideseq.yml @@ -0,0 +1,471 @@ +name: Test Slide Seq + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/skylab/slideseq/**' + - 'tasks/skylab/StarAlign.wdl' + - 'tasks/skylab/FastqProcessing.wdl' + - 'tasks/skylab/Metrics.wdl' + - 'tasks/skylab/H5adUtils.wdl' + - 'tasks/skylab/CheckInputs.wdl' + - 'tasks/skylab/MergeSortBam.wdl' + - 'tasks/broad/Utilities.wdl' + - 'verification/VerifySlideSeq.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/test-wdls/TestSlideSeq.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_slideseq.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false 
+ type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestSlideSeq + DOCKSTORE_PIPELINE_NAME: SlideSeq + PIPELINE_DIR: "pipelines/skylab/slideseq" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestSlideSeq: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. + permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set 
based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" 
\ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." 
+ exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" + exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_snm3c.yml b/.github/workflows/test_snm3c.yml new file mode 100644 index 0000000000..e1a8323eff --- /dev/null +++ b/.github/workflows/test_snm3c.yml @@ -0,0 +1,464 @@ +name: Test snm3C + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/skylab/snm3C/**' + - 'verification/Verifysnm3C.wdl' + - 'tasks/broad/Utilities.wdl' + - 'verification/test-wdls/Testsnm3C.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_snm3c.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: Testsnm3C + DOCKSTORE_PIPELINE_NAME: 
snm3C-seq + PIPELINE_DIR: "pipelines/skylab/snm3C" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + Testsnm3C: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. + permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run 
Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + 
cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" 
+ exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/snm3C/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/snm3C/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_ultima_genomics_joint_genotyping.yml b/.github/workflows/test_ultima_genomics_joint_genotyping.yml new file mode 100644 index 0000000000..5dfd1d84bf --- /dev/null +++ b/.github/workflows/test_ultima_genomics_joint_genotyping.yml @@ -0,0 +1,471 @@ +name: Test UltimaGenomicsJointGenotyping + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/dna_seq/germline/joint_genotyping/UltimaGenomics/**' + - 'tasks/broad/JointGenotypingTasks.wdl' + - 'tasks/broad/UltimaGenomicsGermlineFilteringThreshold.wdl' + - 'tasks/broad/JointGenotypingTasks.wdl' + - 'verification/VerifyUltimaGenomicsJointGenotyping.wdl' + - 'verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyGermlineSingleSample.wdl' + - 'verification/VerifyNA12878.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_ultima_genomics_joint_genotyping.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use 
call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestUltimaGenomicsJointGenotyping + DOCKSTORE_PIPELINE_NAME: UltimaGenomicsJointGenotyping + PIPELINE_DIR: "pipelines/broad/dna_seq/germline/joint_genotyping/UltimaGenomics" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestUltimaGenomicsJointGenotyping: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. 
+ permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new 
method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." 
+ echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" 
+ exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_ultima_genomics_whole_genome_cram_only.yml b/.github/workflows/test_ultima_genomics_whole_genome_cram_only.yml new file mode 100644 index 0000000000..f862990785 --- /dev/null +++ b/.github/workflows/test_ultima_genomics_whole_genome_cram_only.yml @@ -0,0 +1,478 @@ +name: Test UltimaGenomicsWholeGenomeCramOnly + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/dna_seq/somatic/single_sample/ugwgs/**' + - 'pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl' + - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'structs/dna_seq/DNASeqStructs.wdl' + - 'tasks/broad/Alignment.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineQC.wdl' + - 'structs/dna_seq/UltimaGenomicsWholeGenomeGermlineStructs.wdl' + - 'tasks/broad/InternalTasks.wdl' + - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineAlignmentMarkDuplicates.wdl' + - 'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl' + - 'verification/VerifyUltimaGenomicsWholeGenomeCramOnly.wdl' + - 
'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/VerifyNA12878.wdl' + - 'verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_ultima_genomics_whole_genome_cram_only.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestUltimaGenomicsWholeGenomeCramOnly + DOCKSTORE_PIPELINE_NAME: UltimaGenomicsWholeGenomeCramOnly + PIPELINE_DIR: "pipelines/broad/dna_seq/somatic/single_sample/ugwgs/" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestUltimaGenomicsWholeGenomeCramOnly: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. 
+ permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new 
method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." 
+ echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" 
+ exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_ultima_genomics_whole_genome_germline.yml b/.github/workflows/test_ultima_genomics_whole_genome_germline.yml new file mode 100644 index 0000000000..b046345448 --- /dev/null +++ b/.github/workflows/test_ultima_genomics_whole_genome_germline.yml @@ -0,0 +1,478 @@ +name: Test UltimaGenomicsWholeGenomeGermline + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/dna_seq/germline/single_sample/ugwgs/**' + - 'pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl' + - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'structs/dna_seq/DNASeqStructs.wdl' + - 'tasks/broad/Alignment.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineQC.wdl' + - 'structs/dna_seq/UltimaGenomicsWholeGenomeGermlineStructs.wdl' + - 'tasks/broad/InternalTasks.wdl' + - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineAlignmentMarkDuplicates.wdl' + - 'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl' + - 'verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl' + - 
'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/VerifyNA12878.wdl' + - 'verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_ultima_genomics_whole_genome_germline.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestUltimaGenomicsWholeGenomeGermline + DOCKSTORE_PIPELINE_NAME: UltimaGenomicsWholeGenomeGermline + PIPELINE_DIR: "pipelines/broad/dna_seq/germline/single_sample/ugwgs/" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestUltimaGenomicsWholeGenomeGermline: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. 
+ permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new 
method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." 
+ echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" 
+ exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_variant_calling.yml b/.github/workflows/test_variant_calling.yml new file mode 100644 index 0000000000..b7528b8afc --- /dev/null +++ b/.github/workflows/test_variant_calling.yml @@ -0,0 +1,469 @@ +name: Test Variant Calling + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/dna_seq/germline/variant_calling/**' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/BamProcessing.wdl' + - 'tasks/broad/DragenTasks.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - 'verification/test-wdls/TestVariantCalling.wdl' + - 'verification/VerifyGvcf.wdl' + - 'verification/VerifyTasks.wdl' + - '.github/workflows/test_variant_calling.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + 
+        type: choice
+        options:
+          - Plumbing
+          - Scientific
+      truthBranch:
+        description: 'Specify the branch for truth files (default: master)'
+        required: false
+        default: "master"
+
+
+env:
+  # pipeline configuration
+  PIPELINE_NAME: TestVariantCalling
+  DOCKSTORE_PIPELINE_NAME: VariantCalling
+  PIPELINE_DIR: "pipelines/broad/dna_seq/germline/variant_calling"
+
+  # workspace configuration
+  TESTING_WORKSPACE: WARP Tests
+  WORKSPACE_NAMESPACE: warp-pipelines
+
+  # service account configuration
+  SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
+  USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com
+
+
+jobs:
+  TestVariantCalling:
+    runs-on: ubuntu-latest
+    # Add "id-token" with the intended permissions.
+    permissions:
+      contents: 'read'
+      id-token: 'write'
+      actions: write
+
+    steps:
+      # Step 1: Checkout code
+      # Purpose: Clones the repository code at the specified reference
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ github.ref }}
+
+      # Step 2: Setup Python
+      # Purpose: Installs Python 3.11 for running pipeline scripts
+      - name: Set up python
+        id: setup-python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+
+      # Step 3: Install Dependencies
+      # Purpose: Installs required Python packages for the pipeline
+      - name: Install dependencies
+        run: |
+          cd scripts/firecloud_api/
+          pip install -r requirements.txt
+
+      # Step 4: Set Branch Name
+      # Purpose: Determines and sets the correct branch name for either PR or direct commits
+      - name: Set Branch Name
+        id: set_branch
+        run: |
+          if [ -z "${{ github.head_ref }}" ]; then
+            echo "Branch name is missing, using ${GITHUB_REF##*/}"
+            echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV
+          else
+            echo "Branch name from PR: ${{ github.head_ref }}"
+            echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV
+          fi
+
+      # Step 5: Set Test Type
+      # Purpose: Determines the test type from the event type and the PR target branch
+      - name: Set Test Type
+        id: set_test_type
+        run: |
+          if [ "${{ github.event_name }}" == "pull_request" ]; then
+            # For PRs, set based on target branch
+            if [ "${{ github.base_ref }}" == "master" ]; then
+              # If PR is targeting master branch, run Scientific tests
+              echo "testType=Scientific" >> $GITHUB_ENV
+              echo "testType=Scientific"
+            else
+              # If PR targets any other branch (develop, staging), run Plumbing tests
+              echo "testType=Plumbing" >> $GITHUB_ENV
+              echo "testType=Plumbing"
+            fi
+          else
+            # For manual workflow runs (workflow_dispatch)
+            echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV
+            echo "testType=${{ github.event.inputs.testType }}"
+          fi
+
+      # Step 6: Create Method Configuration
+      # Purpose: Sets up the testing configuration in Terra workspace
+      - name: Create new method configuration
+        run: |
+          echo "Creating new method configuration for branch: $BRANCH_NAME"
+
+          METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \
+            create_new_method_config \
+            --workspace-namespace "$WORKSPACE_NAMESPACE" \
+            --workspace-name "$TESTING_WORKSPACE" \
+            --pipeline_name "$PIPELINE_NAME" \
+            --branch_name "$BRANCH_NAME" \
+            --test_type "$testType" \
+            --sa-json-b64 "$SA_JSON_B64" \
+            --user "$USER")
+
+          echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV
+
+      # Step 7: Cancel Previous Runs
+      # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type)
+      #          to avoid running multiple tests at the same time
+      - name: Cancel Previous GHA Runs
+        uses: styfle/cancel-workflow-action@0.11.0
+        with:
+          access_token: ${{ github.token }}
+          all_but_latest: true
+          ignore_sha: true
+
+      # Step 8: Cancel Previous Terra Submissions
+      # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time
+      #          Will not abort a Terra submission if it is a scientific test
+      - name: Cancel Previous Terra Submissions
+        if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }}
+        run: |
+          python3 scripts/firecloud_api/firecloud_api.py \
+            --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \
+            --workspace-name "${{ env.TESTING_WORKSPACE }}" \
+            --pipeline_name "${{ env.PIPELINE_NAME }}" \
+            --branch_name "${{ env.BRANCH_NAME }}" \
+            --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \
+            --user "${{ env.USER }}" \
+            --test_type "$testType" \
+            cancel_old_submissions
+
+      # Step 9: Handle Git Commit Hash
+      # Purpose: Gets the correct Github commit hash for version tracking
+      - name: Determine Github Commit Hash
+        id: determine_github_commit_hash
+        run: |
+          if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
+            echo "Using github.sha for manually triggered workflow."
+            echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV
+          elif [ "${{ github.event_name }}" == "pull_request" ]; then
+            echo "Using github.event.pull_request.head.sha for PR-triggered workflow."
+            echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
+          else
+            echo "Unsupported event type: ${{ github.event_name }}"
+            exit 1
+          fi
+
+      # Step 10: Compare Hashes
+      # Purpose: Compares the Dockstore and Github commit hashes to ensure they match
+      - name: Compare Dockstore and Github Commit Hashes with Retry
+        id: compare_hashes
+        run: |
+
+          # Wait 5.5 minutes for Dockstore to update
+          sleep 330
+
+          MAX_WAIT_TIME=$((15 * 60))  # 15 minutes in seconds
+          WAIT_INTERVAL=60            # 1 minute in seconds
+          TOTAL_WAITED=0
+
+          echo "Starting hash comparison with retry mechanism..."
+
+          while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do
+            echo "Fetching Dockstore Commit Hash..."
+            DOCKSTORE_COMMIT_HASH=$(python3 scripts/dockstore_api/fetch_dockstore_commit.py \
+              "$DOCKSTORE_TOKEN" \
+              "$DOCKSTORE_PIPELINE_NAME" \
+              "$BRANCH_NAME")
+            echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH"
+
+            echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH"
+
+            if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then
+              echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash."
+              exit 0
+            else
+              echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH"
+              echo "Retrying in $WAIT_INTERVAL seconds..."
+              sleep $WAIT_INTERVAL
+              TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL))
+            fi
+          done
+
+          echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!"
+          exit 1
+
+        env:
+          GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }}
+          DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }}
+
+      # Step 11: Run Tests
+      # Purpose: Main testing step - runs the pipeline and collects results
+      - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs
+        run: |
+          UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}"
+          USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}"
+          TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}"
+          CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S")
+          MAX_RETRIES=2
+          RETRY_DELAY=300  # 300 seconds = 5 minutes
+          # Initialize variables to aggregate statuses and outputs
+          ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---"
+          ALL_OUTPUTS=""
+          # Initialize arrays to track submission and workflow statuses
+          declare -a SUBMISSION_IDS
+          declare -A WORKFLOW_STATUSES
+          OVERALL_SUCCESS=true
+
+          # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false)
+          if [ "$UPDATE_TRUTH" = "true" ]; then
+            UPDATE_TRUTH_BOOL=true
+          else
+            UPDATE_TRUTH_BOOL=false
+          fi
+
+          if [ "$USE_CALL_CACHE" == "true" ]; then
+            USE_CALL_CACHE_BOOL=true
+          else
+            USE_CALL_CACHE_BOOL=false
+          fi
+
+          TEST_TYPE="${{ env.testType }}"
+          INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE"
+          echo "Running tests with test type: $TEST_TYPE"
+
+          TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH"
+          echo "Truth path: $TRUTH_PATH"
+          RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME"
+
+
+
+          # 1. Submit all jobs first and store their submission IDs
+          for input_file in "$INPUTS_DIR"/*.json; do
+            test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \
+              --results_path "$RESULTS_PATH" \
+              --inputs_json "$input_file" \
+              --update_truth "$UPDATE_TRUTH_BOOL" \
+              --branch_name "$BRANCH_NAME" )
+            echo "Uploading the test input file: $test_input_file"
+
+            # Create the submission_data.json file for this input_file
+            input_file_filename=$(basename "$input_file")
+            SUBMISSION_DATA_FILE="submission_data.json"
+            printf '{
+              "methodConfigurationNamespace": "%s",
+              "methodConfigurationName": "%s_%s_%s",
+              "useCallCache": %s,
+              "deleteIntermediateOutputFiles": false,
+              "useReferenceDisks": true,
+              "memoryRetryMultiplier": 1.2,
+              "workflowFailureMode": "NoNewCalls",
+              "userComment": "%s",
+              "ignoreEmptyOutputs": false
+            }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE"
+
+            echo "Created submission data file: $SUBMISSION_DATA_FILE"
+            cat "$SUBMISSION_DATA_FILE"
+
+            # Upload test input file
+            python3 scripts/firecloud_api/firecloud_api.py \
+              upload_test_inputs \
+              --workspace-namespace "$WORKSPACE_NAMESPACE" \
+              --workspace-name "$TESTING_WORKSPACE" \
+              --pipeline_name "$PIPELINE_NAME" \
+              --test_input_file "$test_input_file" \
+              --branch_name "$BRANCH_NAME" \
+              --sa-json-b64 "$SA_JSON_B64" \
+              --test_type "$TEST_TYPE" \
+              --user "$USER"
+
+            attempt=1
+            while [ $attempt -le $MAX_RETRIES ]; do
+              SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \
+                --workspace-namespace "$WORKSPACE_NAMESPACE" \
+                --workspace-name "$TESTING_WORKSPACE" \
+                --sa-json-b64 "$SA_JSON_B64" \
+                --user "$USER" \
+                --submission_data_file "$SUBMISSION_DATA_FILE")
+
+              echo "Submission ID: $SUBMISSION_ID"
+
+              if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then
+                echo "Error in submission, retrying in $RETRY_DELAY seconds..."
+                ((attempt++))
+                if [ $attempt -gt $MAX_RETRIES ]; then
+                  echo "Max retries reached. Exiting..."
+                  exit 1
+                fi
+                sleep $RETRY_DELAY
+                continue
+              fi
+
+              echo "Submission successful. Submission ID: $SUBMISSION_ID"
+              SUBMISSION_IDS+=("$SUBMISSION_ID")
+              break
+            done
+          done
+
+          echo "All jobs have been submitted. Starting to poll for statuses..."
+
+          # Continue with polling and output retrieval...
+
+          # 2. After all submissions are done, start polling for statuses of all jobs
+          for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do
+            attempt=1
+            while [ $attempt -le $MAX_RETRIES ]; do
+              echo "Polling for Submission ID: $SUBMISSION_ID"
+              RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \
+                --submission_id "$SUBMISSION_ID" \
+                --sa-json-b64 "$SA_JSON_B64" \
+                --user "$USER" \
+                --workspace-namespace "$WORKSPACE_NAMESPACE" \
+                --workspace-name "$TESTING_WORKSPACE")
+
+              if [ -z "$RESPONSE" ]; then
+                echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID"
+                OVERALL_SUCCESS=false
+                ((attempt++))
+                if [ $attempt -gt $MAX_RETRIES ]; then
+                  echo "Max retries reached. Exiting..."
+                  exit 1
+                fi
+                sleep $RETRY_DELAY
+                continue
+              fi
+
+              WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]')
+              WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION"
+
+              # Check if any workflow failed or errored
+              FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key')
+              if [ ! -z "$FAILED_WORKFLOWS" ]; then
+                echo "Failed workflows detected:"
+                echo "$FAILED_WORKFLOWS"
+                OVERALL_SUCCESS=false
+              fi
+
+              # retrieve workflow outputs
+              echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..."
+              for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do
+                WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \
+                  --user "$USER" \
+                  --sa-json-b64 "$SA_JSON_B64" \
+                  --submission_id "$SUBMISSION_ID" \
+                  --workspace-namespace "$WORKSPACE_NAMESPACE" \
+                  --workspace-name "$TESTING_WORKSPACE" \
+                  --workflow_id "$WORKFLOW_ID" \
+                  --pipeline_name "$PIPELINE_NAME")
+                ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n'
+              done
+              break
+            done
+          done
+
+          # Generate the final summary after all processing is complete
+          FINAL_SUMMARY="## Combined Workflow Statuses\n\n"
+
+          # Add all workflow statuses to the summary
+          for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do
+            # Generate the Terra URL for the submission
+            SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID"
+
+            # Add the Submission ID as a hyperlink
+            FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n"
+
+            # Add the workflows and statuses for this submission
+            FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n"
+          done
+
+          # Write the complete summary once at the end
+          echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY
+
+          # Exit with error if any workflows failed
+          if [ "$OVERALL_SUCCESS" = false ]; then
+            echo ""
+            echo ""
+            echo "****************************************************************************************"
+            echo "****************************************************************************************"
+            echo ""
+            echo "One or more workflows failed in Terra. Check the workflow status summary for details."
+            echo ""
+            echo "****************************************************************************************"
+            echo "****************************************************************************************"
+            echo ""
+            echo ""
+            exit 1
+          fi
+
+
+
+      # Step 12: Cleanup
+      # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome
+      - name: Delete Method Configuration
+        if: always()  # Ensures it runs regardless of success or failure
+        run: |
+          echo "Deleting method configuration for branch: $BRANCH_NAME"
+          DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \
+            --workspace-namespace "$WORKSPACE_NAMESPACE" \
+            --workspace-name "$TESTING_WORKSPACE" \
+            --pipeline_name "$PIPELINE_NAME" \
+            --branch_name "$BRANCH_NAME" \
+            --test_type "$testType" \
+            --sa-json-b64 "$SA_JSON_B64" \
+            --user "$USER" \
+            --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}")
+          echo "Delete response: $DELETE_RESPONSE"
+          if [ "$DELETE_RESPONSE" == "True" ]; then
+            echo "Method configuration deleted successfully."
+          else
+            echo "Error: Method configuration deletion failed."
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_whole_genome_germline_single_sample.yml b/.github/workflows/test_whole_genome_germline_single_sample.yml new file mode 100644 index 0000000000..0404e8d933 --- /dev/null +++ b/.github/workflows/test_whole_genome_germline_single_sample.yml @@ -0,0 +1,478 @@ +name: Test WholeGenomeGermlineSingleSample + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/dna_seq/germline/single_sample/wgs/**' + - 'structs/dna_seq/DNASeqStructs.wdl' + - 'tasks/broad/Alignment.wdl' + - 'tasks/broad/DragmapAlignment.wdl' + - 'tasks/broad/SplitLargeReadGroup.wdl' + - 'tasks/broad/BamProcessing.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/AggregatedBamQC.wdl' + - 'tasks/broad/BamToCram.wdl' + - 'pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'tasks/broad/DragenTasks.wdl' + - 'verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl' + - 'verification/VerifyGermlineSingleSample.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - 
'.github/workflows/test_whole_genome_germline_single_sample.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestWholeGenomeGermlineSingleSample + DOCKSTORE_PIPELINE_NAME: WholeGenomeGermlineSingleSample + PIPELINE_DIR: "pipelines/broad/dna_seq/germline/single_sample/wgs" + + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestWholeGenomeGermlineSingleSample: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. 
+ permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new 
method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." 
+ echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" 
+ exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/.github/workflows/test_whole_genome_reprocessing.yml b/.github/workflows/test_whole_genome_reprocessing.yml new file mode 100644 index 0000000000..03527f0871 --- /dev/null +++ b/.github/workflows/test_whole_genome_reprocessing.yml @@ -0,0 +1,481 @@ +name: Test WholeGenomeReprocessing + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/reprocessing/wgs/**' + - 'pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl' + - 'structs/dna_seq/DNASeqStructs.wdl' + - 'tasks/broad/Alignment.wdl' + - 'tasks/broad/DragmapAlignment.wdl' + - 'tasks/broad/SplitLargeReadGroup.wdl' + - 'tasks/broad/BamProcessing.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/AggregatedBamQC.wdl' + - 'tasks/broad/BamToCram.wdl' + - 'pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl' + - 'pipelines/broad/reprocessing/cram_to_unmapped_bams/CramToUnmappedBams.wdl' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'tasks/broad/DragenTasks.wdl' + - 'verification/VerifyExomeReprocessing.wdl' + - 'verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl' + - 
'verification/VerifyGermlineSingleSample.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_whole_genome_reprocessing.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +env: + # pipeline configuration + PIPELINE_NAME: TestWholeGenomeReprocessing + DOCKSTORE_PIPELINE_NAME: WholeGenomeReprocessing + PIPELINE_DIR: "pipelines/broad/reprocessing/wgs" + + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestWholeGenomeReprocessing: + runs-on: ubuntu-latest + # Add "id-token" with the intended permissions. 
+ permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ github.event.inputs.testType }}" >> $GITHUB_ENV + echo "testType=${{ github.event.inputs.testType }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new 
method configuration + run: | + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ env.PIPELINE_NAME }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." 
+ echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + # Wait 5.5 minutes for Dockstore to update + sleep 330 + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + $DOCKSTORE_PIPELINE_NAME \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" 
+ exit 1 + + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ github.event.inputs.updateTruth || 'false' }}" + USE_CALL_CACHE="${{ github.event.inputs.useCallCache || 'true' }}" + TRUTH_BRANCH="${{ github.event.inputs.truthBranch || 'master' }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + # Initialize arrays to track submission and workflow statuses + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="$PIPELINE_DIR/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/$DOCKSTORE_PIPELINE_NAME/results/$CURRENT_TIME" + + + + # 1. 
Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "$PIPELINE_NAME" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload test input file + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY seconds..." 
+ ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Continue with polling and output retrieval... + + # 2. After all submissions are done, start polling for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." 
+ for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "$PIPELINE_NAME") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate the final summary after all processing is complete + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + # Generate the Terra URL for the submission + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + # Write the complete summary once at the end + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + # Exit with error if any workflows failed + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() # Ensures it runs regardless of success or failure + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "$PIPELINE_NAME" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "${PIPELINE_NAME}_${testType}_${BRANCH_NAME}") + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." 
+ exit 1 + fi + + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/pipelines/broad/reprocessing/cram_to_unmapped_bams/test_inputs/Plumbing/G96830.NA12878.WGS.json b/pipelines/broad/reprocessing/cram_to_unmapped_bams/test_inputs/Plumbing/G96830.NA12878.WGS.json index 4a75091350..896592d9b6 100644 --- a/pipelines/broad/reprocessing/cram_to_unmapped_bams/test_inputs/Plumbing/G96830.NA12878.WGS.json +++ b/pipelines/broad/reprocessing/cram_to_unmapped_bams/test_inputs/Plumbing/G96830.NA12878.WGS.json @@ -1,5 +1,5 @@ { - "CramToUnmappedBams.input_cram": "gs://broad-gotc-test-storage/single_sample/plumbing/truth/{TRUTH_BRANCH}/20k/NA12878_PLUMBING.cram", + "CramToUnmappedBams.input_cram": "gs://broad-gotc-test-storage/single_sample/plumbing/truth/master/20k/NA12878_PLUMBING.cram", "CramToUnmappedBams.output_map": "gs://broad-gotc-test-storage/germline_single_sample/wgs/plumbing/bams/G96830.NA12878/readgroupid_to_bamfilename_map.txt", "CramToUnmappedBams.base_file_name": "G96830.NA12878.WGS", "CramToUnmappedBams.unmapped_bam_suffix": ".unmapped.bam", diff --git a/pipelines/broad/reprocessing/wgs/test_inputs/Plumbing/G96830.NA12878.json b/pipelines/broad/reprocessing/wgs/test_inputs/Plumbing/G96830.NA12878.json index 6a0f7293ca..da81415881 100644 --- a/pipelines/broad/reprocessing/wgs/test_inputs/Plumbing/G96830.NA12878.json +++ b/pipelines/broad/reprocessing/wgs/test_inputs/Plumbing/G96830.NA12878.json @@ -1,5 +1,5 @@ { - 
"WholeGenomeReprocessing.input_cram": "gs://broad-gotc-test-storage/single_sample/plumbing/truth/{TRUTH_BRANCH}/20k/NA12878_PLUMBING.cram", + "WholeGenomeReprocessing.input_cram": "gs://broad-gotc-test-storage/single_sample/plumbing/truth/master/20k/NA12878_PLUMBING.cram", "WholeGenomeReprocessing.output_map": "gs://broad-gotc-test-storage/germline_single_sample/wgs/plumbing/bams/G96830.NA12878/readgroupid_to_bamfilename_map.txt", "WholeGenomeReprocessing.sample_name": "NA12878 PLUMBING", diff --git a/pipelines/skylab/atac/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/atac/test_inputs/Plumbing/10k_pbmc_downsampled.json new file mode 100644 index 0000000000..2b0ead12a3 --- /dev/null +++ b/pipelines/skylab/atac/test_inputs/Plumbing/10k_pbmc_downsampled.json @@ -0,0 +1,23 @@ +{ + "ATAC.input_id":"10k_PBMC_downsampled", + "ATAC.cloud_provider":"gcp", + "ATAC.read1_fastq_gzipped":[ + "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R1_atac.fastq.gz" + ], + "ATAC.read2_fastq_gzipped":[ + "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R2_atac.fastq.gz" + ], + "ATAC.read3_fastq_gzipped":[ + "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R3_atac.fastq.gz" + ], + "ATAC.tar_bwa_reference":"gs://gcp-public-data--broad-references/hg38/v0/bwa/v2_2_1/bwa-mem2-2.2.1-Human-GENCODE-build-GRCh38.tar", + "ATAC.chrom_sizes":"gs://broad-gotc-test-storage/Multiome/input/hg38.chrom.sizes", + "ATAC.cpu_platform_bwa":"Intel Cascade Lake", + "ATAC.num_threads_bwa":"16", + "ATAC.mem_size_bwa":"64", + "ATAC.atac_nhash_id":"example_1234", + "ATAC.annotations_gtf":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", + "ATAC.whitelist":"gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt", + "ATAC.vm_size":"Standard_M128s" + } + \ No newline at end of file diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json 
b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json index 418063d6eb..e1e025c4eb 100644 --- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json +++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json @@ -22,7 +22,7 @@ "PairedTag.preindex":"true", "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake", "PairedTag.Atac_preindex.num_threads_bwa":"16", - "PairedTag.Atac_preindex.mem_size_bwa":"64", + "PairedTag.Atac_preindex.mem_size_bwa":"64", "PairedTag.soloMultiMappers":"Uniform", "PairedTag.cloud_provider": "gcp", "PairedTag.gex_nhash_id":"example_1234", diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json index f682f59a1c..102acb73ab 100644 --- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json +++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json @@ -22,7 +22,7 @@ "PairedTag.preindex":"false", "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake", "PairedTag.Atac_preindex.num_threads_bwa":"16", - "PairedTag.Atac_preindex.mem_size_bwa":"64", + "PairedTag.Atac_preindex.mem_size_bwa":"64", "PairedTag.soloMultiMappers":"Uniform", "PairedTag.cloud_provider": "gcp", "PairedTag.gex_nhash_id":"example_1234", diff --git a/scripts/dockstore_api/fetch_dockstore_commit.py b/scripts/dockstore_api/fetch_dockstore_commit.py new file mode 100644 index 0000000000..430cd80a2d --- /dev/null +++ b/scripts/dockstore_api/fetch_dockstore_commit.py @@ -0,0 +1,51 @@ +import requests +import sys + +def fetch_commit_id(token, repository, version_name): + # Fetch the workflow data + url = f"https://dockstore.org/api/workflows/path/workflow/github.com%2Fbroadinstitute%2Fwarp%2F{repository}/published" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/json", + } + + response = requests.get(url, headers=headers) + 
response.raise_for_status() + data = response.json() + + # Extract workflow ID and version ID + workflow_id = data.get("id") + version_id = next( + (version["id"] for version in data.get("workflowVersions", []) + if version["name"] == version_name), + None + ) + + if not workflow_id or not version_id: + raise ValueError("Workflow ID or Version ID could not be found.") + + # Fetch the specific version details to get the commit ID + version_url = f"https://dockstore.org/api/workflows/{workflow_id}/workflowVersions/{version_id}" + version_response = requests.get(version_url, headers=headers) + version_response.raise_for_status() + version_data = version_response.json() + + # Extract commit ID + commit_id = version_data.get("commitID") + if not commit_id: + raise ValueError("Commit ID could not be found.") + + return commit_id + +if __name__ == "__main__": + if len(sys.argv) != 4: + print("Usage: python fetch_dockstore_commit.py ") + sys.exit(1) + + _, token, repository, version_name = sys.argv + + try: + commit_id = fetch_commit_id(token, repository, version_name) + print(commit_id) + except Exception as e: + print(f"Error: {e}") \ No newline at end of file diff --git a/scripts/firecloud_api/UpdateTestInputs.py b/scripts/firecloud_api/UpdateTestInputs.py new file mode 100644 index 0000000000..1eb45d85e9 --- /dev/null +++ b/scripts/firecloud_api/UpdateTestInputs.py @@ -0,0 +1,128 @@ +import argparse +import json +import os +import ast +from decimal import Decimal + +def format_float(value): + """Format float to avoid scientific notation for small numbers.""" + if isinstance(value, (float, int)): + # Convert to Decimal for precise string representation + return str(Decimal(str(value))) + return value + +def update_test_inputs(inputs_json, truth_path, results_path, update_truth, branch_name): + with open(inputs_json, 'r') as file: + test_inputs = json.load(file) + + # Get the sample name from the test inputs JSON + sample_name = 
os.path.splitext(os.path.basename(inputs_json))[0] + + # Get the pipeline name from the test inputs JSON + pipeline_name = next(iter(test_inputs)).split('.')[0] + + # Append "Test" in front of the pipeline name + test_name = f"Test{pipeline_name}" + + # Update all keys and ensure nested inputs are handled correctly + updated_inputs = {} + for key, value in test_inputs.items(): + # Split the key to analyze its structure + key_parts = key.split('.') + + # Replace the top-level component with the test_name + key_parts[0] = test_name + + # For nested keys (more than two parts), append the original pipeline name with a `.` + if len(key_parts) > 2: + key_parts[1] = f"{pipeline_name}.{key_parts[1]}" + + # Reconstruct the updated key + new_key = '.'.join(key_parts) + + # Handle different value types appropriately + if isinstance(value, list): + processed_value = [] + for item in value: + if isinstance(item, str) and item.startswith('[') and item.endswith(']'): + try: + inner_list = ast.literal_eval(item) + processed_value.extend(inner_list) + except (ValueError, SyntaxError): + processed_value.append(item) + else: + processed_value.append(item) + updated_inputs[new_key] = processed_value + elif isinstance(value, float): + # Format float values to avoid scientific notation + updated_inputs[new_key] = format_float(value) + else: + updated_inputs[new_key] = value + + # Add the truth_path and results_path to the updated inputs + updated_inputs[f"{test_name}.results_path"] = f"{results_path}/{sample_name}/" + updated_inputs[f"{test_name}.truth_path"] = f"{truth_path}/{sample_name}/" + updated_inputs[f"{test_name}.update_truth"] = update_truth + + # Convert the dictionary to JSON string with explicit float formatting + json_str = json.dumps(updated_inputs, indent=4) + + # Save the updated test inputs JSON + output_name = f"updated_{sample_name}_{branch_name}.json" + with open(output_name, 'w') as file: + file.write(json_str) + + print(f"{output_name}") + return output_name + 
+def main(): + description = """This script updates the test inputs JSON to work with the test wrapper WDL, + which runs the pipeline and verification""" + + parser = argparse.ArgumentParser(description=description) + + parser.add_argument( + "--truth_path", + dest="truth_path", + required=True, + help="The base path where the truth data is stored", + ) + + parser.add_argument( + "--results_path", + dest="results_path", + required=True, + help="The base path where the test data will be stored", + ) + + parser.add_argument( + "--inputs_json", + dest="inputs_json", + required=True, + help="The JSON file containing the test inputs, formatted to run the pipeline WDL. " + "This will be updated to run the wrapper Test WDL", + ) + + parser.add_argument( + "--update_truth", + dest="update_truth", + default="false", + required=False, + choices=["true", "false"], + help="Boolean flag to update the truth data. If true, the truth data will be updated with the test data. ", + ) + + parser.add_argument( + "--branch_name", + required=True, + help="Branch name of the current pipeline run") + + args = parser.parse_args() + # convert the update_truth flag to a boolean + update_truth_bool = args.update_truth.lower() == "true" + + # Update the test inputs to work with the test wrapper WDL + update_test_inputs(args.inputs_json, args.truth_path, args.results_path, update_truth_bool, args.branch_name) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/firecloud_api/firecloud_api.py b/scripts/firecloud_api/firecloud_api.py index 95d5e42b29..ec00f5e75b 100644 --- a/scripts/firecloud_api/firecloud_api.py +++ b/scripts/firecloud_api/firecloud_api.py @@ -1,88 +1,231 @@ -""" -firecloud_api.py -Author: Kevin Palis - -This module provides an object-oriented interface for interacting with the Firecloud REST API. -It includes functionalities to submit workflows, retrieve workflow outputs, and monitor -workflow statuses. 
- -Classes: - - FirecloudAPI: A class to handle Firecloud API interactions. - -Usage: - Initialize the FirecloudAPI class with API token, namespace, and workspace details, and - call its methods to interact with the Firecloud service. -""" - +import base64 +import json import requests +from datetime import datetime, timezone +from urllib.parse import quote +from google.auth.transport.requests import Request +from google.oauth2 import service_account +from google.auth import credentials +import argparse +import logging import time -import json -import sys + +# Configure logging to display INFO level and above messages +logging.basicConfig( + level=logging.INFO, # This will show INFO and higher levels (INFO, WARNING, ERROR, CRITICAL) + format='%(asctime)s - %(levelname)s - %(message)s' +) class FirecloudAPI: - def __init__(self, token, namespace, workspace_name): + def __init__(self, workspace_namespace, workspace_name, sa_json_b64, user, action, method_namespace, method_name): + self.sa_json_b64 = sa_json_b64 + self.namespace = workspace_namespace + self.workspace_name = workspace_name + self.user = user # Store the user email + self.base_url = "https://api.firecloud.org/api" + self.action = action + self.method_namespace = method_namespace + self.method_name = method_name + + # Setup credentials once during initialization + scopes = ['profile', 'email', 'openid'] + decoded_sa = base64.b64decode(sa_json_b64).decode('utf-8') + sa_credentials = service_account.Credentials.from_service_account_info( + json.loads(decoded_sa), + scopes=scopes + ) + self.delegated_creds = sa_credentials.with_subject(user) + + def get_method_config_name(self, pipeline_name, branch_name, test_type): """ - Initializes the FirecloudAPI object with authentication and workspace details. 
+ Helper method to consistently generate method configuration names - :param token: API access token - :param namespace: Workspace namespace - :param workspace_name: Workspace name + :param pipeline_name: Name of the pipeline + :param branch_name: Name of the branch + :param test_type: Type of test (Scientific or Plumbing) + :return: Formatted method configuration name """ - self.token = token - self.namespace = namespace - self.workspace_name = workspace_name - self.base_url = "https://api.firecloud.org/api" - self.headers = { - 'accept': '*/*', - 'Authorization': f'Bearer {self.token}', + return f"{pipeline_name}_{test_type}_{branch_name}" + + def build_auth_headers(self, token: str): + if not self.delegated_creds.valid: + logging.info("Refreshing credentials.") + self.delegated_creds.refresh(Request()) + token = self.delegated_creds.token + return { + "content-type": "application/json", + "Authorization": f"Bearer {token}", } - def get_workflow_outputs(self, submission_id, workflow_id, pipeline_name): + def get_user_token(self, credentials: credentials): """ - Fetches workflow outputs from the Firecloud API. 
+ Get test user's access token + """ + # if token is expired or about to expire in 10 seconds, refresh and then use it + if not credentials.valid: + logging.info("Fetching user's new access token") + credentials.refresh(Request()) + logging.info("Token refreshed.") + else: + expiry_timestamp = credentials.expiry.replace(tzinfo=timezone.utc).timestamp() + now_timestamp = datetime.now(timezone.utc).timestamp() + # if token is about to expire in 1 minute, refresh and then use it + if expiry_timestamp - now_timestamp < 60: + logging.info("Fetching user's new access token") + credentials.refresh(Request()) + logging.info("Token refreshed.") - :param submission_id: The ID of the submission - :param workflow_id: The ID of the workflow - :param pipeline_name: The name of the pipeline whose outputs are required - :return: Outputs dictionary and a list of output values + return credentials.token + + def submit_job(self, submission_data_file): + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) + url = f"{self.base_url}/workspaces/{self.namespace}/{quote(self.workspace_name)}/submissions" + response = requests.post(url, json=submission_data_file, headers=headers) + + # Print status code and response body for debugging + logging.info(f"Response status code for submitting job: {response.status_code}") + logging.info(f"Response body: {response.text}") + + if response.status_code == 201: + try: + # Parse the response as JSON + response_json = response.json() + + # Extract the submissionId + submission_id = response_json.get("submissionId", None) + if submission_id: + logging.info(f"Submission ID extracted: {submission_id}") + return submission_id + else: + logging.error("Error: submissionId not found in the response.") + return None + except json.JSONDecodeError: + logging.error("Error: Failed to parse JSON response.") + return None + else: + logging.error(f"Failed to submit job. 
Status code: {response.status_code}") + logging.error(f"Response body: {response.text}") + return None + + + def create_new_method_config(self, branch_name, pipeline_name): """ - # Construct the API endpoint URL for fetching workflow outputs - url = f"{self.base_url}/workspaces/{self.namespace}/{self.workspace_name}/submissions/{submission_id}/workflows/{workflow_id}/outputs" - response = requests.get(url, headers=self.headers) + Creates a new method configuration in the workspace via Firecloud API. - # Check if the API request was successful + :param branch_name: The branch name + :param pipeline_name: The name of the pipeline + :return: The name of the created method configuration or None if failed + """ + + # Create method config name with test type + method_config_name = self.get_method_config_name(pipeline_name, branch_name, args.test_type) + + payload = { + "deleted": False, + "inputs": {}, + "methodConfigVersion": 0, + "methodRepoMethod": { + "methodUri": f"dockstore://github.com/broadinstitute/warp/{pipeline_name}/{branch_name}", + "sourceRepo": "dockstore", + "methodPath": f"github.com/broadinstitute/warp/{pipeline_name}", + "methodVersion": f"{branch_name}" + }, + "name": method_config_name, + "namespace": "warp-pipelines", + "outputs": {}, + "prerequisites": {} + } + logging.info(f"Creating new method configuration: {json.dumps(payload, indent=2)}") + + # Construct the API endpoint URL for creating a new method configuration + url = f"{self.base_url}/workspaces/{self.namespace}/{quote(self.workspace_name)}/method_configs/{self.namespace}/{method_config_name}" + + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) + + # Create the new method configuration in the workspace + response = requests.put(url, headers=headers, json=payload) + + # Check if the method configuration was created successfully if response.status_code == 200: - json_response = response.json() - # Extract outputs for the specified pipeline name - 
outputs = json_response.get('tasks', {}).get(pipeline_name, {}).get('outputs', {}) - output_values = list(outputs.values()) - return outputs, output_values + logging.info(f"Method configuration {method_config_name} created successfully.") + return method_config_name else: - print(f"Failed to retrieve workflow outputs. Status code: {response.status_code}") - return None, None + logging.error(f"Failed to create method configuration. Status code: {response.status_code}") + logging.error(f"Response body: {response.text}") + return None - def create_submission(self, submission_data): + + def upload_test_inputs(self, pipeline_name, test_inputs, branch_name, test_type): """ - Submits a workflow to the Firecloud API. + Uploads test inputs to the workspace via Firecloud API. - :param submission_data: JSON data containing submission details - :return: Submission ID if successful, None otherwise + :param test_inputs: JSON data containing test inputs + :param pipeline_name: The name of the pipeline + :param branch_name: The name of the branch + :param test_type: The type of test (Scientific or Plumbing) + :return: True if successful, False otherwise """ - # Construct the API endpoint URL for creating a new submission - url = f"{self.base_url}/workspaces/{self.namespace}/{self.workspace_name}/submissions" - response = requests.post(url, headers=self.headers, json=submission_data) - # Check if the submission was created successfully - if response.status_code == 201: - submission_id = response.json().get('submissionId') - #print(f"Submission created with ID: {submission_id}") - return submission_id + method_config_name = self.get_method_config_name(pipeline_name, branch_name, test_type) + url = f"{self.base_url}/workspaces/{self.namespace}/{quote(self.workspace_name)}/method_configs/{self.namespace}/{method_config_name}" + + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) + + # get the current method configuration + response = 
requests.get(url, headers=headers) + + if response.status_code == 404: + logging.info(f"Method config {method_config_name} not found. Creating new config...") + if not self.create_new_method_config(branch_name, pipeline_name): + logging.error("Failed to create new method configuration.") + return False + response = requests.get(url, headers=headers) + if response.status_code != 200: + logging.error(f"Failed to get method configuration. Status code: {response.status_code}") + return False + + config = response.json() + print(f"Current method configuration: {json.dumps(config, indent=2)}") + # update the config with the new inputs + print(f"Opening test inputs file: {test_inputs}") + with open(test_inputs, 'r') as file: + inputs_json = json.load(file) + print("Test inputs loaded successfully.") + inputs_json = self.quote_values(inputs_json) + print(f"here is test json after quote_values: {json.dumps(inputs_json, indent=2)}") + config["inputs"] = inputs_json + + # Construct the methodUri with the branch name + base_url = f"github.com/broadinstitute/warp/{pipeline_name}" + method_uri = f"dockstore://{quote(base_url)}/{branch_name}" + print(f"Updating methodUri with branch name: {method_uri}") + config["methodRepoMethod"]["methodUri"] = method_uri + + print(f"Updating methodVersion with branch name: {branch_name}") + config["methodRepoMethod"]["methodVersion"] = branch_name + + # We need to increment the methodConfigVersion by 1 every time we update the method configuration + config["methodConfigVersion"] += 1 # Increment version number by 1 + print(f"Updated method configuration: {json.dumps(config, indent=2)}") + + + # post the updated method config to the workspace + response = requests.post(url, headers=headers, json=config) + print(f"Response status code for uploading inputs: {response.status_code}") + print(f"Response text: {response.text}") + + # Check if the test inputs were uploaded successfully + if response.status_code == 200: + print("Test inputs uploaded 
successfully.") + return True else: - print(f"Failed to create submission. Status code: {response.status_code}") - return None + print(f"Failed to upload test inputs. Status code: {response.status_code}") + return False - - def poll_submission_status(self, submission_id): + def poll_job_status(self, submission_id): """ Polls the status of a submission until it is complete and returns a dictionary of workflow IDs and their statuses. @@ -95,20 +238,22 @@ def poll_submission_status(self, submission_id): # Continuously poll the status of the submission until completion while True: - status_response = requests.get(status_url, headers=self.headers) + # Get the token and headers + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) + status_response = requests.get(status_url, headers=headers) # Check if the response status code is successful (200) if status_response.status_code != 200: - print(f"Error: Received status code {status_response.status_code}", file=sys.stderr) - print(f"Response content: {status_response.text}", file=sys.stderr) + logging.error(f"Error: Received status code {status_response.status_code}") + logging.info(f"Response content: {status_response.text}") return {} - try: # Parse the response as JSON status_data = status_response.json() except json.JSONDecodeError: - print("Error decoding JSON response.", file=sys.stderr) - print(f"Response content: {status_response.text}", file=sys.stderr) + logging.error("Error decoding JSON response.") + logging.info(f"Response content: {status_response.text}") return {} # Retrieve workflows and their statuses @@ -124,63 +269,324 @@ def poll_submission_status(self, submission_id): if submission_status == "Done": break - # Wait for 60 seconds before polling again - time.sleep(60) + # Wait for 20 seconds before polling again + time.sleep(20) return workflow_status_map + def quote_values(self, inputs_json): + """ + Format JSON values with proper handling of nested structures + 
""" + def format_value(val): + if isinstance(val, bool): + return str(val).lower() + elif isinstance(val, dict): + return json.dumps(val, indent=2) + elif isinstance(val, list): + if all(isinstance(x, str) for x in val): + return json.dumps(val) + return json.dumps([format_value(x) for x in val]) + elif isinstance(val, (int, float)): + return str(val) + elif val is None: + return "" + elif isinstance(val, str): + if val.startswith("{") and val.endswith("}"): + try: + parsed = json.loads(val) + return json.dumps(parsed, indent=2) + except json.JSONDecodeError: + return f'"{val}"' + return f'"{val}"' + return f'"{str(val)}"' -# Bash Script Interaction -if __name__ == "__main__": - import argparse - - # Set up command-line argument parsing - parser = argparse.ArgumentParser(description='Interact with Firecloud API.') - parser.add_argument('--token', required=True, help='API access token') - parser.add_argument('--namespace', required=True, help='Workspace namespace') - parser.add_argument('--workspace', required=True, help='Workspace name') - parser.add_argument('--action', required=True, choices=['get_outputs', 'submit', 'poll_status'], help='Action to perform') - parser.add_argument('--submission_id', help='Submission ID (required for get_outputs and poll_status)') - parser.add_argument('--workflow_id', help='Workflow ID (required for get_outputs)') - parser.add_argument('--pipeline_name', help='Pipeline name (required for get_outputs)') - parser.add_argument('--submission_data_file', help='Path to submission data JSON file (required for submit)') + return {key: format_value(value) for key, value in inputs_json.items()} - args = parser.parse_args() + def get_workflow_outputs(self, submission_id, workflow_id, pipeline_name): + """ + Fetches workflow outputs from the Firecloud API. 
- # Initialize the FirecloudAPI instance with provided arguments - firecloud_api = FirecloudAPI(args.token, args.namespace, args.workspace) + :param submission_id: The ID of the submission + :param workflow_id: The ID of the workflow + :param pipeline_name: The name of the pipeline whose outputs are required + :return: Outputs dictionary and a list of output values + """ + # Construct the API endpoint URL for fetching workflow outputs + url = f"{self.base_url}/workspaces/{self.namespace}/{self.workspace_name}/submissions/{submission_id}/workflows/{workflow_id}/outputs" + response = requests.get(url, headers=self.headers) + + # Check if the API request was successful + if response.status_code == 200: + json_response = response.json() + # Extract outputs for the specified pipeline name + outputs = json_response.get('tasks', {}).get(pipeline_name, {}).get('outputs', {}) + output_values = list(outputs.values()) + return outputs, output_values + else: + logging.error(f"Failed to retrieve workflow outputs. 
Status code: {response.status_code}") + return None, None + + #def gsutil_copy(self, source, destination): + # #client = storage.Client() # Uses GOOGLE_APPLICATION_CREDENTIALS implicitly + # source_bucket_name, source_blob_name = source.replace("gs://", "").split("/", 1) + # destination_bucket_name, destination_blob_name = destination.replace("gs://", "").split("/", 1) - # Perform actions based on the specified action argument - if args.action == 'get_outputs': - if not all([args.submission_id, args.workflow_id, args.pipeline_name]): - print("For 'get_outputs', --submission_id, --workflow_id, and --pipeline_name are required.") + # source_bucket = self.storage_client.bucket(source_bucket_name) + # source_blob = source_bucket.blob(source_blob_name) + # destination_bucket = self.storage_client.bucket(destination_bucket_name) + + # source_bucket.copy_blob(source_blob, destination_bucket, destination_blob_name) + + def delete_method_config(self, method_config_name): + """ + Deletes a method configuration from the workspace. + + :param method_config_name: The name of the method configuration to delete + :return: True if deletion is successful, False otherwise + """ + url = f"{self.base_url}/workspaces/{self.namespace}/{quote(self.workspace_name)}/method_configs/{self.namespace}/{method_config_name}" + + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) + + # Send a DELETE request to delete the method configuration + response = requests.delete(url, headers=headers) + + if response.status_code == 204: + logging.info(f"Method configuration {method_config_name} deleted successfully.") + print("True") + return True else: - outputs, output_values = firecloud_api.get_workflow_outputs(args.submission_id, args.workflow_id, args.pipeline_name) - #print(outputs) - # Convert the dictionary, outputs, to a JSON string and print it + logging.error(f"Failed to delete method configuration {method_config_name}. 
Status code: {response.status_code}") + logging.error(f"Response body: {response.text}") + return False + + def get_active_submissions(self, method_config_name=None): + """ + Get all active workflow submissions for the workspace. + Optionally filter by method configuration name. + """ + url = f"{self.base_url}/workspaces/{self.namespace}/{quote(self.workspace_name)}/submissions" + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) + + response = requests.get(url, headers=headers) + + if response.status_code != 200: + logging.error(f"Failed to get submissions. Status code: {response.status_code}") + logging.error(f"Response body: {response.text}") + return [] + + submissions = response.json() + active_submissions = [] + + for submission in submissions: + # Check if submission is active (not Done, Aborted, or Failed) + if submission['status'] in ['Submitted', 'Running', 'Queued']: + config_name = submission.get('methodConfigurationName', '') + if config_name.startswith(method_config_name): + active_submissions.append(submission) + + return active_submissions + + def cancel_submission(self, submission_id): + """ + Cancel a specific workflow submission. + """ + url = f"{self.base_url}/workspaces/{self.namespace}/{quote(self.workspace_name)}/submissions/{submission_id}" + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) + + response = requests.delete(url, headers=headers) + + if response.status_code not in [204]: + logging.error(f"Failed to cancel submission {submission_id}. Status code: {response.status_code}") + logging.error(f"Response body: {response.text}") + return False + + logging.info(f"Successfully cancelled submission {submission_id}") + return True + + def cancel_old_submissions(self, pipeline_name, branch_name): + """ + Cancel all active submissions for a pipeline's method configuration. + Returns the number of cancelled submissions. 
+ """ + method_config_name = self.get_method_config_name(pipeline_name, branch_name, args.test_type) + active_submissions = self.get_active_submissions(method_config_name) + cancelled_count = 0 + + for submission in active_submissions: + if self.cancel_submission(submission['submissionId']): + cancelled_count += 1 + logging.info(f"Cancelled submission {submission['submissionId']}") + + return cancelled_count + + + def main(self): + logging.info("Starting process based on action.") + + if self.action == "submit_job": + submission_id = self.submit_job() + logging.info(f"Job submission complete with ID: {submission_id}") + elif self.action == "create_new_method_config": + if not args.pipeline_name or not args.branch_name: + parser.error("Arguments --pipeline_name and --branch_name are required for 'create_new_method_config'") + method_config_name = self.create_new_method_config(args.branch_name, args.pipeline_name) + print(method_config_name) + if method_config_name: + logging.info(f"Method configuration created with name: {method_config_name}") + else: + logging.error("Failed to create method configuration.") + elif self.action == "delete_method_config": + if not args.method_config_name: + if not all([args.pipeline_name, args.branch_name]): + parser.error("Either --method_config_name or both --pipeline_name and --branch_name are required") + method_config_name = self.get_method_config_name(args.pipeline_name, args.branch_name, args.test_type) + else: + method_config_name = args.method_config_name + result = self.delete_method_config(method_config_name) + print(str(result).lower()) + elif self.action == "upload_test_inputs": + success = self.upload_test_inputs(self.pipeline_name, self.test_input_file, self.branch_name, self.test_type) + if success: + logging.info("Test inputs uploaded successfully.") + else: + logging.error("Failed to upload test inputs.") + elif self.action == "poll_job_status": + status = self.poll_job_status() + logging.info(f"Final job status: 
{status}") + elif self.action == "create_new_method_config": + method_config_name = self.create_new_method_config(self.branch_name, self.pipeline_name) + if method_config_name: + logging.info("Method configuration created successfully.") + else: + logging.error("Failed to create method configuration.") + elif self.action == "delete_method_config": + if not args.method_config_name: + parser.error("Argument --method_config_name is required for 'delete_method_config'") + else: + # Delete the method configuration + result = self.delete_method_config(args.method_config_name) + if result: + logging.info("Method configuration deleted successfully.") + else: + logging.error("Failed to delete method configuration.") + elif self.action == "get_workflow_outputs": + if not args.submission_id or not args.workflow_id or not args.pipeline_name: + parser.error("Arguments --submission_id, --workflow_id, and --pipeline_name are required for 'get_workflow_outputs'") + # Fetch workflow outputs + outputs, output_values = self.get_workflow_outputs(args.submission_id, args.workflow_id, args.pipeline_name) if outputs: - print(json.dumps(outputs)) # Output the dictionary as a JSON string for bash parsing + logging.info(f"Workflow outputs: {json.dumps(outputs, indent=2)}") + logging.info(f"Output values: {output_values}") else: - print("No outputs found or an error occurred.", file=sys.stderr) + logging.error("Failed to retrieve workflow outputs.") + else: + logging.error(f"Unknown action: {self.action}") + + - elif args.action == 'submit': +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--sa-json-b64", required=True, help="Base64 encoded service account JSON") + parser.add_argument("--user", required=True, help="User email for impersonation") + parser.add_argument("--workspace-namespace", required=True, help="Namespace of the workspace.") + parser.add_argument("--workspace-name", required=True, help="Name of the workspace.") + 
parser.add_argument("--pipeline_name", help="Pipeline name (required for 'upload_test_inputs')") + parser.add_argument("--test_input_file", help="Path to test input file (required for 'upload_test_inputs')") + parser.add_argument("--branch_name", help="Branch name for the method repository (required for 'upload_test_inputs')") + parser.add_argument("--method_namespace", help="Method namespace") + parser.add_argument("--method_name", help="Method name") + parser.add_argument('--submission_data_file', help='Path to submission data JSON file (required for submit)') + parser.add_argument('--submission_id', help='Submission ID (required for poll_job_status)') + parser.add_argument('--workflow_id', help='Workflow ID (required for get_workflow_outputs)') + parser.add_argument("--source", help="Source GCS path for gsutil copy") + parser.add_argument("--destination", help="Destination GCS path for gsutil copy") + parser.add_argument("--method_config_name", help="Name of the method configuration to delete") + parser.add_argument("--test_type", help="Test type (Scientific or Plumbing)") + parser.add_argument("action", choices=["submit_job", "upload_test_inputs", "poll_job_status", "get_workflow_outputs", "create_new_method_config", "delete_method_config", "cancel_old_submissions"], + help="Action to perform: 'submit_job', 'upload_test_inputs', 'poll_job_status', 'get_workflow_outputs', 'create_new_method_config', or 'delete_method_config'") + + args = parser.parse_args() + + # Pass action to the FirecloudAPI constructor + api = FirecloudAPI( + sa_json_b64=args.sa_json_b64, + user=args.user, + workspace_namespace=args.workspace_namespace, + workspace_name=args.workspace_name, + action=args.action, + method_namespace=args.method_namespace, + method_name=args.method_name + ) + + + if args.action == "upload_test_inputs": + # Check for required arguments for upload_test_inputs action + if not args.pipeline_name or not args.test_input_file or not args.branch_name: + 
parser.error("Arguments --pipeline_name, --test_input_file, and --branch_name are required for 'upload_test_inputs'") + # Call the function to upload test inputs + api.upload_test_inputs(args.pipeline_name, args.test_input_file, args.branch_name, args.test_type) + + elif args.action == "submit_job": + # Check for required argument for submit_job action if not args.submission_data_file: - print("For 'submit', --submission_data_file is required.") + parser.error("Argument --submission_data_file is required for 'submit_job'") + # Load the submission data from the provided file else: - # Load submission data from the specified JSON file with open(args.submission_data_file, 'r') as file: submission_data = json.load(file) - submission_id = firecloud_api.create_submission(submission_data) + # Submit the job with the loaded submission data + submission_id = api.submit_job(submission_data) print(submission_id) - elif args.action == 'poll_status': - if not args.submission_id: - print("For 'poll_status', --submission_id is required.", file=sys.stderr) - else: - workflow_status_map = firecloud_api.poll_submission_status(args.submission_id) - - # Convert the dictionary to a JSON string and print it - if workflow_status_map: - print(json.dumps(workflow_status_map)) # Output the dictionary as a JSON string for bash parsing - else: - print("No workflows found or an error occurred.", file=sys.stderr) \ No newline at end of file + elif args.action == "poll_job_status": + if not args.submission_id: + parser.error("Argument --submission_id is required for 'poll_job_status'") + else: + # Poll the job status with the provided submission ID + workflow_status_map = api.poll_job_status(args.submission_id) + + # Convert the dictionary to a JSON string and print it + if workflow_status_map: + print(json.dumps(workflow_status_map)) # Output the dictionary as a JSON string for bash parsing + else: + print("No workflows found or an error occurred.") + elif args.action == 
"create_new_method_config": + # Check for required arguments for create_new_method_config action + if not args.pipeline_name or not args.branch_name: + parser.error("Arguments --pipeline_name and --branch_name are required for 'create_new_method_config'") + # Call the function to create a new method configuration + method_config_name = api.create_new_method_config(args.branch_name, args.pipeline_name) + print(method_config_name) + if method_config_name: + logging.info(f"Method configuration created with name: {method_config_name}") + else: + logging.error("Failed to create method configuration.") + elif args.action == "delete_method_config": + if not args.method_config_name: + parser.error("Argument --method_config_name is required for 'delete_method_config'") + else: + # Delete the method configuration + result = api.delete_method_config(args.method_config_name) + if result: + logging.info("Method configuration deleted successfully.") + else: + logging.error("Failed to delete method configuration.") + elif args.action == "cancel_old_submissions": + if not all([args.pipeline_name, args.branch_name]): + parser.error("Arguments --pipeline_name and --branch_name are required for 'cancel_old_submissions'") + + # Cancel old submissions + cancelled_count = api.cancel_old_submissions( + args.pipeline_name, + args.branch_name + ) + print(f"Cancelled {cancelled_count} old submissions") + + + + diff --git a/scripts/firecloud_api/requirements.txt b/scripts/firecloud_api/requirements.txt new file mode 100644 index 0000000000..16846b02d3 --- /dev/null +++ b/scripts/firecloud_api/requirements.txt @@ -0,0 +1,2 @@ +requests==2.31.0 +google-auth==2.23.3 \ No newline at end of file diff --git a/tasks/broad/TerraCopyFilesFromCloudToCloud.wdl b/tasks/broad/TerraCopyFilesFromCloudToCloud.wdl new file mode 100644 index 0000000000..66b6eb69a4 --- /dev/null +++ b/tasks/broad/TerraCopyFilesFromCloudToCloud.wdl @@ -0,0 +1,49 @@ +version 1.0 + +## Copyright Broad Institute, 2024 +## +## This 
WDL defines tasks used for moving files from place to place on Terra Platform. +## +## Runtime parameters are often optimized for Broad's Google Cloud Platform implementation. +## For program versions, see docker containers. +## +## LICENSING : +## This script is released under the WDL source code license (BSD-3) (see LICENSE in +## https://github.com/broadinstitute/wdl). Note however that the programs it calls may +## be subject to different licenses. Users are responsible for checking that they are +## authorized to run all programs before running this script. Please see the docker +## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed +## licensing information pertaining to the included programs. + +task TerraCopyFilesFromCloudToCloud { + input { + Array[String] files_to_copy + String destination_cloud_path + Float? contamination + } + + command { + set -euo pipefail + + gcloud config set storage/process_count 16 + gcloud config set storage/thread_count 2 + echo ~{default='no_contamination' contamination} > contamination + + if ! 
grep -q no_contamination contamination; then + gcloud storage cp -L cp.log contamination ~{destination_cloud_path}.contamination + fi + gcloud storage cp ~{sep=' ' files_to_copy} ~{destination_cloud_path} + } + + output { + Boolean done = true + } + + runtime { + memory: "16 GiB" + cpu: "1" + disks: "local-disk 32 HDD" + docker: "gcr.io/google.com/cloudsdktool/google-cloud-cli:499.0.0-slim" + preemptible: 3 + } +} diff --git a/verification/VerifyATAC.wdl b/verification/VerifyATAC.wdl new file mode 100644 index 0000000000..daf84254be --- /dev/null +++ b/verification/VerifyATAC.wdl @@ -0,0 +1,44 @@ +version 1.0 + +import "../verification/VerifyTasks.wdl" as VerifyTasks + +workflow VerifyATAC { + + input { + File test_atac_bam + File truth_atac_bam + + File test_fragment_file + File truth_fragment_file + + File test_atac_h5ad + File truth_atac_h5ad + + File test_atac_library_metrics + File truth_atac_library_metrics + + Boolean? done + } + + call VerifyTasks.CompareBams as CompareAtacBams { + input: + test_bam = test_atac_bam, + truth_bam = truth_atac_bam, + lenient_header = true + } + call VerifyTasks.CompareTabix as CompareFragment { + input: + test_fragment_file = test_fragment_file, + truth_fragment_file = truth_fragment_file + } + call VerifyTasks.CompareH5adFilesATAC as CompareH5adFilesATAC { + input: + test_h5ad = test_atac_h5ad, + truth_h5ad = truth_atac_h5ad + } + call VerifyTasks.CompareLibraryFiles as CompareLibraryMetrics { + input: + test_text_file = test_atac_library_metrics, + truth_text_file = truth_atac_library_metrics + } +} \ No newline at end of file diff --git a/verification/test-wdls/TestATAC.wdl b/verification/test-wdls/TestATAC.wdl new file mode 100644 index 0000000000..3ad0a1322e --- /dev/null +++ b/verification/test-wdls/TestATAC.wdl @@ -0,0 +1,157 @@ +version 1.0 + +import "../../pipelines/skylab/atac/atac.wdl" as ATAC +import "../../verification/VerifyATAC.wdl" as VerifyATAC +import "../../tasks/broad/Utilities.wdl" as Utilities +import 
"../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy + +workflow TestATAC { + + input { + # Fastq inputs + Array[String] read1_fastq_gzipped + Array[String] read2_fastq_gzipped + Array[String] read3_fastq_gzipped + + # Output prefix/base name for all intermediate files and pipeline outputs + String input_id + String cloud_provider + # Additional library aliquot ID + String? atac_nhash_id + + #Expected cells from library preparation + Int atac_expected_cells = 3000 + + # Option for running files with preindex + Boolean preindex = false + + # BWA ref + File tar_bwa_reference + # BWA machine type -- to select number of splits + Int num_threads_bwa = 128 + Int mem_size_bwa = 512 + String cpu_platform_bwa = "Intel Ice Lake" + String vm_size + + # Text file containing chrom_sizes for genome build (i.e. hg38) + File chrom_sizes + #File for annotations for calculating ATAC TSSE + File annotations_gtf + # Whitelist + File whitelist + + # TrimAdapters input + String adapter_seq_read1 = "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG" + String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" + + # These values will be determined and injected into the inputs by the scala test framework + String truth_path + String results_path + Boolean update_truth + Boolean run_cellbender = false + } + + meta { + allowNestedInputs: true + } + + call ATAC.ATAC { + input: + read1_fastq_gzipped = read1_fastq_gzipped, + read2_fastq_gzipped = read2_fastq_gzipped, + read3_fastq_gzipped = read3_fastq_gzipped, + input_id = input_id, + cloud_provider = cloud_provider, + atac_nhash_id = atac_nhash_id, + atac_expected_cells = atac_expected_cells, + preindex = preindex, + tar_bwa_reference = tar_bwa_reference, + num_threads_bwa = num_threads_bwa, + mem_size_bwa = mem_size_bwa, + cpu_platform_bwa = cpu_platform_bwa, + vm_size = vm_size, + chrom_sizes = chrom_sizes, + annotations_gtf = annotations_gtf, + whitelist = whitelist, + adapter_seq_read1 = adapter_seq_read1, + adapter_seq_read3 = 
adapter_seq_read3 + } + + + # Collect all of the pipeline outputs into single Array[String] + Array[String] pipeline_outputs = flatten([ + [ # atac file outputs + ATAC.fragment_file, + ATAC.bam_aligned_output, + ATAC.snap_metrics, + ATAC.library_metrics_file + ], + ]) + + + # Collect all of the pipeline metrics into single Array[String] + Array[String] pipeline_metrics = flatten([ + [ # File outputs + ATAC.fragment_file, + ATAC.bam_aligned_output, + ATAC.snap_metrics, + ATAC.library_metrics_file + ], + ]) + + # Copy results of pipeline to test results bucket + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { + input: + files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), + destination_cloud_path = results_path + } + + # If updating truth then copy output to truth bucket + if (update_truth){ + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { + input: + files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), + destination_cloud_path = truth_path + } + } + + # This is achieved by passing each desired file/array[files] to GetValidationInputs + if (!update_truth){ + call Utilities.GetValidationInputs as GetAtacBam { + input: + input_file = ATAC.bam_aligned_output, + results_path = results_path, + truth_path = truth_path + } + call Utilities.GetValidationInputs as GetFragmentFile { + input: + input_file = ATAC.fragment_file, + results_path = results_path, + truth_path = truth_path + } + call Utilities.GetValidationInputs as GetSnapMetrics { + input: + input_file = ATAC.snap_metrics, + results_path = results_path, + truth_path = truth_path + } + call Utilities.GetValidationInputs as GetAtacLibraryMetrics { + input: + input_file = ATAC.library_metrics_file, + results_path = results_path, + truth_path = truth_path + } + call VerifyATAC.VerifyATAC as Verify { + input: + truth_atac_bam = GetAtacBam.truth_file, + test_atac_bam = GetAtacBam.results_file, + truth_fragment_file = GetFragmentFile.truth_file, + test_fragment_file = 
GetFragmentFile.results_file, + truth_atac_h5ad = GetSnapMetrics.truth_file, + test_atac_h5ad = GetSnapMetrics.results_file, + truth_atac_library_metrics = GetAtacLibraryMetrics.truth_file, + test_atac_library_metrics = GetAtacLibraryMetrics.results_file, + done = CopyToTestResults.done + } + } +} diff --git a/verification/test-wdls/TestCramToUnmappedBams.wdl b/verification/test-wdls/TestCramToUnmappedBams.wdl index 4a9927642e..ea80f74bda 100644 --- a/verification/test-wdls/TestCramToUnmappedBams.wdl +++ b/verification/test-wdls/TestCramToUnmappedBams.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/reprocessing/cram_to_unmapped_bams/CramToUnmappedBams.wdl" as CramToUnmappedBams import "../../verification/VerifyCramToUnmappedBamsUpdated.wdl" as VerifyCramToUnmappedBamsUpdated import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestCramToUnmappedBams { @@ -22,8 +22,6 @@ workflow TestCramToUnmappedBams { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { @@ -55,21 +53,17 @@ workflow TestCramToUnmappedBams { # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } 
diff --git a/verification/test-wdls/TestExomeGermlineSingleSample.wdl b/verification/test-wdls/TestExomeGermlineSingleSample.wdl index 59110d09be..bb6424100b 100644 --- a/verification/test-wdls/TestExomeGermlineSingleSample.wdl +++ b/verification/test-wdls/TestExomeGermlineSingleSample.wdl @@ -3,187 +3,181 @@ version 1.0 import "../../pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl" as ExomeGermlineSingleSample import "../../verification/VerifyGermlineSingleSample.wdl" as VerifyGermlineSingleSample import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestExomeGermlineSingleSample { - input { - PapiSettings papi_settings - SampleAndUnmappedBams sample_and_unmapped_bams - DNASeqSingleSampleReferences references - VariantCallingScatterSettings scatter_settings - - File? fingerprint_genotypes_file - File? fingerprint_genotypes_index - - File target_interval_list - File bait_interval_list - String bait_set_name - - Boolean provide_bam_output = false - - # These values will be determined and injected into the inputs by the scala test framework - String truth_path - String results_path - Boolean update_truth - String vault_token_path - String google_account_vault_path - String cloud_provider - } - - meta { - allowNestedInputs: true - } - - # Run the pipeline - call ExomeGermlineSingleSample.ExomeGermlineSingleSample { - input: - sample_and_unmapped_bams = sample_and_unmapped_bams, - references = references, - scatter_settings = scatter_settings, - fingerprint_genotypes_file = fingerprint_genotypes_file, - fingerprint_genotypes_index = fingerprint_genotypes_index, - papi_settings = papi_settings, - target_interval_list = target_interval_list, - bait_interval_list = bait_interval_list, - bait_set_name = bait_set_name, - provide_bam_output = provide_bam_output, - cloud_provider = cloud_provider - 
} - - # Collect all of the pipeline outputs into a single Array[String]] - Array[String] pipeline_outputs = flatten([ - [ # File outputs - ExomeGermlineSingleSample.selfSM, - ExomeGermlineSingleSample.agg_insert_size_histogram_pdf, - ExomeGermlineSingleSample.agg_quality_distribution_pdf, - ExomeGermlineSingleSample.calculate_read_group_checksum_md5, - ExomeGermlineSingleSample.agg_insert_size_histogram_pdf, - ExomeGermlineSingleSample.agg_quality_distribution_pdf, - ExomeGermlineSingleSample.output_cram, - ExomeGermlineSingleSample.output_cram_index, - ExomeGermlineSingleSample.output_cram_md5, - ExomeGermlineSingleSample.validate_cram_file_report, - ExomeGermlineSingleSample.output_vcf, - ExomeGermlineSingleSample.output_vcf_index - ], # Array[File] outputs - ExomeGermlineSingleSample.unsorted_read_group_base_distribution_by_cycle_pdf, - ExomeGermlineSingleSample.unsorted_read_group_insert_size_histogram_pdf, - ExomeGermlineSingleSample.unsorted_read_group_quality_by_cycle_pdf, - ExomeGermlineSingleSample.unsorted_read_group_quality_distribution_pdf, - # File? 
outputs - select_all([ExomeGermlineSingleSample.output_bqsr_reports]), - select_all([ExomeGermlineSingleSample.output_bam]), - select_all([ExomeGermlineSingleSample.output_bam_index]), - ]) - - # Collect all of the pipeline metrics into a single Array[String] - Array[String] pipeline_metrics = flatten([ - [ # File outputs - ExomeGermlineSingleSample.read_group_alignment_summary_metrics, - ExomeGermlineSingleSample.agg_alignment_summary_metrics, - ExomeGermlineSingleSample.agg_bait_bias_detail_metrics, - ExomeGermlineSingleSample.agg_bait_bias_summary_metrics, - ExomeGermlineSingleSample.agg_insert_size_metrics, - ExomeGermlineSingleSample.agg_pre_adapter_detail_metrics, - ExomeGermlineSingleSample.agg_pre_adapter_summary_metrics, - ExomeGermlineSingleSample.agg_quality_distribution_metrics, - ExomeGermlineSingleSample.agg_error_summary_metrics, - ExomeGermlineSingleSample.duplicate_metrics, - ExomeGermlineSingleSample.gvcf_summary_metrics, - ExomeGermlineSingleSample.gvcf_detail_metrics, - ExomeGermlineSingleSample.hybrid_selection_metrics, - ], # Array[File] outputs - ExomeGermlineSingleSample.quality_yield_metrics, - ExomeGermlineSingleSample.unsorted_read_group_base_distribution_by_cycle_metrics, - ExomeGermlineSingleSample.unsorted_read_group_insert_size_metrics, - ExomeGermlineSingleSample.unsorted_read_group_quality_by_cycle_metrics, - ExomeGermlineSingleSample.unsorted_read_group_quality_distribution_metrics, - # File? 
outputs - select_all([ExomeGermlineSingleSample.cross_check_fingerprints_metrics]), - select_all([ExomeGermlineSingleSample.fingerprint_summary_metrics]), - select_all([ExomeGermlineSingleSample.fingerprint_detail_metrics]), - ]) - - # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { - input: - files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, - contamination = ExomeGermlineSingleSample.contamination, - destination_cloud_path = results_path - } - - # If updating truth then copy pipeline results to truth bucket - if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { - input: - files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, - contamination = ExomeGermlineSingleSample.contamination, - destination_cloud_path = truth_path - } - } - - # If not updating truth then we need to collect all input for the validation WDL - # This is achieved by passing each desired file/array[files] to GetValidationInputs - if (!update_truth){ - call Utilities.GetValidationInputs as GetMetricsInputs { - input: - input_files = pipeline_metrics, - results_path = results_path, - truth_path = truth_path + input { + PapiSettings papi_settings + SampleAndUnmappedBams sample_and_unmapped_bams + DNASeqSingleSampleReferences references + VariantCallingScatterSettings scatter_settings + + File? fingerprint_genotypes_file + File? 
fingerprint_genotypes_index + + File target_interval_list + File bait_interval_list + String bait_set_name + + Boolean provide_bam_output = false + + # These values will be determined and injected into the inputs by the scala test framework + String truth_path + String results_path + Boolean update_truth + String cloud_provider } - call Utilities.GetValidationInputs as GetCrams { - input: - input_file = ExomeGermlineSingleSample.output_cram, - results_path = results_path, - truth_path = truth_path + meta { + allowNestedInputs: true } - call Utilities.GetValidationInputs as GetCrais { - input: - input_file = ExomeGermlineSingleSample.output_cram_index, - results_path = results_path, - truth_path = truth_path + # Run the pipeline + call ExomeGermlineSingleSample.ExomeGermlineSingleSample { + input: + sample_and_unmapped_bams = sample_and_unmapped_bams, + references = references, + scatter_settings = scatter_settings, + fingerprint_genotypes_file = fingerprint_genotypes_file, + fingerprint_genotypes_index = fingerprint_genotypes_index, + papi_settings = papi_settings, + target_interval_list = target_interval_list, + bait_interval_list = bait_interval_list, + bait_set_name = bait_set_name, + provide_bam_output = provide_bam_output, + cloud_provider = cloud_provider } - call Utilities.GetValidationInputs as GetGVCFs { - input: - input_file = ExomeGermlineSingleSample.output_vcf, - results_path = results_path, - truth_path = truth_path + # Collect all of the pipeline outputs into a single Array[String]] + Array[String] pipeline_outputs = flatten([ + [ # File outputs + ExomeGermlineSingleSample.selfSM, + ExomeGermlineSingleSample.agg_insert_size_histogram_pdf, + ExomeGermlineSingleSample.agg_quality_distribution_pdf, + ExomeGermlineSingleSample.calculate_read_group_checksum_md5, + ExomeGermlineSingleSample.agg_insert_size_histogram_pdf, + ExomeGermlineSingleSample.agg_quality_distribution_pdf, + ExomeGermlineSingleSample.output_cram, + 
ExomeGermlineSingleSample.output_cram_index, + ExomeGermlineSingleSample.output_cram_md5, + ExomeGermlineSingleSample.validate_cram_file_report, + ExomeGermlineSingleSample.output_vcf, + ExomeGermlineSingleSample.output_vcf_index + ], # Array[File] outputs + ExomeGermlineSingleSample.unsorted_read_group_base_distribution_by_cycle_pdf, + ExomeGermlineSingleSample.unsorted_read_group_insert_size_histogram_pdf, + ExomeGermlineSingleSample.unsorted_read_group_quality_by_cycle_pdf, + ExomeGermlineSingleSample.unsorted_read_group_quality_distribution_pdf, + # File? outputs + select_all([ExomeGermlineSingleSample.output_bqsr_reports]), + select_all([ExomeGermlineSingleSample.output_bam]), + select_all([ExomeGermlineSingleSample.output_bam_index]), + ]) + + # Collect all of the pipeline metrics into a single Array[String] + Array[String] pipeline_metrics = flatten([ + [ # File outputs + ExomeGermlineSingleSample.read_group_alignment_summary_metrics, + ExomeGermlineSingleSample.agg_alignment_summary_metrics, + ExomeGermlineSingleSample.agg_bait_bias_detail_metrics, + ExomeGermlineSingleSample.agg_bait_bias_summary_metrics, + ExomeGermlineSingleSample.agg_insert_size_metrics, + ExomeGermlineSingleSample.agg_pre_adapter_detail_metrics, + ExomeGermlineSingleSample.agg_pre_adapter_summary_metrics, + ExomeGermlineSingleSample.agg_quality_distribution_metrics, + ExomeGermlineSingleSample.agg_error_summary_metrics, + ExomeGermlineSingleSample.duplicate_metrics, + ExomeGermlineSingleSample.gvcf_summary_metrics, + ExomeGermlineSingleSample.gvcf_detail_metrics, + ExomeGermlineSingleSample.hybrid_selection_metrics, + ], # Array[File] outputs + ExomeGermlineSingleSample.quality_yield_metrics, + ExomeGermlineSingleSample.unsorted_read_group_base_distribution_by_cycle_metrics, + ExomeGermlineSingleSample.unsorted_read_group_insert_size_metrics, + ExomeGermlineSingleSample.unsorted_read_group_quality_by_cycle_metrics, + 
ExomeGermlineSingleSample.unsorted_read_group_quality_distribution_metrics, + # File? outputs + select_all([ExomeGermlineSingleSample.cross_check_fingerprints_metrics]), + select_all([ExomeGermlineSingleSample.fingerprint_summary_metrics]), + select_all([ExomeGermlineSingleSample.fingerprint_detail_metrics]), + ]) + + # Copy results of pipeline to test results bucket + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { + input: + files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), + contamination = ExomeGermlineSingleSample.contamination, + destination_cloud_path = results_path } - call Utilities.GetValidationInputs as GetGVCFIndexes { - input: - input_file = ExomeGermlineSingleSample.output_vcf_index, - results_path = results_path, - truth_path = truth_path + # If updating truth then copy pipeline results to truth bucket + if (update_truth){ + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { + input: + files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), + contamination = ExomeGermlineSingleSample.contamination, + destination_cloud_path = truth_path + } } - - # done is dummy input to force copy completion before verification - call VerifyGermlineSingleSample.VerifyGermlineSingleSample as Verify { - input: - truth_metrics = GetMetricsInputs.truth_files, - truth_cram = GetCrams.truth_file, - truth_crai = GetCrais.truth_file, - truth_gvcf = GetGVCFs.truth_file, - truth_gvcf_index = GetGVCFIndexes.truth_file, - test_metrics = GetMetricsInputs.results_files, - test_cram = GetCrams.results_file, - test_crai = GetCrais.results_file, - test_gvcf = GetGVCFs.results_file, - test_gvcf_index = GetGVCFIndexes.results_file, - done = CopyToTestResults.done + + # If not updating truth then we need to collect all input for the validation WDL + # This is achieved by passing each desired file/array[files] to GetValidationInputs + if (!update_truth){ + call Utilities.GetValidationInputs as GetMetricsInputs { + input: + input_files = 
pipeline_metrics, + results_path = results_path, + truth_path = truth_path + } + + call Utilities.GetValidationInputs as GetCrams { + input: + input_file = ExomeGermlineSingleSample.output_cram, + results_path = results_path, + truth_path = truth_path + } + + call Utilities.GetValidationInputs as GetCrais { + input: + input_file = ExomeGermlineSingleSample.output_cram_index, + results_path = results_path, + truth_path = truth_path + } + + call Utilities.GetValidationInputs as GetGVCFs { + input: + input_file = ExomeGermlineSingleSample.output_vcf, + results_path = results_path, + truth_path = truth_path + } + + call Utilities.GetValidationInputs as GetGVCFIndexes { + input: + input_file = ExomeGermlineSingleSample.output_vcf_index, + results_path = results_path, + truth_path = truth_path + } + + # done is dummy input to force copy completion before verification + call VerifyGermlineSingleSample.VerifyGermlineSingleSample as Verify { + input: + truth_metrics = GetMetricsInputs.truth_files, + truth_cram = GetCrams.truth_file, + truth_crai = GetCrais.truth_file, + truth_gvcf = GetGVCFs.truth_file, + truth_gvcf_index = GetGVCFIndexes.truth_file, + test_metrics = GetMetricsInputs.results_files, + test_cram = GetCrams.results_file, + test_crai = GetCrais.results_file, + test_gvcf = GetGVCFs.results_file, + test_gvcf_index = GetGVCFIndexes.results_file, + done = CopyToTestResults.done + } } - } - output { - Array[File]? metric_comparison_report_files = Verify.metric_comparison_report_files - } + output { + Array[File]? 
metric_comparison_report_files = Verify.metric_comparison_report_files + } -} +} \ No newline at end of file diff --git a/verification/test-wdls/TestExomeReprocessing.wdl b/verification/test-wdls/TestExomeReprocessing.wdl index 44905716ad..17d56a44ef 100644 --- a/verification/test-wdls/TestExomeReprocessing.wdl +++ b/verification/test-wdls/TestExomeReprocessing.wdl @@ -3,7 +3,7 @@ version 1.0 import "../../pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl" as ExomeReprocessing import "../../verification/VerifyExomeReprocessing.wdl" as VerifyExomeReprocessing import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy import "../../structs/dna_seq/DNASeqStructs.wdl" @@ -39,8 +39,6 @@ workflow TestExomeReprocessing { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } @@ -124,21 +122,17 @@ workflow TestExomeReprocessing { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy pipeline results to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestIlluminaGenotypingArray.wdl b/verification/test-wdls/TestIlluminaGenotypingArray.wdl index f70710653f..46b8c680e6 100644 --- 
a/verification/test-wdls/TestIlluminaGenotypingArray.wdl +++ b/verification/test-wdls/TestIlluminaGenotypingArray.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl" as IlluminaGenotypingArray import "../../verification/VerifyIlluminaGenotypingArray.wdl" as VerifyIlluminaGenotypingArray import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestIlluminaGenotypingArray { @@ -46,14 +46,13 @@ workflow TestIlluminaGenotypingArray { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { allowNestedInputs: true } - + + call IlluminaGenotypingArray.IlluminaGenotypingArray { input: sample_alias = sample_alias, @@ -88,10 +87,9 @@ workflow TestIlluminaGenotypingArray { disk_size = disk_size, preemptible_tries = preemptible_tries, genotype_concordance_threshold = genotype_concordance_threshold - } - + # Collect all of the pipeline outputs into single Array[String] Array[String] pipeline_outputs = flatten([ [ # File outputs @@ -108,7 +106,7 @@ workflow TestIlluminaGenotypingArray { select_all([IlluminaGenotypingArray.output_vcf_md5_cloud_path]), ]) - + # Collect all of the pipeline metrics into single Array[String] Array[String] pipeline_metrics = flatten([ # File? 
outputs @@ -127,21 +125,17 @@ workflow TestIlluminaGenotypingArray { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } - + # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { - input: + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { + input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } @@ -185,29 +179,25 @@ workflow TestIlluminaGenotypingArray { results_path = results_path, truth_path = truth_path } - call VerifyIlluminaGenotypingArray.VerifyIlluminaGenotypingArray as Verify { input: - truth_metrics = GetMetrics.truth_files, + truth_metrics = GetMetrics.truth_files, test_metrics = GetMetrics.results_files, - truth_gtc = GetGtc.truth_file, + truth_gtc = GetGtc.truth_file, test_gtc = GetGtc.results_file, - truth_vcf = GetVcf.truth_file, + truth_vcf = GetVcf.truth_file, test_vcf = GetVcf.results_file, - truth_fp_vcf = GetFpVcf.truth_file, + truth_fp_vcf = GetFpVcf.truth_file, test_fp_vcf = GetFpVcf.results_file, - truth_red_idat_md5 = GetRedIdatMd5.truth_file, + truth_red_idat_md5 = GetRedIdatMd5.truth_file, test_red_idat_md5 = GetRedIdatMd5.results_file, - truth_green_idat_md5 = GetGreenIdatMd5.truth_file, + truth_green_idat_md5 = GetGreenIdatMd5.truth_file, test_green_idat_md5 = GetGreenIdatMd5.results_file, bead_pool_manifest_file = bead_pool_manifest_file, done = CopyToTestResults.done } - } - - - - + output { + } } \ No newline at end of file diff --git a/verification/test-wdls/TestImputation.wdl 
b/verification/test-wdls/TestImputation.wdl index 5d340b333d..98b987b99c 100644 --- a/verification/test-wdls/TestImputation.wdl +++ b/verification/test-wdls/TestImputation.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/arrays/imputation/Imputation.wdl" as Imputation import "../../verification/VerifyImputation.wdl" as VerifyImputation import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestImputation { @@ -37,8 +37,6 @@ workflow TestImputation { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { @@ -98,21 +96,17 @@ workflow TestImputation { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestJointGenotyping.wdl b/verification/test-wdls/TestJointGenotyping.wdl index 389d7307b6..6951be0056 100644 --- a/verification/test-wdls/TestJointGenotyping.wdl +++ b/verification/test-wdls/TestJointGenotyping.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/dna_seq/germline/joint_genotyping/JointGenotyping.wdl" as JointGenotyping import 
"../../verification/VerifyJointGenotyping.wdl" as VerifyJointGenotyping import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestJointGenotyping { @@ -60,8 +60,6 @@ workflow TestJointGenotyping { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { @@ -142,21 +140,17 @@ workflow TestJointGenotyping { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl index 3c98269a4b..f40494dc12 100644 --- a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl +++ b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl" as MultiSampleSmartSeq2SingleNucleus import "../../verification/VerifyMultiSampleSmartSeq2SingleNucleus.wdl" as VerifyMultiSampleSmartSeq2SingleNucleus import "../../tasks/broad/Utilities.wdl" as Utilities -import 
"../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestMultiSampleSmartSeq2SingleNucleus { @@ -31,8 +31,6 @@ workflow TestMultiSampleSmartSeq2SingleNucleus { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path String cloud_provider } @@ -79,21 +77,17 @@ workflow TestMultiSampleSmartSeq2SingleNucleus { # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestMultiome.wdl b/verification/test-wdls/TestMultiome.wdl index e710bb1942..9479a9c031 100644 --- a/verification/test-wdls/TestMultiome.wdl +++ b/verification/test-wdls/TestMultiome.wdl @@ -4,7 +4,8 @@ version 1.0 import "../../pipelines/skylab/multiome/Multiome.wdl" as Multiome import "../../verification/VerifyMultiome.wdl" as VerifyMultiome import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy + workflow TestMultiome { @@ -49,8 +50,6 @@ workflow TestMultiome { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path Boolean run_cellbender = false Boolean 
run_peak_calling = false @@ -124,21 +123,17 @@ workflow TestMultiome { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestOptimus.wdl b/verification/test-wdls/TestOptimus.wdl index c980face75..79487d446e 100644 --- a/verification/test-wdls/TestOptimus.wdl +++ b/verification/test-wdls/TestOptimus.wdl @@ -3,7 +3,7 @@ version 1.0 import "../../tasks/broad/Utilities.wdl" as Utilities import "../../pipelines/skylab/optimus/Optimus.wdl" as Optimus import "../../verification/VerifyOptimus.wdl" as VerifyOptimus -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestOptimus { @@ -57,8 +57,6 @@ workflow TestOptimus { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path String cloud_provider @@ -120,21 +118,17 @@ Array[String] pipeline_outputs = flatten([ ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - 
google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy pipeline results to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestPairedTag.wdl b/verification/test-wdls/TestPairedTag.wdl index 9fcb2ebbd5..8ec9ae364a 100644 --- a/verification/test-wdls/TestPairedTag.wdl +++ b/verification/test-wdls/TestPairedTag.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/skylab/paired_tag/PairedTag.wdl" as PairedTag import "../../verification/VerifyPairedTag.wdl" as VerifyPairedTag import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestPairedTag { @@ -52,8 +52,6 @@ workflow TestPairedTag { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path Boolean run_cellbender = false String cloud_provider @@ -127,21 +125,17 @@ workflow TestPairedTag { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, 
pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestRNAWithUMIsPipeline.wdl b/verification/test-wdls/TestRNAWithUMIsPipeline.wdl index e9eedd5aa2..babae40fdc 100644 --- a/verification/test-wdls/TestRNAWithUMIsPipeline.wdl +++ b/verification/test-wdls/TestRNAWithUMIsPipeline.wdl @@ -2,7 +2,7 @@ version 1.0 import "../../tasks/broad/Utilities.wdl" as Utilities import "../../verification/VerifyRNAWithUMIs.wdl" as VerifyRNAWithUMIs -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy import "../../pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl" as RNAWithUMIsPipeline workflow TestRNAWithUMIsPipeline { @@ -48,8 +48,6 @@ workflow TestRNAWithUMIsPipeline { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { @@ -110,21 +108,17 @@ workflow TestRNAWithUMIsPipeline { Array[String] pipeline_text_metrics = select_all([RNAWithUMIsPipeline.rnaseqc2_metrics]) #Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics, pipeline_text_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy pipeline results to truth bucket if (update_truth) { - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics, pipeline_text_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff 
--git a/verification/test-wdls/TestReblockGVCF.wdl b/verification/test-wdls/TestReblockGVCF.wdl index e35ccad0af..eac450ebfb 100644 --- a/verification/test-wdls/TestReblockGVCF.wdl +++ b/verification/test-wdls/TestReblockGVCF.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl" as ReblockGVCF import "../../verification/VerifyGvcf.wdl" as VerifyGvcf import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestReblockGVCF { @@ -25,8 +25,6 @@ workflow TestReblockGVCF { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path String cloud_provider } @@ -63,21 +61,17 @@ workflow TestReblockGVCF { # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestSlideSeq.wdl b/verification/test-wdls/TestSlideSeq.wdl index b0523fee21..96a53bd7c2 100644 --- a/verification/test-wdls/TestSlideSeq.wdl +++ b/verification/test-wdls/TestSlideSeq.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/skylab/slideseq/SlideSeq.wdl" as SlideSeq import "../../verification/VerifySlideSeq.wdl" as VerifySlideSeq 
import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestSlideSeq { @@ -24,8 +24,6 @@ workflow TestSlideSeq { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path String cloud_provider } @@ -75,21 +73,17 @@ workflow TestSlideSeq { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl b/verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl index de9899439b..e8908b92de 100644 --- a/verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl +++ b/verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/dna_seq/germline/joint_genotyping/UltimaGenomics/UltimaGenomicsJointGenotyping.wdl" as UltimaGenomicsJointGenotyping import "../../verification/VerifyUltimaGenomicsJointGenotyping.wdl" as VerifyUltimaGenomicsJointGenotyping import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as 
Copy workflow TestUltimaGenomicsJointGenotyping { @@ -46,8 +46,6 @@ workflow TestUltimaGenomicsJointGenotyping { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { @@ -115,21 +113,17 @@ workflow TestUltimaGenomicsJointGenotyping { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl b/verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl index 5203abb500..5275b62cee 100644 --- a/verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl +++ b/verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl" as UltimaGenomicsWholeGenomeCramOnly import "../../verification/VerifyUltimaGenomicsWholeGenomeCramOnly.wdl" as VerifyUltimaGenomicsWholeGenomeCramOnly import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestUltimaGenomicsWholeGenomeCramOnly { @@ -23,8 +23,6 @@ workflow 
TestUltimaGenomicsWholeGenomeCramOnly { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { @@ -80,21 +78,17 @@ workflow TestUltimaGenomicsWholeGenomeCramOnly { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl b/verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl index 9e1af52645..5842a52acf 100644 --- a/verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl +++ b/verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl" as UltimaGenomicsWholeGenomeGermline import "../../verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl" as VerifyUltimaGenomicsWholeGenomeGermline import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestUltimaGenomicsWholeGenomeGermline { @@ -26,8 +26,6 @@ workflow TestUltimaGenomicsWholeGenomeGermline { String truth_path String results_path Boolean update_truth - 
String vault_token_path - String google_account_vault_path } meta { @@ -92,21 +90,17 @@ workflow TestUltimaGenomicsWholeGenomeGermline { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestVariantCalling.wdl b/verification/test-wdls/TestVariantCalling.wdl index 3054e0a1b9..9a79ac4d68 100644 --- a/verification/test-wdls/TestVariantCalling.wdl +++ b/verification/test-wdls/TestVariantCalling.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl" as VariantCalling import "../../verification/VerifyGvcf.wdl" as VerifyGvcf import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestVariantCalling { @@ -37,8 +37,6 @@ workflow TestVariantCalling { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path String cloud_provider } @@ -99,21 +97,17 @@ workflow TestVariantCalling { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: 
files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl b/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl index 16b54c3876..9c5c44cf97 100644 --- a/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl +++ b/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl @@ -3,7 +3,7 @@ version 1.0 import "../../pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl" as WholeGenomeGermlineSingleSample import "../../verification/VerifyGermlineSingleSample.wdl" as VerifyGermlineSingleSample import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestWholeGenomeGermlineSingleSample { @@ -38,8 +38,6 @@ workflow TestWholeGenomeGermlineSingleSample { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { @@ -132,22 +130,18 @@ workflow TestWholeGenomeGermlineSingleSample { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = 
google_account_vault_path, contamination = WholeGenomeGermlineSingleSample.contamination, destination_cloud_path = results_path } # If updating truth then copy pipeline results to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, contamination = WholeGenomeGermlineSingleSample.contamination, destination_cloud_path = truth_path } diff --git a/verification/test-wdls/TestWholeGenomeReprocessing.wdl b/verification/test-wdls/TestWholeGenomeReprocessing.wdl index bc5566d18e..12f3db9bfb 100644 --- a/verification/test-wdls/TestWholeGenomeReprocessing.wdl +++ b/verification/test-wdls/TestWholeGenomeReprocessing.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl" as WholeGenomeReprocessing import "../../verification/VerifyExomeReprocessing.wdl" as VerifyExomeReprocessing import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestWholeGenomeReprocessing { @@ -30,8 +30,6 @@ workflow TestWholeGenomeReprocessing { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { @@ -124,21 +122,17 @@ workflow TestWholeGenomeReprocessing { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth 
bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/Testsnm3C.wdl b/verification/test-wdls/Testsnm3C.wdl index b8bfccb705..c65ee4471b 100644 --- a/verification/test-wdls/Testsnm3C.wdl +++ b/verification/test-wdls/Testsnm3C.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/skylab/snm3C/snm3C.wdl" as snm3C import "../../verification/Verifysnm3C.wdl" as Verifysnm3C import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow Testsnm3C { @@ -33,8 +33,6 @@ workflow Testsnm3C { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { @@ -84,21 +82,17 @@ workflow Testsnm3C { # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } }