Skip to content

E2E on AWS CUDA - [DO NOT MERGE] exploration #5061

E2E on AWS CUDA - [DO NOT MERGE] exploration

E2E on AWS CUDA - [DO NOT MERGE] exploration #5061

name: E2E on AWS CUDA
run-name: E2E on AWS CUDA - ${{ github.event.workflow_run.display_title }}
# We have to keep pre-commit AWS CUDA testing in a separate workflow because we
# need access to AWS secret and that isn't available on pull_request jobs for
# PRs from forks. And github's "require approval for all outside collaborators"
# is bypassed on pull_request_target.
#
# Also, we use commit status and not check suite/run (which, in theory, is more
# powerful) due to https://github.com/orgs/community/discussions/24616.
on:
workflow_run:
workflows: [SYCL Pre Commit on Linux]
types:
- completed
permissions:
contents: read
jobs:
create-check:
runs-on: [Linux, build]
permissions:
checks: write
statuses: write
steps:
- uses: actions/github-script@v7
with:
script: |
const sha = context.payload.workflow_run.head_sha
const run_id = '${{ github.run_id }}'
const this_run_url = 'https://github.com/' + context.repo.owner + '/' + context.repo.repo + '/actions/runs/' + run_id
// Create commit status.
await github.request('POST /repos/{owner}/{repo}/statuses/{sha}', {
owner: context.repo.owner,
repo: context.repo.repo,
sha: sha,
state: 'pending',
target_url: this_run_url,
description: 'SYCL E2E on AWS CUDA',
context: 'SYCL E2E on AWS CUDA',
})
aws-start:
runs-on: ubuntu-20.04
environment: aws
steps:
- uses: actions/checkout@v4
with:
sparse-checkout: devops/actions/aws-ec2
- run: npm install ./devops/actions/aws-ec2
- uses: ./devops/actions/aws-ec2
with:
mode: start
runs-on-list: '[{"runs-on":"aws_cuda-${{ github.event.workflow_run.id }}-${{ github.event.workflow_run.run_attempt }}","aws-ami":"ami-01cb0573cb039ab24","aws-type":["g5.2xlarge","g5.4xlarge"],"aws-disk":"/dev/sda1:64","aws-spot":"false"}]'
GH_PERSONAL_ACCESS_TOKEN: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }}
AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }}
e2e-cuda:
needs: [aws-start]
uses: ./.github/workflows/sycl-linux-run-tests.yml
with:
name: CUDA E2E
runner: '["aws_cuda-${{ github.event.workflow_run.id }}-${{ github.event.workflow_run.run_attempt }}"]'
image: ghcr.io/intel/llvm/ubuntu2204_build:latest
image_options: -u 1001 --gpus all --cap-add SYS_ADMIN --env NVIDIA_DISABLE_REQUIRE=1
target_devices: ext_oneapi_cuda:gpu
# No idea why but that seems to work and be in sync with the main
# pre-commit workflow.
ref: ${{ github.event.workflow_run.referenced_workflows[0].sha }}
merge_ref: ''
sycl_toolchain_artifact: sycl_linux_default
sycl_toolchain_archive: llvm_sycl.tar.zst
sycl_toolchain_decompress_command: zstd
update-check:
needs: [create-check, e2e-cuda]
if: always()
runs-on: [Linux, build]
permissions:
checks: write
statuses: write
steps:
- uses: actions/github-script@v7
with:
script: |
const sha = context.payload.workflow_run.head_sha
const run_id = '${{ github.run_id }}'
const this_run_url = 'https://github.com/' + context.repo.owner + '/' + context.repo.repo + '/actions/runs/' + run_id
// Update commit status.
await github.request('POST /repos/{owner}/{repo}/statuses/{sha}', {
owner: context.repo.owner,
repo: context.repo.repo,
sha: sha,
state: '${{ needs.e2e-cuda.result }}',
target_url: this_run_url,
description: 'SYCL E2E on AWS CUDA',
context: 'SYCL E2E on AWS CUDA',
})
aws-stop:
needs: [aws-start, e2e-cuda]
if: always()
runs-on: ubuntu-20.04
environment: aws
steps:
- uses: actions/checkout@v4
with:
sparse-checkout: devops/actions/aws-ec2
- run: npm install ./devops/actions/aws-ec2
- uses: ./devops/actions/aws-ec2
with:
mode: stop
runs-on-list: '[{"runs-on":"aws_cuda-${{ github.event.workflow_run.id }}-${{ github.event.workflow_run.run_attempt }}","aws-ami":"ami-01cb0573cb039ab24","aws-type":["g5.2xlarge","g5.4xlarge"],"aws-disk":"/dev/sda1:64","aws-spot":"false"}]'
GH_PERSONAL_ACCESS_TOKEN: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
AWS_ACCESS_KEY: ${{ secrets.AWS_ACCESS_KEY }}
AWS_SECRET_KEY: ${{ secrets.AWS_SECRET_KEY }}