diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 72b97bb2e1d..44a736a22c2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -44,8 +44,9 @@ jobs:
       # prepare images locally, tagged by commit hash
       - name: "Build E2E Image"
         timeout-minutes: 40
-        run: earthly ./yarn-project+export-end-to-end
+        run: earthly-ci ./yarn-project+export-end-to-end
       # We base our e2e list used in e2e-x86 off the targets in ./yarn-project/end-to-end
+      # (Note ARM uses just 2 tests as a smoketest)
       - name: Create list of end-to-end jobs
         id: e2e_list
         run: echo "list=$(earthly ls ./yarn-project/end-to-end | grep -v '+base' | sed 's/+//' | jq -R . | jq -cs .)" >> $GITHUB_OUTPUT
@@ -68,7 +69,7 @@ jobs:
       - name: Test
         working-directory: ./yarn-project/end-to-end/
         timeout-minutes: 25
-        run: earthly -P --no-output +${{ matrix.test }} --e2e_mode=cache
+        run: earthly-ci -P --no-output +${{ matrix.test }} --e2e_mode=cache
       # TODO
       # - name: Upload logs
       # run: BRANCH=${{ github.ref_name }} PULL_REQUEST=${{ github.event.number }} scripts/ci/upload_logs_to_s3 ./yarn-project/end-to-end/log
@@ -92,7 +93,7 @@ jobs:
         working-directory: ./barretenberg/cpp/
         timeout-minutes: 25
         # limit our parallelism to half our cores
-        run: earthly --no-output +test --hardware_concurrency=64
+        run: earthly-ci --no-output +test --hardware_concurrency=64
 
   # push benchmarking binaries to dockerhub registry
   bb-bench-binaries:
@@ -108,7 +109,7 @@ jobs:
         if: ${{ github.event.inputs.just_start_spot != 'true' }}
         timeout-minutes: 15
         working-directory: ./barretenberg/cpp/
-        run: earthly --push +bench-binaries
+        run: earthly-ci --push +bench-binaries
 
   setup-bench:
     uses: ./.github/workflows/setup-runner.yml
@@ -136,12 +137,12 @@ jobs:
       - name: Client IVC Bench
         working-directory: ./barretenberg/cpp/
         timeout-minutes: 15
-        run: earthly --no-output +bench-client-ivc --bench_mode=cache
+        run: earthly-ci --no-output +bench-client-ivc --bench_mode=cache
 
       - name: Ultrahonk Bench
         working-directory: ./barretenberg/cpp/
         timeout-minutes: 15
-        run: earthly --no-output +bench-ultra-honk --bench_mode=cache
+        run: earthly-ci --no-output +bench-ultra-honk --bench_mode=cache
 
   merge-check:
     runs-on: ${{ github.actor }}-x86
diff --git a/scripts/earthly-ci b/scripts/earthly-ci
new file mode 100755
index 00000000000..43eeb9b17aa
--- /dev/null
+++ b/scripts/earthly-ci
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# A wrapper for Earthly that is meant to catch signs of known intermittent failures and continue.
+# The silver lining is if Earthly does crash, the cache can pick up the build.
+# Usage: earthly-ci [earthly arguments...]
+set -eu -o pipefail
+
+# Flag to determine if -i is present
+INTERACTIVE=false
+# Check for -i flag in the arguments
+for arg in "$@"; do
+  if [ "$arg" == "-i" ] || [ "$arg" == "--interactive" ]; then
+    INTERACTIVE=true
+    break
+  fi
+done
+
+OUTPUT_FILE=$(mktemp)
+# remove the capture file on every exit path so repeated CI runs don't leak temp files
+trap 'rm -f "$OUTPUT_FILE"' EXIT
+# capture output to handle earthly edge cases
+# NOTE: "$@" is quoted everywhere so arguments containing spaces or globs reach earthly intact
+if $INTERACTIVE ; then
+  # don't play nice with tee if interactive
+  earthly "$@"
+elif ! earthly "$@" 2>&1 | tee "$OUTPUT_FILE" >&2 ; then
+  # we try earthly once, capturing output
+  # if we get one of our (unfortunate) known failures, handle retries
+  # TODO potentially handle other intermittent errors here
+  if grep -q 'failed to get edge: inconsistent graph state' "$OUTPUT_FILE" ; then
+    # TODO when earthly is overloaded we sometimes get
+    # 'failed to solve: failed to get edge: inconsistent graph state'
+    echo "Got 'inconsistent graph state'. Restarting earthly. See https://github.com/earthly/earthly/issues/2454"
+    earthly "$@"
+    # TODO handle
+    # could not configure satellite: failed getting org: unable to authenticate: failed to execute login request: Post
+  else
+    # otherwise, propagate error
+    exit 1
+  fi
+fi
diff --git a/scripts/earthly-cloud b/scripts/earthly-cloud
deleted file mode 100755
index d2a0e39bc2c..00000000000
--- a/scripts/earthly-cloud
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/usr/bin/env bash
-
-# This script uses Earthly cloud satellites based on a runner type and hash of the GITHUB_ACTOR environment variable.
-# ARM or x86 can be specified.
-# Usage: earthly-cloud [options]
-# Arguments:
-# runner type: The type of runner, e.g., 'build' or 'bench'.
-# architecture: The target architecture, e.g., 'arm' or 'x86'.
-set -eu -o pipefail
-
-# Check if at least two arguments are passed
-if [ $# -lt 2 ]; then
-  echo "Error: Insufficient arguments provided."
-  echo "Usage: $0 [options]"
-  exit 1
-fi
-
-RUNNER_TYPE=$1
-ARCH=$2
-shift 2
-
-if [ "$ARCH" == "arm" ]; then
-  PLATFORM=linux/arm64
-elif [ "$ARCH" == "x86" ]; then
-  PLATFORM=linux/amd64
-fi
-
-# default sizes for build type
-if [ "$RUNNER_TYPE" == "build" ] ; then
-  SIZE=4xlarge
-  NUMBER_OF_RUNNERS=2
-  if [ "$ARCH" = arm ] ; then
-    NUMBER_OF_RUNNERS=1
-  fi
-  # TODO why cant we set this??
-  # MAX_PARALLELISM=8
-elif [ "$RUNNER_TYPE" == "bench" ] ; then
-  SIZE=2xlarge
-  NUMBER_OF_RUNNERS=1
-  # MAX_PARALLELISM=1
-elif [ "$RUNNER_TYPE" == "test" ] ; then
-  SIZE=4xlarge
-  NUMBER_OF_RUNNERS=1
-fi
-
-# Flag to determine if -i is present
-INTERACTIVE=false
-# Check for -i flag in the arguments
-for arg in "$@"; do
-  if [ "$arg" == "-i" ] || [ "$arg" == "--interactive" ]; then
-    INTERACTIVE=true
-    break
-  fi
-done
-
-# we hash our GITHUB_ACTOR to pick from 1 to NUMBER_RUNNERS (inclusive) as RUNNER_ID
-# this means everyone gets assigned to runners based on their user group
-NAME_HASH=$(cksum <<< "$GITHUB_ACTOR" | cut -f 1 -d ' ')
-RUNNER_ID=$(($NAME_HASH % $NUMBER_OF_RUNNERS + 1))
-RUNNER=$RUNNER_TYPE-$RUNNER_ID-$ARCH
-earthly sat --org aztec launch --size $SIZE --platform $PLATFORM $RUNNER || true
-# --remote-cache=aztecprotocol/cache:bb-native-tests
-EARTHLY_FLAGS="-P --no-output --org aztec --sat $RUNNER"
-OUTPUT_FILE=$(mktemp)
-# capture output to handle earthly edge cases
-if $INTERACTIVE ; then
-  # don't play nice with tee if interactive
-  earthly $EARTHLY_FLAGS $@
-elif ! earthly $EARTHLY_FLAGS $@ 2>&1 | tee $OUTPUT_FILE >&2 ; then
-  # we try earthly once, capturing output
-  # if we get one of our (unfortunate) known failures, handle retries
-  # TODO potentially handle other intermittent errors here
-  if grep 'failed to get edge: inconsistent graph state' $OUTPUT_FILE >/dev/null ; then
-    # TODO when earthly is overloaded we sometimes get
-    # 'failed to solve: failed to get edge: inconsistent graph state'
-    echo "Got 'inconsistent graph state'. Restarting earthly. See https://github.com/earthly/earthly/issues/2454'"
-    earthly $EARTHLY_FLAGS $@
-    # TODO handle
-    # could not configure satellite: failed getting org: unable to authenticate: failed to execute login request: Post
-  else
-    # otherwise, propagate error
-    exit 1
-  fi
-fi
diff --git a/scripts/setup_env.sh b/scripts/setup_env.sh
index 97fb8fd68a1..70ecf9d7654 100755
--- a/scripts/setup_env.sh
+++ b/scripts/setup_env.sh
@@ -8,6 +8,6 @@ echo FORCE_COLOR=1 >> $GITHUB_ENV
 echo "Logging in to Docker..."
 echo $1 | docker login -u aztecprotocolci --password-stdin
 
-# Make earthly-cloud and earthly-cloud-bench scripts available
+# Make earthly-ci script available
 echo "PATH=$(dirname $(realpath $0)):$PATH" >> $GITHUB_ENV
 echo "EARTHLY_CONFIG=$(git rev-parse --show-toplevel)/.github/earthly-ci-config.yml" >> $GITHUB_ENV
\ No newline at end of file