Threads not processes #2034
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Functional tests

# NOTE(mikal): git repos are checked out to /srv/github/_work/{repo}/{repo}
# which is available as GITHUB_WORKSPACE. You can find other environment
# variables at:
#     https://docs.github.com/en/actions/learn-github-actions/environment-variables
#
# The can_enqueue / can_merge scheme is as described at:
#     https://boinkor.net/2023/11/neat-github-actions-patterns-for-github-merge-queues/

# Triggers: manual dispatch, merge queue entries, and pull requests against
# develop or a release branch. Pull requests which only touch documentation
# do not start the workflow.
on:
  workflow_dispatch:
  merge_group:
  pull_request:
    branches:
      - 'develop'
      - 'v*-releases'
    paths-ignore:
      - 'docs/**'
      - 'mkdocs.yml'

jobs:
# These sanity checks used to be three separate jobs, but are now combined
# to save a little time and reduce CI runner churn.
#
# The job lints with flake8, checks that requirements.txt installs into a
# venv at /tmp/venv, and runs the python3 unit tests. On failure the Slack
# message whose timestamp is published by enqueue_pr_comment is updated.
sanity_checks:
  runs-on: [self-hosted, vm]
  needs: enqueue_pr_comment
  concurrency:
    group: ${{ github.workflow }}-${{ github.ref }}-lint
    cancel-in-progress: true
  steps:
    - name: Checkout code with two commits
      uses: actions/checkout@v4
      with:
        fetch-depth: 2

    - name: Lint with flake8
      timeout-minutes: 5
      run: |
        /usr/bin/tox -eflake8

    - name: Attempt to install requirements
      timeout-minutes: 5
      run: |
        python3 -mvenv /tmp/venv
        /tmp/venv/bin/pip3 install -r requirements.txt

    - name: Run python3 unit tests
      timeout-minutes: 10
      run: |
        /usr/bin/tox -epy3

    # Failures get announced here before we cancel stuff
    #
    # NOTE(review): the action reference was obfuscated in the copy this was
    # restored from. The method / token / payload inputs below are the v2
    # interface of slackapi/slack-github-action; confirm the intended pin.
    - uses: slackapi/slack-github-action@v2.0.0
      if: failure()
      with:
        method: chat.update
        token: "${{ secrets.SLACK_BOT_TOKEN }}"
        payload: |
          channel: ${{ secrets.SLACK_CHANNEL_SHAKENFIST_CI }}
          ts: "${{ needs.enqueue_pr_comment.outputs.ts }}"
          text: |
            The PR test attempt job at https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} failed.
            PR title: ${{ github.event.pull_request.title }}
            PR link: ${{ github.event.pull_request.html_url }}
            PR author: ${{ github.event.pull_request.user.login }}
          attachments:
            - color: "ee9090"
              fields:
                - title: "Status"
                  short: true
                  value: "Failure in sanity tests"

    # We do this instead of "fast-fail: true" because we want to cancel jobs
    # outside of the matrix as well.
    # NOTE(review): the pinned version of this disabled action was lost in
    # extraction; restore it before re-enabling the step.
    # - uses: andymckay/cancel-action@<version>
    #   if: failure()
    #   name: Cancel entire workflow on failure
# Functional "smoke" run for every event except merge_group. The always()
# means the job is still evaluated when enqueue_pr_comment did not succeed.
#
# Most steps source ${GITHUB_WORKSPACE}/ci-environment.sh before using
# ${primary} / ${source_path}; presumably that file defines them and is written
# by the "Build infrastructure" playbook -- TODO confirm.
functional_matrix_pr:
  if: always() && github.event_name != 'merge_group'
  runs-on: [self-hosted, vm]
  needs: enqueue_pr_comment
  name: "${{ matrix.os.description }}"
  strategy:
    fail-fast: false
    matrix:
      os:
        - description: 'smoke'
          job_name: 'smoke'
          base_image: 'sf://label/ci-images/debian-11'
          base_image_user: 'debian'
          topology: 'localhost'
          concurrency: 3
          stestr_config: 'smoke-ci.conf'
  concurrency:
    group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os.job_name }}
    cancel-in-progress: true
  steps:
    - name: Log matrix details
      run: |
        echo "job_name: ${{ matrix.os.job_name }}"
        echo "base_image: ${{ matrix.os.base_image }}"
        echo "base_image_user: ${{ matrix.os.base_image_user }}"
        echo "topology: ${{ matrix.os.topology }}"
        echo "concurrency: ${{ matrix.os.concurrency }}"

    - name: Setup test environment
      uses: shakenfist/actions/setup-test-environment@main

    # Builds the topology with ansible, then records in UPLOAD_TARGET which
    # address later steps should upload images to: the fixed 10.0.0.10 for
    # the localhost topology, otherwise a randomly chosen entry from nodes.
    - name: Build infrastructure
      run: |
        cd ${GITHUB_WORKSPACE}/actions
        ansible-playbook -i /home/debian/ansible-hosts \
            --extra-vars "identifier=${SHAKENFIST_NAMESPACE} source_path=${GITHUB_WORKSPACE} \
            base_image=${{ matrix.os.base_image }} base_image_user=${{ matrix.os.base_image_user }}" \
            ansible/ci-topology-${{ matrix.os.topology }}.yml

        if [ "${{ matrix.os.topology }}" == "localhost" ]; then
          echo "UPLOAD_TARGET=10.0.0.10" >> "$GITHUB_ENV"
        else
          nodes=(10.0.0.20 10.0.0.21 10.0.0.22 10.0.0.23 10.0.0.24)
          # Derive the modulus from the array so the list can be edited
          # without also having to update a magic number.
          random_index=$(( RANDOM % ${#nodes[@]} ))
          echo "UPLOAD_TARGET=${nodes[$random_index]}" >> "$GITHUB_ENV"
        fi

    - name: Log environmental configuration
      run: |
        echo "## ci-environment.sh"
        cat ${GITHUB_WORKSPACE}/ci-environment.sh
        echo
        echo "## Environment variables"
        echo "SF_HEAD_SHA = ${SF_HEAD_SHA}"
        echo "SHAKENFIST_NAMESPACE = ${SHAKENFIST_NAMESPACE}"
        echo "UPLOAD_TARGET = ${UPLOAD_TARGET}"

    - name: Copy CI tools to primary
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        scp -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null -rp tools \
            ${{ matrix.os.base_image_user }}@${primary}:.
        echo ""
        echo "Copied tools:"
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} "ls tools"

    - name: Log github actions buffering status
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        tools/run_remote ${primary} python3 tools/buffer.py

    # Runs the installer wrapper on the primary, then replaces every enabled
    # apache site with sf-example.conf and gracefully reloads apache.
    - name: Run getsf installer on primary
      timeout-minutes: 30
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} /tmp/getsf-wrapper
        echo ""
        echo ""
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} \
            'sudo rm /etc/apache2/sites-enabled/*; sudo a2ensite sf-example.conf; sudo apachectl graceful'

    # Polls "sf-client instance list" up to 60 times, five seconds apart.
    # The first success ends the step; exhausting the attempts fails it.
    - name: Wait for API to start answering
      run: |
        # A failing probe must not abort the script, so errexit is disabled.
        set +e

        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} 'sudo chmod ugo+r /etc/sf/* /var/log/syslog'

        count=0
        while [ $count -lt 60 ]
        do
          ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
              ${{ matrix.os.base_image_user }}@${primary} '. /etc/sf/sfrc; sf-client instance list'
          if [ $? == 0 ]; then
            exit 0
          fi

          count=$(( $count + 1 ))
          sleep 5
        done

        exit 1

    - name: Import cached images
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh

        # The setup string is executed by the remote shell, so $PATH is escaped
        # to be expanded there rather than baking in the runner's own PATH.
        setup="export PATH=\$PATH:/srv/shakenfist/venv/bin;"
        setup="${setup} . /etc/sf/sfrc;"
        setup="${setup} export SHAKENFIST_API_URL=http://localhost:13000;"
        setup="${setup} sf-client artifact upload"

        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${UPLOAD_TARGET} \
            "${setup} debian-11 /srv/ci/debian:11 --shared --no-checksum"

    - name: Make the traces directory
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} "sudo mkdir -p /srv/ci/traces; sudo chown -R ${{ matrix.os.base_image_user }}:${{ matrix.os.base_image_user }} /srv/ci/traces"

    # Copies the shakenfist tree to the primary and runs stestr there with
    # the matrix's config and concurrency.
    - name: Run functional tests
      timeout-minutes: 30
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        scp -rp -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null \
            ${source_path}/shakenfist \
            ${{ matrix.os.base_image_user }}@${primary}:shakenfist

        # "set -e" is only enabled after the pip install, so on the remote
        # side it is the stestr run whose failure is guaranteed to fail the
        # step.
        script="cd shakenfist/deploy/;"
        script="${script} . /etc/sf/sfrc;"
        script="${script} sudo PIP_BREAK_SYSTEM_PACKAGES=1 pip3 install -r requirements.txt;"
        script="${script} set -e;"
        script="${script} stestr run --config ${{ matrix.os.stestr_config }} --concurrency=${{ matrix.os.concurrency }};"

        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} "${script}"

    - name: List slowest tests
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} \
            "cd shakenfist/deploy; stestr slowest || true"

    # Publishes a comma separated list of failed test names as the
    # "failures" step output, which the Slack thread message below consumes.
    - name: List failing tests
      if: failure()
      id: failures
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh

        echo "Gathering list of failed tests..."
        touch ${GITHUB_WORKSPACE}/failed
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} \
            "cd shakenfist/deploy; stestr failing || true" > ${GITHUB_WORKSPACE}/failed
        echo

        # sed -z treats the whole grep output as a single record: strip every
        # "FAIL: shakenfist_ci." prefix, join the lines with ", ", and turn
        # the trailing separator back into a newline.
        failed=$(grep -E "^FAIL: " ${GITHUB_WORKSPACE}/failed | \
                 sed -z 's/FAIL: shakenfist_ci\.//g;s/\n/, /g;s/, $/\n/')

        echo "Failed tests:"
        cat ${GITHUB_WORKSPACE}/failed
        echo

        echo "failures<<EOF" >> "${GITHUB_OUTPUT}"
        echo "${failed}" >> "${GITHUB_OUTPUT}"
        echo "EOF" >> "${GITHUB_OUTPUT}"

        echo "GITHUB_OUTPUT is:"
        cat "${GITHUB_OUTPUT}"

    - name: Check logs
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        tools/run_remote ${primary} "sudo bash tools/ci_log_checks.sh develop ${{ matrix.os.job_name }}"

    # On Ubuntu 22.04 the cleaner is rated a CPU hog because of etcd cleanup
    # cost. That's not really something we can control, so just ignore the CPU
    # usage of that process instead.
    - name: Check SF process CPU usage
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} '. /etc/sf/sfrc; sf-client node cpuhogs -t 0.99 --ignore sf_cleaner'

    - name: Check for reasonable data rates
      timeout-minutes: 5
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        tools/run_remote ${primary} "sudo bash tools/ci_event_checks.sh develop"

    # Fetches the deployed inventory and rewrites the key location from
    # /root/.ssh to /home/debian/.ssh, where "Gather logs" copies the CI key.
    - name: Fetch and tweak inventory
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        scp -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary}:/etc/sf/inventory.yaml /srv/github/
        sed -i 's|/root/.ssh|/home/debian/.ssh|g' /srv/github/inventory.yaml

        echo "====="
        cat /srv/github/inventory.yaml

    - name: Gather logs
      if: always()
      run: |
        set -x

        # Fetch unit test tracing. Sometimes this isn't present if we failed
        # super early in the CI job.
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        mkdir -p /srv/github/bundle/
        scp -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null -rp \
            ${{ matrix.os.base_image_user }}@${primary}:/srv/ci/traces \
            /srv/github/bundle/ || true

        # We need the ssh key in the place ansible expects it to be, which isn't
        # true on the CI worker node.
        cp /srv/github/id_ci /home/debian/.ssh/id_rsa
        cp /srv/github/id_ci.pub /home/debian/.ssh/id_rsa.pub

        ansible-playbook -i /srv/github/inventory.yaml \
            --extra-vars "base_image_user=${{ matrix.os.base_image_user }} \
            ansible_ssh_common_args='-o StrictHostKeyChecking=no'" \
            ${GITHUB_WORKSPACE}/actions/ansible/ci-gather-logs.yml

    - uses: actions/upload-artifact@v4
      if: always()
      with:
        name: bundle-functional-cluster-${{ matrix.os.job_name }}
        retention-days: 90
        if-no-files-found: error
        path: /srv/github/artifacts/bundle.zip

    # Failures get announced here before we cancel stuff
    #
    # NOTE(review): the action reference was obfuscated in the copy this was
    # restored from. The method / token / payload inputs below are the v2
    # interface of slackapi/slack-github-action; confirm the intended pin.
    - uses: slackapi/slack-github-action@v2.0.0
      if: failure()
      with:
        method: chat.update
        token: "${{ secrets.SLACK_BOT_TOKEN }}"
        payload: |
          channel: ${{ secrets.SLACK_CHANNEL_SHAKENFIST_CI }}
          ts: "${{ needs.enqueue_pr_comment.outputs.ts }}"
          text: |
            The PR test attempt job at https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} failed.
            PR title: ${{ github.event.pull_request.title }}
            PR link: ${{ github.event.pull_request.html_url }}
            PR author: ${{ github.event.pull_request.user.login }}
          attachments:
            - color: "ee9090"
              fields:
                - title: "Status"
                  short: true
                  value: "Failure in functional matrix tests"

    # Threaded reply under the message updated above, listing the failures.
    - uses: slackapi/slack-github-action@v2.0.0
      if: failure()
      with:
        method: chat.postMessage
        token: "${{ secrets.SLACK_BOT_TOKEN }}"
        payload: |
          channel: ${{ secrets.SLACK_CHANNEL_SHAKENFIST_CI }}
          thread_ts: "${{ needs.enqueue_pr_comment.outputs.ts }}"
          text: |
            Failures are:
            ${{ steps.failures.outputs.failures }}

    # We do this instead of "fast-fail: true" because we want to cancel jobs
    # outside of the matrix as well.
    # NOTE(review): the pinned version of this disabled action was lost in
    # extraction; restore it before re-enabling the step.
    # - uses: andymckay/cancel-action@<version>
    #   if: failure()
    #   name: Cancel entire workflow on failure
# Full functional matrix, run only for merge queue (merge_group) events.
#
# Most steps source ${GITHUB_WORKSPACE}/ci-environment.sh before using
# ${primary} / ${source_path}; presumably that file defines them and is written
# by the "Build infrastructure" playbook -- TODO confirm.
functional_matrix_merge:
  if: github.event_name == 'merge_group'
  name: "${{ matrix.os.description }}"
  needs: merge_pr_comment
  strategy:
    fail-fast: false
    matrix:
      os:
        - description: 'debian 11 single machine'
          job_name: 'debian-11-localhost'
          base_image: 'sf://label/ci-images/debian-11'
          base_image_user: 'debian'
          topology: 'localhost'
          concurrency: 3
          stestr_config: 'cluster-ci.conf'
        - description: 'debian 12 cluster'
          job_name: 'debian-12-slim-primary'
          base_image: 'sf://label/ci-images/debian-12'
          base_image_user: 'debian'
          topology: 'slim-primary'
          concurrency: 5
          stestr_config: 'cluster-ci.conf'
        - description: 'ubuntu 24.04 cluster'
          job_name: 'ubuntu-2404-slim-primary'
          base_image: 'sf://label/ci-images/ubuntu-2404'
          base_image_user: 'ubuntu'
          topology: 'slim-primary'
          concurrency: 5
          stestr_config: 'cluster-ci.conf'
        - description: 'guests'
          job_name: 'guests'
          base_image: 'sf://label/ci-images/debian-11'
          base_image_user: 'debian'
          topology: 'slim-primary'
          concurrency: 5
          stestr_config: 'guest-ci.conf'
  runs-on: [self-hosted, vm]
  concurrency:
    group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os.job_name }}
    cancel-in-progress: true
  steps:
    - name: Log matrix details
      run: |
        echo "job_name: ${{ matrix.os.job_name }}"
        echo "base_image: ${{ matrix.os.base_image }}"
        echo "base_image_user: ${{ matrix.os.base_image_user }}"
        echo "topology: ${{ matrix.os.topology }}"
        echo "concurrency: ${{ matrix.os.concurrency }}"

    - name: Setup test environment
      uses: shakenfist/actions/setup-test-environment@main

    # Builds the topology with ansible, then records in UPLOAD_TARGET which
    # address later steps should upload images to: the fixed 10.0.0.10 for
    # the localhost topology, otherwise a randomly chosen entry from nodes.
    - name: Build infrastructure
      run: |
        cd ${GITHUB_WORKSPACE}/actions
        ansible-playbook -i /home/debian/ansible-hosts \
            --extra-vars "identifier=${SHAKENFIST_NAMESPACE} source_path=${GITHUB_WORKSPACE} \
            base_image=${{ matrix.os.base_image }} base_image_user=${{ matrix.os.base_image_user }}" \
            ansible/ci-topology-${{ matrix.os.topology }}.yml

        if [ "${{ matrix.os.topology }}" == "localhost" ]; then
          echo "UPLOAD_TARGET=10.0.0.10" >> "$GITHUB_ENV"
        else
          nodes=(10.0.0.20 10.0.0.21 10.0.0.22 10.0.0.23 10.0.0.24)
          # Derive the modulus from the array so the list can be edited
          # without also having to update a magic number.
          random_index=$(( RANDOM % ${#nodes[@]} ))
          echo "UPLOAD_TARGET=${nodes[$random_index]}" >> "$GITHUB_ENV"
        fi

    - name: Log environmental configuration
      run: |
        echo "## ci-environment.sh"
        cat ${GITHUB_WORKSPACE}/ci-environment.sh
        echo
        echo "## Environment variables"
        echo "SF_HEAD_SHA = ${SF_HEAD_SHA}"
        echo "SHAKENFIST_NAMESPACE = ${SHAKENFIST_NAMESPACE}"
        echo "UPLOAD_TARGET = ${UPLOAD_TARGET}"

    - name: Copy CI tools to primary
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        scp -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null -rp tools \
            ${{ matrix.os.base_image_user }}@${primary}:.
        echo ""
        echo "Copied tools:"
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} "ls tools"

    - name: Log github actions buffering status
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        tools/run_remote ${primary} python3 tools/buffer.py

    # Runs the installer wrapper on the primary, then replaces every enabled
    # apache site with sf-example.conf and gracefully reloads apache.
    - name: Run getsf installer on primary
      timeout-minutes: 30
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} /tmp/getsf-wrapper
        echo ""
        echo ""
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} \
            'sudo rm /etc/apache2/sites-enabled/*; sudo a2ensite sf-example.conf; sudo apachectl graceful'

    # Polls "sf-client instance list" up to 60 times, five seconds apart.
    # The first success ends the step; exhausting the attempts fails it.
    - name: Wait for API to start answering
      run: |
        # A failing probe must not abort the script, so errexit is disabled.
        set +e

        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} 'sudo chmod ugo+r /etc/sf/* /var/log/syslog'

        count=0
        while [ $count -lt 60 ]
        do
          ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
              ${{ matrix.os.base_image_user }}@${primary} '. /etc/sf/sfrc; sf-client instance list'
          if [ $? == 0 ]; then
            exit 0
          fi

          count=$(( $count + 1 ))
          sleep 5
        done

        exit 1

    - name: Import cached images
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh

        # The setup string is executed by the remote shell, so $PATH is escaped
        # to be expanded there rather than baking in the runner's own PATH.
        setup="export PATH=\$PATH:/srv/shakenfist/venv/bin;"
        setup="${setup} . /etc/sf/sfrc;"
        setup="${setup} export SHAKENFIST_API_URL=http://localhost:13000;"
        setup="${setup} sf-client artifact upload"

        # The cluster CI image, used by all CI types
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${UPLOAD_TARGET} \
            "${setup} debian-11 /srv/ci/debian:11 --shared --no-checksum"

        # Additional images used only by guest CI
        if [ "${{ matrix.os.stestr_config }}" == "guest-ci.conf" ]; then
          ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
              -o UserKnownHostsFile=/dev/null \
              ${{ matrix.os.base_image_user }}@${UPLOAD_TARGET} \
              "${setup} ubuntu-2004 /srv/ci/ubuntu:20.04 --shared --no-checksum"
          ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
              -o UserKnownHostsFile=/dev/null \
              ${{ matrix.os.base_image_user }}@${UPLOAD_TARGET} \
              "${setup} debian-12 /srv/ci/debian:12 --shared --no-checksum"
          ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
              -o UserKnownHostsFile=/dev/null \
              ${{ matrix.os.base_image_user }}@${UPLOAD_TARGET} \
              "${setup} cirros /srv/ci/cirros --shared"
        fi

    # Dumps sfrc for the record, then issues 100 asynchronous network
    # creates (background-1 .. background-100) before the tests start.
    - name: Create a base level of activity in the cluster
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} \
            'echo "==== sfrc ===="; cat /etc/sf/sfrc; echo "==== end sfrc ===="'
        echo ""
        echo ""
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} \
            '. /etc/sf/sfrc; for i in `seq 100`; do sf-client --async=continue network create background-$i 10.$i.0.0/24 > /dev/null; echo -n "."; done'
        echo ""

    - name: Make the traces directory
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} "sudo mkdir -p /srv/ci/traces; sudo chown -R ${{ matrix.os.base_image_user }}:${{ matrix.os.base_image_user }} /srv/ci/traces"

    # Copies the shakenfist tree to the primary and runs stestr there with
    # the matrix's config and concurrency.
    - name: Run functional tests
      timeout-minutes: 120
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        scp -rp -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null \
            ${source_path}/shakenfist \
            ${{ matrix.os.base_image_user }}@${primary}:shakenfist

        # "set -e" is only enabled after the pip install, so on the remote
        # side it is the stestr run whose failure is guaranteed to fail the
        # step.
        script="cd shakenfist/deploy/;"
        script="${script} . /etc/sf/sfrc;"
        script="${script} sudo PIP_BREAK_SYSTEM_PACKAGES=1 pip3 install -r requirements.txt;"
        script="${script} set -e;"
        script="${script} stestr run --config ${{ matrix.os.stestr_config }} --concurrency=${{ matrix.os.concurrency }};"

        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} "${script}"

    - name: List slowest tests
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} \
            "cd shakenfist/deploy; stestr slowest || true"

    # Publishes a comma separated list of failed test names as the
    # "failures" step output, which the Slack thread message below consumes.
    - name: List failing tests
      if: failure()
      id: failures
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh

        echo "Gathering list of failed tests..."
        touch ${GITHUB_WORKSPACE}/failed
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} \
            "cd shakenfist/deploy; stestr failing || true" > ${GITHUB_WORKSPACE}/failed
        echo

        # sed -z treats the whole grep output as a single record, so the
        # prefix pattern must NOT be anchored with "^": an anchored pattern
        # only matches at the very start of the record and would leave the
        # "FAIL: shakenfist_ci." prefix on every test after the first.
        failed=$(grep -E "^FAIL: " ${GITHUB_WORKSPACE}/failed | \
                 sed -z 's/FAIL: shakenfist_ci\.//g;s/\n/, /g;s/, $/\n/')

        echo "Failed tests:"
        cat ${GITHUB_WORKSPACE}/failed
        echo

        echo "failures<<EOF" >> "${GITHUB_OUTPUT}"
        echo "${failed}" >> "${GITHUB_OUTPUT}"
        echo "EOF" >> "${GITHUB_OUTPUT}"

        echo "GITHUB_OUTPUT is:"
        cat "${GITHUB_OUTPUT}"

    - name: Check logs
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        tools/run_remote ${primary} "sudo bash tools/ci_log_checks.sh develop ${{ matrix.os.job_name }}"

    # NOTE(review): unlike the pull request job, this check does not pass
    # "--ignore sf_cleaner" -- confirm whether that difference is intended.
    - name: Check SF process CPU usage
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary} '. /etc/sf/sfrc; sf-client node cpuhogs -t 0.99'

    - name: Check for reasonable data rates
      timeout-minutes: 5
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        tools/run_remote ${primary} "sudo bash tools/ci_event_checks.sh develop"

    # Fetches the deployed inventory and rewrites the key location from
    # /root/.ssh to /home/debian/.ssh, where "Gather logs" copies the CI key.
    - name: Fetch and tweak inventory
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        scp -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            ${{ matrix.os.base_image_user }}@${primary}:/etc/sf/inventory.yaml /srv/github/
        sed -i 's|/root/.ssh|/home/debian/.ssh|g' /srv/github/inventory.yaml

        echo "====="
        cat /srv/github/inventory.yaml

    - name: Gather logs
      if: always()
      run: |
        set -x

        # Fetch unit test tracing. Sometimes this isn't present if we failed
        # super early in the CI job.
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        mkdir -p /srv/github/bundle/
        scp -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null -rp \
            ${{ matrix.os.base_image_user }}@${primary}:/srv/ci/traces \
            /srv/github/bundle/ || true

        # We need the ssh key in the place ansible expects it to be, which isn't
        # true on the CI worker node.
        cp /srv/github/id_ci /home/debian/.ssh/id_rsa
        cp /srv/github/id_ci.pub /home/debian/.ssh/id_rsa.pub

        ansible-playbook -i /srv/github/inventory.yaml \
            --extra-vars "base_image_user=${{ matrix.os.base_image_user }} \
            ansible_ssh_common_args='-o StrictHostKeyChecking=no'" \
            ${GITHUB_WORKSPACE}/actions/ansible/ci-gather-logs.yml

    - uses: actions/upload-artifact@v4
      if: always()
      with:
        name: bundle-functional-cluster-${{ matrix.os.job_name }}
        retention-days: 90
        if-no-files-found: error
        path: /srv/github/artifacts/bundle.zip

    # Failures get announced here before we cancel stuff
    #
    # NOTE(review): the action reference was obfuscated in the copy this was
    # restored from. The method / token / payload inputs below are the v2
    # interface of slackapi/slack-github-action; confirm the intended pin.
    - uses: slackapi/slack-github-action@v2.0.0
      if: failure()
      with:
        method: chat.update
        token: "${{ secrets.SLACK_BOT_TOKEN }}"
        payload: |
          channel: ${{ secrets.SLACK_CHANNEL_SHAKENFIST_CI }}
          ts: "${{ needs.merge_pr_comment.outputs.ts }}"
          text: |
            The PR merge attempt job at https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} failed.
            The PR commit message is:
            ${{ needs.merge_pr_comment.outputs.commit }}
          attachments:
            - color: "ee9090"
              fields:
                - title: "Status"
                  short: true
                  value: "Failure in functional matrix tests"

    # Threaded reply under the message updated above, listing the failures.
    - uses: slackapi/slack-github-action@v2.0.0
      if: failure()
      with:
        method: chat.postMessage
        token: "${{ secrets.SLACK_BOT_TOKEN }}"
        payload: |
          channel: ${{ secrets.SLACK_CHANNEL_SHAKENFIST_CI }}
          thread_ts: "${{ needs.merge_pr_comment.outputs.ts }}"
          text: |
            Failures are:
            ${{ steps.failures.outputs.failures }}

    # We do this instead of "fast-fail: true" because we want to cancel jobs
    # outside of the matrix as well.
    # NOTE(review): the pinned version of this disabled action was lost in
    # extraction; restore it before re-enabling the step.
    # - uses: andymckay/cancel-action@<version>
    #   if: failure()
    #   name: Cancel entire workflow on failure
# Ansible module tests, run only for merge queue (merge_group) events on a
# debian-11 slim-primary topology.
#
# Most steps source ${GITHUB_WORKSPACE}/ci-environment.sh before using
# ${primary} / ${source_path}; presumably that file defines them and is written
# by the "Build infrastructure" playbook -- TODO confirm.
ansible_modules:
  if: github.event_name == 'merge_group'
  runs-on: [self-hosted, vm]
  needs: merge_pr_comment
  concurrency:
    group: ${{ github.workflow }}-${{ github.ref }}-ansiblemodules
    cancel-in-progress: true
  steps:
    - name: Setup test environment
      uses: shakenfist/actions/setup-test-environment@main

    # Builds the topology with ansible, then records a randomly chosen entry
    # from nodes in UPLOAD_TARGET for the image import step.
    - name: Build infrastructure
      run: |
        cd ${GITHUB_WORKSPACE}/actions
        ansible-playbook -i /home/debian/ansible-hosts \
            --extra-vars "identifier=${SHAKENFIST_NAMESPACE} source_path=${GITHUB_WORKSPACE} \
            base_image=sf://label/ci-images/debian-11 base_image_user=debian" \
            ansible/ci-topology-slim-primary.yml

        nodes=(10.0.0.20 10.0.0.21 10.0.0.22 10.0.0.23 10.0.0.24)
        # Derive the modulus from the array so the list can be edited
        # without also having to update a magic number.
        random_index=$(( RANDOM % ${#nodes[@]} ))
        echo "UPLOAD_TARGET=${nodes[$random_index]}" >> "$GITHUB_ENV"

    - name: Log environmental configuration
      run: |
        echo "## ci-environment.sh"
        cat ${GITHUB_WORKSPACE}/ci-environment.sh
        echo
        echo "## Environment variables"
        echo "SF_HEAD_SHA = ${SF_HEAD_SHA}"
        echo "SHAKENFIST_NAMESPACE = ${SHAKENFIST_NAMESPACE}"
        echo "UPLOAD_TARGET = ${UPLOAD_TARGET}"

    - name: Copy CI tools to primary
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        scp -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null -rp tools \
            debian@${primary}:.

    # Runs the installer wrapper on the primary, then replaces every enabled
    # apache site with sf-example.conf and gracefully reloads apache.
    - name: Run getsf installer on primary
      timeout-minutes: 30
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null \
            debian@${primary} /tmp/getsf-wrapper
        echo ""
        echo ""
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null \
            debian@${primary} \
            'sudo rm /etc/apache2/sites-enabled/*; sudo a2ensite sf-example.conf; sudo apachectl graceful'

    # Polls "sf-client instance list" up to 60 times, five seconds apart.
    # The first success ends the step; exhausting the attempts fails it.
    - name: Wait for API to start answering
      run: |
        # A failing probe must not abort the script, so errexit is disabled.
        set +e

        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            debian@${primary} 'sudo chmod ugo+r /etc/sf/* /var/log/syslog'

        count=0
        while [ $count -lt 60 ]
        do
          ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
              debian@${primary} '. /etc/sf/sfrc; sf-client instance list'
          if [ $? == 0 ]; then
            exit 0
          fi

          count=$(( $count + 1 ))
          sleep 5
        done

        exit 1

    - name: Import cached images
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh

        # The setup string is executed by the remote shell, so $PATH is escaped
        # to be expanded there rather than baking in the runner's own PATH.
        setup="export PATH=\$PATH:/srv/shakenfist/venv/bin;"
        setup="${setup} . /etc/sf/sfrc;"
        setup="${setup} export SHAKENFIST_API_URL=http://localhost:13000;"
        setup="${setup} sf-client artifact upload"

        # The cluster CI image, used by all CI types
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            debian@${UPLOAD_TARGET} \
            "${setup} debian-11 /srv/ci/debian:11 --shared --no-checksum"

    # Copies the shakenfist tree to the primary and runs the ansible module
    # test script from its deploy directory.
    - name: Run ansible module tests
      timeout-minutes: 60
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        scp -rp -i /srv/github/id_ci -o StrictHostKeyChecking=no \
            -o UserKnownHostsFile=/dev/null \
            ${source_path}/shakenfist \
            debian@${primary}:shakenfist
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            debian@${primary} "cd shakenfist/deploy; . /etc/sf/sfrc; bash ansiblemoduletests.sh"

    - name: Check logs
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        tools/run_remote ${primary} "sudo bash tools/ci_log_checks.sh develop ansible-modules"

    - name: Check process CPU usage
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            debian@${primary} '. /etc/sf/sfrc; sf-client node cpuhogs -t 0.99'

    - name: Check for reasonable data rates
      timeout-minutes: 5
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        tools/run_remote ${primary} "sudo bash tools/ci_event_checks.sh develop"

    # Fetches the deployed inventory and rewrites the key location from
    # /root/.ssh to /home/debian/.ssh, where "Gather logs" copies the CI key.
    - name: Fetch and tweak inventory
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        scp -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            debian@${primary}:/etc/sf/inventory.yaml /srv/github/
        sed -i 's|/root/.ssh|/home/debian/.ssh|g' /srv/github/inventory.yaml

        echo "====="
        cat /srv/github/inventory.yaml

    - name: Gather logs
      if: always()
      run: |
        set -x

        # We need the ssh key in the place ansible expects it to be, which isn't
        # true on the CI worker node.
        cp /srv/github/id_ci /home/debian/.ssh/id_rsa
        cp /srv/github/id_ci.pub /home/debian/.ssh/id_rsa.pub
        # OWNER:GROUP is the supported separator; the "." form is deprecated.
        chown -R debian:debian /home/debian/.ssh

        ansible-playbook -i /srv/github/inventory.yaml \
            --extra-vars "base_image_user=debian ansible_ssh_common_args='-o StrictHostKeyChecking=no'" \
            ${GITHUB_WORKSPACE}/actions/ansible/ci-gather-logs.yml

    - uses: actions/upload-artifact@v4
      if: always()
      with:
        name: bundle-functional-ansible-modules
        retention-days: 90
        if-no-files-found: error
        path: /srv/github/artifacts/bundle.zip

    # Failures get announced here before we cancel stuff
    #
    # NOTE(review): the action reference was obfuscated in the copy this was
    # restored from. The method / token / payload inputs below are the v2
    # interface of slackapi/slack-github-action; confirm the intended pin.
    - uses: slackapi/slack-github-action@v2.0.0
      if: failure()
      with:
        method: chat.update
        token: "${{ secrets.SLACK_BOT_TOKEN }}"
        payload: |
          channel: ${{ secrets.SLACK_CHANNEL_SHAKENFIST_CI }}
          ts: "${{ needs.merge_pr_comment.outputs.ts }}"
          text: |
            The PR merge attempt job at https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} failed.
            The PR commit message is:
            ${{ needs.merge_pr_comment.outputs.commit }}
          attachments:
            - color: "ee9090"
              fields:
                - title: "Status"
                  short: true
                  value: "Failure in ansible-modules tests"

    # We do this instead of "fast-fail: true" because we want to cancel jobs
    # outside of the matrix as well.
    # NOTE(review): the pinned version of this disabled action was lost in
    # extraction; restore it before re-enabling the step.
    # - uses: andymckay/cancel-action@<version>
    #   if: failure()
    #   name: Cancel entire workflow on failure
# Node lifecycle functional tests. Only runs for merge queue attempts, and
# only after merge_pr_comment has posted the Slack message that this job
# updates if it fails.
node_lifecycle:
  if: github.event_name == 'merge_group'
  needs: merge_pr_comment
  runs-on: [self-hosted, vm]
  concurrency:
    group: ${{ github.workflow }}-${{ github.ref }}-nodelifecycle
    cancel-in-progress: true
  steps:
    - name: Setup test environment
      uses: shakenfist/actions/setup-test-environment@main
    - name: Build infrastructure
      run: |
        cd ${GITHUB_WORKSPACE}/actions
        ansible-playbook -i /home/debian/ansible-hosts \
          --extra-vars "identifier=${SHAKENFIST_NAMESPACE} source_path=${GITHUB_WORKSPACE} \
          base_image=sf://label/ci-images/debian-11 base_image_user=debian" \
          ansible/ci-topology-slim-primary.yml
        # Pick one node at random to receive the cached image upload. The
        # modulus is taken from the array itself so that the list of nodes
        # can change without also having to update a separate count.
        nodes=(10.0.0.20 10.0.0.21 10.0.0.22 10.0.0.23 10.0.0.24)
        random_index=$(( RANDOM % ${#nodes[@]} ))
        echo "UPLOAD_TARGET=${nodes[$random_index]}" >> $GITHUB_ENV
    - name: Log environmental configuration
      run: |
        echo "## ci-environment.sh"
        cat ${GITHUB_WORKSPACE}/ci-environment.sh
        echo
        echo "## Environment variables"
        echo "SF_HEAD_SHA = ${SF_HEAD_SHA}"
        echo "SHAKENFIST_NAMESPACE = ${SHAKENFIST_NAMESPACE}"
        echo "UPLOAD_TARGET = ${UPLOAD_TARGET}"
    - name: Copy CI tools to primary
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        scp -i /srv/github/id_ci -o StrictHostKeyChecking=no \
          -o UserKnownHostsFile=/dev/null -rp tools \
          debian@$primary:.
    - name: Run getsf installer on primary
      timeout-minutes: 30
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
          -o UserKnownHostsFile=/dev/null \
          debian@$primary /tmp/getsf-wrapper
        echo ""
        echo ""
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
          -o UserKnownHostsFile=/dev/null \
          debian@$primary \
          'sudo rm /etc/apache2/sites-enabled/*; sudo a2ensite sf-example.conf; sudo apachectl graceful'
    - name: Wait for API to start answering
      run: |
        # Errors are handled by hand below, so a failing probe must not
        # abort the script.
        set +e
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
          debian@$primary 'sudo chmod ugo+r /etc/sf/* /var/log/syslog'
        # Probe the API up to 60 times, sleeping five seconds after each
        # failed attempt, and give up with a failure after that.
        count=0
        while [ $count -lt 60 ]
        do
          if ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
            debian@$primary '. /etc/sf/sfrc; sf-client instance list'; then
            exit 0
          fi
          count=$(( $count + 1 ))
          sleep 5
        done
        exit 1
    - name: Import cached images
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        # Assemble the command prefix that is run on the upload target.
        # Note that $PATH is expanded here on the runner, before the string
        # is sent to the remote host.
        setup="export PATH=$PATH:/srv/shakenfist/venv/bin;"
        setup="${setup} . /etc/sf/sfrc;"
        setup="${setup} export SHAKENFIST_API_URL=http://localhost:13000;"
        setup="${setup} sf-client artifact upload"
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
          debian@${UPLOAD_TARGET} \
          "${setup} debian-11 /srv/ci/debian:11 --shared --no-checksum"
    - name: Run node lifecycle tests
      timeout-minutes: 60
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        scp -rp -i /srv/github/id_ci -o StrictHostKeyChecking=no \
          -o UserKnownHostsFile=/dev/null \
          $source_path/shakenfist \
          debian@$primary:shakenfist
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
          debian@$primary "cd shakenfist/deploy; . /etc/sf/sfrc; bash nodelifecycletests.sh"
    # Every step from here to the artifact upload runs even when an earlier
    # step has failed.
    - name: Check logs
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        tools/run_remote ${primary} "sudo bash tools/ci_log_checks.sh develop node-lifecycle"
    - name: Restart Shaken Fist nodes so we can collect logs
      if: always()
      run: |
        set -x
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        for node in ${sf1_uuid} ${sf2_uuid} ${sf3_uuid} ${sf4_uuid} ${sf5_uuid}; do
          sf-client instance reboot --hard ${node}
        done
        # Fixed pause after issuing the hard reboots, before the remaining
        # steps try to talk to the nodes.
        sleep 90
    - name: Check for reasonable data rates
      timeout-minutes: 5
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        tools/run_remote ${primary} "sudo bash tools/ci_event_checks.sh develop"
    - name: Fetch and tweak inventory
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        scp -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
          debian@$primary:/etc/sf/inventory.yaml /srv/github/
        # The fetched inventory refers to /root/.ssh; point it at the path
        # the "Gather logs" step copies the key to instead.
        sed -i 's|/root/.ssh|/home/debian/.ssh|g' /srv/github/inventory.yaml
        echo "====="
        cat /srv/github/inventory.yaml
    - name: Gather logs
      if: always()
      run: |
        set -x
        # We need the ssh key in the place ansible expects it to be, which isn't
        # true on the CI worker node.
        cp /srv/github/id_ci /home/debian/.ssh/id_rsa
        cp /srv/github/id_ci.pub /home/debian/.ssh/id_rsa.pub
        # OWNER:GROUP is the portable separator; the dotted form is a legacy
        # spelling.
        chown -R debian:debian /home/debian/.ssh
        ansible-playbook -i /srv/github/inventory.yaml \
          --extra-vars "base_image_user=debian ansible_ssh_common_args='-o StrictHostKeyChecking=no'" \
          ${GITHUB_WORKSPACE}/actions/ansible/ci-gather-logs.yml
    - uses: actions/upload-artifact@v4
      if: always()
      with:
        name: bundle-functional-node-lifecycle
        retention-days: 90
        if-no-files-found: error
        path: /srv/github/artifacts/bundle.zip
    # Failures get announced here before we cancel stuff
    # NOTE(review): the action reference below looks mangled (e-mail
    # obfuscation artifact in this copy of the file); restore the real
    # "owner/action@version" reference. TODO confirm.
    - uses: slackapi/[email protected]
      if: failure()
      with:
        method: chat.update
        token: "${{ secrets.SLACK_BOT_TOKEN }}"
        payload: |
          channel: ${{ secrets.SLACK_CHANNEL_SHAKENFIST_CI }}
          ts: "${{ needs.merge_pr_comment.outputs.ts }}"
          text: |
            The PR merge attempt job at https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} failed.
            The PR commit message is:
            ${{ needs.merge_pr_comment.outputs.commit }}
          attachments:
            - color: "ee9090"
              fields:
                - title: "Status"
                  short: true
                  value: "Failure in node-lifecycle tests"
    # We do this instead of "fail-fast: true" because we want to cancel jobs
    # outside of the matrix as well.
    # - uses: andymckay/[email protected]
    #   if: failure()
    #   name: Cancel entire workflow on failure
# Upgrade test: installs etcd only, restores a v0.6.15 backup, and then runs
# the full installer over the top of it. Only runs for merge queue attempts,
# after merge_pr_comment has posted the Slack message that this job updates
# if it fails.
upgrade_0_6_15:
  if: github.event_name == 'merge_group'
  needs: merge_pr_comment
  runs-on: [self-hosted, vm]
  concurrency:
    group: ${{ github.workflow }}-${{ github.ref }}-upgrade-0-6-15
    cancel-in-progress: true
  steps:
    - name: Setup test environment
      uses: shakenfist/actions/setup-test-environment@main
    - name: Build infrastructure
      run: |
        cd ${GITHUB_WORKSPACE}/actions
        ansible-playbook -i /home/debian/ansible-hosts \
          --extra-vars "identifier=${SHAKENFIST_NAMESPACE} source_path=${GITHUB_WORKSPACE} \
          base_image=sf://label/ci-images/debian-11 base_image_user=debian" \
          ansible/ci-topology-localhost-upgrade.yml
        echo "UPLOAD_TARGET=10.0.0.10" >> $GITHUB_ENV
    - name: Log environmental configuration
      run: |
        echo "## ci-environment.sh"
        cat ${GITHUB_WORKSPACE}/ci-environment.sh
        echo
        echo "## Environment variables"
        echo "SF_HEAD_SHA = ${SF_HEAD_SHA}"
        echo "SHAKENFIST_NAMESPACE = ${SHAKENFIST_NAMESPACE}"
        echo "UPLOAD_TARGET = ${UPLOAD_TARGET}"
    - name: Copy CI tools to primary
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        scp -i /srv/github/id_ci -o StrictHostKeyChecking=no \
          -o UserKnownHostsFile=/dev/null -rp tools \
          debian@$primary:.
    - name: Run getsf to only install etcd, and then restore a backup
      timeout-minutes: 30
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
          -o UserKnownHostsFile=/dev/null \
          debian@$primary /tmp/getsf-wrapper --tags bootstrap,etcd
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
          -o UserKnownHostsFile=/dev/null \
          debian@$primary 'sudo chmod ugo+r /etc/sf/*; . /etc/sf/sfrc; /srv/shakenfist/venv/bin/sf-backup restore /srv/ci/backup-0.6.15-20230319'
    - name: Run getsf installer on primary
      timeout-minutes: 30
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
          -o UserKnownHostsFile=/dev/null \
          debian@$primary /tmp/getsf-wrapper
        echo ""
        echo ""
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
          -o UserKnownHostsFile=/dev/null \
          debian@$primary \
          'sudo rm /etc/apache2/sites-enabled/*; sudo a2ensite sf-example.conf; sudo apachectl graceful'
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
          -o UserKnownHostsFile=/dev/null \
          debian@$primary 'sudo chmod ugo+r /etc/sf/*'
    # The sf-backup script deliberately clobbers keys in its output, so we need
    # to repair the system key before we can use command line tools.
    - name: Repair clobbered system key
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        # The remote command is single quoted, so SHAKENFIST_KEY is expanded
        # on the primary (after sfrc has been sourced there), not on the
        # runner.
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
          -o UserKnownHostsFile=/dev/null \
          debian@$primary '. /etc/sf/sfrc; /srv/shakenfist/venv/bin/sf-ctl bootstrap-system-key deploy ${SHAKENFIST_KEY}'
    - name: List current nodes
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        ssh -i /srv/github/id_ci -o StrictHostKeyChecking=no \
          -o UserKnownHostsFile=/dev/null \
          debian@$primary '. /etc/sf/sfrc; sf-client node list'
    - name: Wait for five minutes
      run: sleep 300
    # Every step from here to the artifact upload runs even when an earlier
    # step has failed.
    - name: Check logs
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        tools/run_remote ${primary} "sudo bash tools/ci_log_checks.sh develop upgrade-0-6-15"
    - name: Check for reasonable data rates
      timeout-minutes: 5
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        cd ${GITHUB_WORKSPACE}/actions
        tools/run_remote ${primary} "sudo bash tools/ci_event_checks.sh develop"
    - name: Fetch and tweak inventory
      if: always()
      run: |
        . ${GITHUB_WORKSPACE}/ci-environment.sh
        scp -i /srv/github/id_ci -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
          debian@$primary:/etc/sf/inventory.yaml /srv/github/
        # The fetched inventory refers to /root/.ssh; point it at the path
        # the "Gather logs" step copies the key to instead.
        sed -i 's|/root/.ssh|/home/debian/.ssh|g' /srv/github/inventory.yaml
        echo "====="
        cat /srv/github/inventory.yaml
    - name: Gather logs
      if: always()
      run: |
        set -x
        # We need the ssh key in the place ansible expects it to be, which isn't
        # true on the CI worker node.
        cp /srv/github/id_ci /home/debian/.ssh/id_rsa
        cp /srv/github/id_ci.pub /home/debian/.ssh/id_rsa.pub
        # Hand the copied key over to the debian user, matching what the
        # node_lifecycle job's "Gather logs" step does.
        chown -R debian:debian /home/debian/.ssh
        ansible-playbook -i /srv/github/inventory.yaml \
          --extra-vars "base_image_user=debian ansible_ssh_common_args='-o StrictHostKeyChecking=no'" \
          ${GITHUB_WORKSPACE}/actions/ansible/ci-gather-logs.yml
    - uses: actions/upload-artifact@v4
      if: always()
      with:
        name: bundle-functional-upgrade-0-6-15
        retention-days: 90
        if-no-files-found: error
        path: /srv/github/artifacts/bundle.zip
    # Failures get announced here before we cancel stuff
    # NOTE(review): the action reference below looks mangled (e-mail
    # obfuscation artifact in this copy of the file); restore the real
    # "owner/action@version" reference. TODO confirm.
    - uses: slackapi/[email protected]
      if: failure()
      with:
        method: chat.update
        token: "${{ secrets.SLACK_BOT_TOKEN }}"
        payload: |
          channel: ${{ secrets.SLACK_CHANNEL_SHAKENFIST_CI }}
          ts: "${{ needs.merge_pr_comment.outputs.ts }}"
          text: |
            The merge attempt job at https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} failed.
            The PR commit message is:
            ${{ needs.merge_pr_comment.outputs.commit }}
          attachments:
            - color: "ee9090"
              fields:
                - title: "Status"
                  short: true
                  value: "Failure in upgrade tests"
    # We do this instead of "fail-fast: true" because we want to cancel jobs
    # outside of the matrix as well.
    # - uses: andymckay/[email protected]
    #   if: failure()
    #   name: Cancel entire workflow on failure
# Announces a pull request test attempt in Slack. The timestamp of the posted
# message is exported as the "ts" output so that other jobs can update the
# same message later.
enqueue_pr_comment:
  if: always() && github.event_name == 'pull_request'
  runs-on: ubuntu-latest
  permissions:
    pull-requests: write
  outputs:
    ts: "${{ steps.slack.outputs.ts }}"
    # NOTE(review): no step in this job has the id "commit_message", so this
    # output is always empty. It is kept so that the job's outputs are
    # unchanged for anything that reads them.
    commit: "${{ steps.commit_message.outputs.commit }}"
  steps:
    - name: Log the github event
      # The event contains text chosen by the PR author (title, body, branch
      # names). It is handed to the script through the environment instead
      # of being templated into it, so that none of it is parsed as shell.
      env:
        EVENT_JSON: "${{ toJSON(github.event.pull_request) }}"
      run: |
        printf '%s\n' "${EVENT_JSON}"
    - name: Tell people a PR test attempt is running
      id: slack
      # NOTE(review): the action reference below looks mangled (e-mail
      # obfuscation artifact in this copy of the file); restore the real
      # "owner/action@version" reference. TODO confirm.
      uses: slackapi/[email protected]
      with:
        method: chat.postMessage
        token: "${{ secrets.SLACK_BOT_TOKEN }}"
        payload: |
          channel: ${{ secrets.SLACK_CHANNEL_SHAKENFIST_CI }}
          text: |
            A PR test attempt is running at https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} :rocket:
            PR title: ${{ github.event.pull_request.title }}
            PR link: ${{ github.event.pull_request.html_url }}
            PR author: ${{ github.event.pull_request.user.login }}
          attachments:
            - color: "9090ee"
              fields:
                - title: "Status"
                  short: true
                  value: "In Progress"
# Announces a merge queue attempt in Slack. The timestamp of the posted
# message ("ts") and the quoted commit message ("commit") are exported as job
# outputs so that other jobs can update the same message later.
merge_pr_comment:
  if: always() && github.event_name == 'merge_group'
  runs-on: ubuntu-latest
  permissions:
    pull-requests: write
  outputs:
    ts: "${{ steps.slack.outputs.ts }}"
    commit: "${{ steps.commit_message.outputs.commit }}"
  steps:
    - name: Log the github event
      # The event includes the head commit message, which is free text. It
      # is handed to the script through the environment instead of being
      # templated into it, so that none of it is parsed as shell.
      env:
        EVENT_JSON: "${{ toJSON(github.event.merge_group) }}"
      run: |
        printf '%s\n' "${EVENT_JSON}"
    - name: Reformat the commit message as a quoted block
      id: commit_message
      # As above: the message arrives via the environment so that quotes,
      # dollar signs or backticks in it cannot break or alter the script.
      env:
        COMMIT_MESSAGE: "${{ github.event.merge_group.head_commit.message }}"
      run: |
        commit="${COMMIT_MESSAGE}"
        echo "Commit message is:"
        echo "${commit}"
        echo

        # The unquoted expansion word-splits the message, which collapses it
        # onto a single line before sed prefixes it with "> ".
        # NOTE(review): the Slack payloads interpolate this value into an
        # indented block scalar, so they appear to rely on it being a single
        # line. Confirm before quoting this expansion.
        commit=$(echo ${commit} | sed 's/^/> /')
        echo "Quoted message is:"
        echo "${commit}"
        echo

        # Multi-line output syntax: name<<DELIMITER, value, DELIMITER.
        echo "commit<<EOF" >> ${GITHUB_OUTPUT}
        echo "${commit}" >> ${GITHUB_OUTPUT}
        echo "EOF" >> ${GITHUB_OUTPUT}
        echo "GITHUB_OUTPUT is:"
        cat ${GITHUB_OUTPUT}
    - name: Tell people a merge attempt is running
      id: slack
      # NOTE(review): the action reference below looks mangled (e-mail
      # obfuscation artifact in this copy of the file); restore the real
      # "owner/action@version" reference. TODO confirm.
      uses: slackapi/[email protected]
      with:
        method: chat.postMessage
        token: "${{ secrets.SLACK_BOT_TOKEN }}"
        payload: |
          channel: ${{ secrets.SLACK_CHANNEL_SHAKENFIST_CI }}
          text: |
            A merge queue attempt is running at https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} :rocket:
            The PR commit message is:
            ${{ steps.commit_message.outputs.commit }}
          attachments:
            - color: "9090ee"
              fields:
                - title: "Status"
                  short: true
                  value: "In Progress"
# A job with no dependencies and no event filter, whose only step runs the
# shell builtin "true" and so always succeeds.
can_see_status:
  runs-on: ubuntu-latest
  steps:
    - name: Immediate success
      # Quoted so the YAML value is the command string rather than a boolean.
      run: "true"
# Gate for every event other than merge_group. It always runs, inspects the
# results of the jobs it needs, fails unless each of them either succeeded or
# was skipped, and on success updates the Slack message that
# enqueue_pr_comment posted.
can_enqueue:
  if: always() && github.event_name != 'merge_group'
  needs: [functional_matrix_pr, enqueue_pr_comment, sanity_checks]
  runs-on: ubuntu-latest
  permissions:
    actions: read
  steps:
    - name: Transform outcomes
      env:
        NEEDS_JSON: "${{ toJSON(needs) }}"
      run: |
        # Reduce the per-job results to a single true / false: true only if
        # every needed job has a result of "success" or "skipped".
        all_success=$(echo "$NEEDS_JSON" | jq '. | to_entries | map([.value.result == "success", .value.result == "skipped"] | any) | all')
        echo "ALL_SUCCESS=${all_success}" >>$GITHUB_ENV
    - name: Check outcomes
      run: |
        [ $ALL_SUCCESS == true ]
    # NOTE(review): the action reference below looks mangled (e-mail
    # obfuscation artifact in this copy of the file); restore the real
    # "owner/action@version" reference. TODO confirm.
    - uses: slackapi/[email protected]
      if: success()
      with:
        method: chat.update
        token: "${{ secrets.SLACK_BOT_TOKEN }}"
        payload: |
          channel: ${{ secrets.SLACK_CHANNEL_SHAKENFIST_CI }}
          ts: "${{ needs.enqueue_pr_comment.outputs.ts }}"
          text: |
            The PR test attempt job at https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} succeeded.
            PR title: ${{ github.event.pull_request.title }}
            PR link: ${{ github.event.pull_request.html_url }}
            PR author: ${{ github.event.pull_request.user.login }}
          attachments:
            - color: "90ee90"
              fields:
                - title: Status
                  short: true
                  value: Success
# Gate for merge queue attempts. It always runs for merge_group events,
# inspects the results of the jobs it needs, fails unless each of them either
# succeeded or was skipped, and on success updates the Slack message that
# merge_pr_comment posted.
can_merge:
  needs:
    - functional_matrix_merge
    - ansible_modules
    - node_lifecycle
    - upgrade_0_6_15
    - merge_pr_comment
  if: always() && github.event_name == 'merge_group'
  permissions:
    actions: read
  runs-on: ubuntu-latest
  steps:
    - env:
        NEEDS_JSON: "${{toJSON(needs)}}"
      name: Transform outcomes
      # Reduces the per-job results to a single true / false: true only if
      # every needed job has a result of "success" or "skipped".
      run: |
        echo "ALL_SUCCESS=$(echo "$NEEDS_JSON" | jq '. | to_entries | map([.value.result == "success", .value.result == "skipped"] | any) | all')" >>$GITHUB_ENV
    - name: Check outcomes
      run: "[ $ALL_SUCCESS == true ]"
    # NOTE(review): the action reference below looks mangled (e-mail
    # obfuscation artifact in this copy of the file); restore the real
    # "owner/action@version" reference. TODO confirm.
    - uses: slackapi/[email protected]
      if: success()
      with:
        method: chat.update
        token: "${{ secrets.SLACK_BOT_TOKEN }}"
        payload: |
          channel: ${{ secrets.SLACK_CHANNEL_SHAKENFIST_CI }}
          ts: "${{ needs.merge_pr_comment.outputs.ts }}"
          # The text must be a literal block scalar, like every other Slack
          # payload in this workflow. As a plain scalar its lines would be
          # folded together, and the line ending in a colon is not valid
          # inside a plain scalar.
          text: |
            The merge attempt job at https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} succeeded.
            The PR commit message is:
            ${{ needs.merge_pr_comment.outputs.commit }}
          attachments:
            - color: "90ee90"
              fields:
                - title: "Status"
                  short: true
                  value: "Success"