Skip to content

Commit

Permalink
Migrate integration tests to Bazel and K8s
Browse files Browse the repository at this point in the history
Remove the broken docker-compose workflows, rewrite the tests to be
rootlessly runnable via bazel and pin infrastructure outside of Bazel's
build graph in Nix.

Each integration test now spins up a nativelink deployment in K8s, runs
the test and removes the deployment again. All integration tests now
have timeouts to provide faster feedback on failing tests.

Apart from a few dynamically declared IPs the new approach is fully
reproducible and can reuse containers from the existing nix workflow.

The "build nativelink with nativelink" test has been removed as the
LRE/Remote test already covers that use case.
  • Loading branch information
aaronmondal committed Dec 19, 2023
1 parent df6f5b9 commit f79a26f
Show file tree
Hide file tree
Showing 28 changed files with 563 additions and 276 deletions.
2 changes: 2 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

common --enable_bzlmod

# Use the earliest supported C++ version for protoc.
build --cxxopt=-std=c++14 --host_cxxopt=-std=c++14

Expand Down
55 changes: 55 additions & 0 deletions .github/workflows/integration.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---
name: Integration tests

on:
push:
branches: [main]
pull_request:
branches: [main]

permissions: read-all

jobs:
remote:
strategy:
fail-fast: false
matrix:
os: [large-ubuntu-22.04]
name: Integration tests / ${{ matrix.os }}
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: >- # v4.1.1
actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
- name: Install Nix
uses: >- #v7
DeterminateSystems/nix-installer-action@5620eb4af6b562c53e4d4628c0b6e4f9d9ae8612
- name: Cache Nix derivations
uses: >- # Custom commit, last pinned at 2023-11-17.
DeterminateSystems/magic-nix-cache-action@a04e6275a6bea232cd04fc6f3cbf20d4cb02a3e1
- name: Start Kubernetes cluster
run: >
nix develop --impure --command
bash -c "./deployment-examples/kubernetes/00_infra.sh \
&& ./deployment-examples/kubernetes/01_operations.sh"
- name: Run warmup tests (intentionally fail)
# A failure of this step does not fail the job (`continue-on-error: true`).
# NOTE(review): this step uses a literal block scalar (`|`) whereas the other
# `nix develop` steps in this workflow use a folded one (`>`). With `|` the
# `nix develop ...` line and the `bash -c ...` line remain separate lines of
# the script instead of being joined into a single command - confirm that
# this is intended.
continue-on-error: true
run: |
nix develop --impure --command
bash -c "bazel test integration_tests \
--platforms=@rules_nixpkgs_core//platforms:host"
- name: Clean outer directories
run: >
nix develop --impure --command
bash -c "bazel clean"
- name: Run integration tests
run: >
nix develop --impure --command
bash -c "bazel test integration_tests \
--platforms=@rules_nixpkgs_core//platforms:host"
2 changes: 1 addition & 1 deletion .github/workflows/lre.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
- name: Get gateway IPs
id: gateway-ips
run: |
echo "cache_ip=$(kubectl get gtw cache -o=jsonpath='{.status.addresses[0].value}')" >> "$GITHUB_ENV"
echo "cache_ip=$(kubectl get gtw insecure-cache -o=jsonpath='{.status.addresses[0].value}')" >> "$GITHUB_ENV"
echo "scheduler_ip=$(kubectl get gtw scheduler -o=jsonpath='{.status.addresses[0].value}')" >> "$GITHUB_ENV"
- name: Print cluster state
Expand Down
91 changes: 0 additions & 91 deletions .github/workflows/main.yml

This file was deleted.

10 changes: 10 additions & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,13 @@ sh_test(
name = "dummy_test",
srcs = [":dummy_test_sh"],
)

# Wraps the `nativelink-current-tag` file from the `@nativelink-current-tag`
# repository in a target that shell rules can depend on. The target is declared
# incompatible with every platform that lacks the rules_nixpkgs `support_nix`
# constraint.
sh_library(
name = "current_tag",
srcs = ["@nativelink-current-tag//:bin/nativelink-current-tag"],
target_compatible_with = select({
"@rules_nixpkgs_core//constraints:support_nix": [],
"//conditions:default": ["@platforms//:incompatible"],
}),
visibility = ["//visibility:public"],
)
23 changes: 23 additions & 0 deletions MODULE.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Bzlmod module declaration for this repository.
module(
name = "nativelink",
version = "0.0.0",
)

# Integration-test rules, pulled in as a dev dependency.
bazel_dep(
name = "rules_bazel_integration_test",
version = "0.20.0",
dev_dependency = True,
)

# Module extension from rules_bazel_integration_test. It is asked to download
# the Bazel version named in `//:.bazelversion`; the repositories listed in
# `use_repo` are the ones made visible to this module.
bazel_binaries = use_extension(
"@rules_bazel_integration_test//:extensions.bzl",
"bazel_binaries",
dev_dependency = True,
)
bazel_binaries.download(version_file = "//:.bazelversion")
use_repo(
bazel_binaries,
"bazel_binaries",
"bazel_binaries_bazelisk",
"build_bazel_bazel_.bazelversion",
)
26 changes: 26 additions & 0 deletions WORKSPACE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,29 @@ http_archive(
load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")

protobuf_deps()

# rules_nixpkgs v0.10.0 release archive, pinned by checksum.
http_archive(
name = "io_tweag_rules_nixpkgs",
sha256 = "980edfceef2e59e1122d9be6c52413bc298435f0a3d452532b8a48d7562ffd67",
strip_prefix = "rules_nixpkgs-0.10.0",
urls = [
"https://github.com/tweag/rules_nixpkgs/releases/download/v0.10.0/rules_nixpkgs-0.10.0.tar.gz",
],
)

load(
"@io_tweag_rules_nixpkgs//nixpkgs:repositories.bzl",
"rules_nixpkgs_dependencies",
)

rules_nixpkgs_dependencies()

load("@io_tweag_rules_nixpkgs//nixpkgs:nixpkgs.bzl", "nixpkgs_flake_package")

# Exposes the `currentTag` package of this repository's flake as the
# `@nativelink-current-tag` repository, using the checked-in flake.nix and
# flake.lock.
# NOTE(review): `fail_not_supported = False` presumably lets this rule succeed
# on hosts without Nix support - confirm against the rules_nixpkgs docs.
nixpkgs_flake_package(
name = "nativelink-current-tag",
fail_not_supported = False,
nix_flake_file = "//:flake.nix",
nix_flake_lock_file = "//:flake.lock",
package = "currentTag",
)
1 change: 1 addition & 0 deletions deployment-examples/kubernetes/00_infra.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env bash
# This script sets up a local development cluster. It's roughly equivalent to
# a managed K8s setup.

Expand Down
9 changes: 5 additions & 4 deletions deployment-examples/kubernetes/01_operations.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env bash
# This script configures a cluster with a few standard deployments.

# TODO(aaronmondal): Add Grafana, OpenTelemetry and the various other standard
Expand All @@ -7,11 +8,11 @@ set -xeuo pipefail

SRC_ROOT=$(git rev-parse --show-toplevel)

kubectl apply -f ${SRC_ROOT}/deployment-examples/kubernetes/gateway.yaml
kubectl apply -f "$SRC_ROOT"/deployment-examples/kubernetes/gateway.yaml

IMAGE_TAG=$(nix eval .#image.imageTag --raw)
$(nix build .#image --print-build-logs --verbose) \
nix build .#image --print-build-logs --verbose \
&& ./result \
| skopeo \
copy \
Expand All @@ -21,9 +22,9 @@ $(nix build .#image --print-build-logs --verbose) \
IMAGE_TAG=$(nix eval .#lre.imageTag --raw)
echo $IMAGE_TAG
echo "$IMAGE_TAG"
$(nix build .#lre --print-build-logs --verbose) \
nix build .#lre --print-build-logs --verbose \
&& ./result \
| skopeo \
copy \
Expand Down
19 changes: 19 additions & 0 deletions deployment-examples/kubernetes/02_application.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env bash
# Get the nix derivation hash from the toolchain container, change the
# `TOOLCHAIN_TAG` variable in the `worker.json.template` to that hash and apply
# the configuration.
Expand All @@ -13,3 +14,21 @@ kubectl apply -k "$KUSTOMIZE_DIR"
kubectl rollout status deploy/nativelink-cas
kubectl rollout status deploy/nativelink-scheduler
kubectl rollout status deploy/nativelink-worker
# Verify endpoint reachability.
#
# Look up the address assigned to each Gateway, then print the addresses
# together with whatever each endpoint's /status route returns.
INSECURE_CACHE=$(kubectl get gtw insecure-cache -o=jsonpath='{.status.addresses[0].value}')
SCHEDULER=$(kubectl get gtw scheduler -o=jsonpath='{.status.addresses[0].value}')
CACHE=$(kubectl get gtw cache -o=jsonpath='{.status.addresses[0].value}')
PROMETHEUS=$(kubectl get gtw prometheus -o=jsonpath='{.status.addresses[0].value}')

# The format string is a constant and all data is passed as arguments, so a
# '%' or '\' in an address or in a curl response is printed verbatim instead of
# being interpreted as a printf directive. The curl calls are evaluated in the
# same order as before: insecure cache, cache, scheduler, prometheus.
printf '
Insecure Cache IP: %s -> --remote_cache=grpc://%s:50051
Cache IP: %s
Scheduler IP: %s -> --remote_executor=grpc://%s:50052
Prometheus IP: %s
Insecure cache status: %s
Cache status: %s
Scheduler status: %s
Prometheus status: %s
' \
  "$INSECURE_CACHE" "$INSECURE_CACHE" \
  "$CACHE" \
  "$SCHEDULER" "$SCHEDULER" \
  "$PROMETHEUS" \
  "$(curl http://"$INSECURE_CACHE":50051/status)" \
  "$(curl https://"$CACHE":50071/status)" \
  "$(curl http://"$SCHEDULER":50052/status)" \
  "$(curl http://"$PROMETHEUS":50061/status)"
1 change: 1 addition & 0 deletions deployment-examples/kubernetes/03_delete_application.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env bash
# Get the nix derivation hash from the toolchain container, change the
# `TOOLCHAIN_TAG` variable in the `worker.json.template` to that hash and delete
# the configuration.
Expand Down
35 changes: 35 additions & 0 deletions deployment-examples/kubernetes/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Configuration, manifest and example key/certificate files of the example
# deployment, bundled into one target so that other targets can list them as
# data.
sh_library(
name = "kustomization",
srcs = [
"cas.json",
"cas.yaml",
"example-do-not-use-in-prod-key.pem",
"example-do-not-use-in-prod-rootca.crt",
"kustomization.yaml",
"routes.yaml",
"scheduler.json",
"scheduler.yaml",
"worker.json.template",
"worker.yaml",
],
visibility = ["//visibility:public"],
)

# This target is used by end-to-end tests running under k8s. To deploy a test
# environment, make sure to have a k8s cluster running before invoking the test
# and add this to the test script:
#
# source "$(rlocation nativelink/deployment-examples/kubernetes/bazel_k8s_prelude.sh)"
#
# The kustomization files are shipped as data; the Bash runfiles library and
# the current-tag target are dependencies of the prelude script.
sh_library(
name = "bazel_k8s_prelude",
srcs = ["bazel_k8s_prelude.sh"],
data = [
":kustomization",
],
visibility = ["//visibility:public"],
deps = [
"@bazel_tools//tools/bash/runfiles",
"@nativelink//:current_tag",
],
)
53 changes: 53 additions & 0 deletions deployment-examples/kubernetes/bazel_k8s_prelude.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env bash
# Copyright 2022 The Native Link Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# --- begin runfiles.bash initialization v3 ---
# Copy-pasted from the Bazel Bash runfiles library v3.
set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash
source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
source "$0.runfiles/$f" 2>/dev/null || \
source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
{ echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
# --- end runfiles.bash initialization v3 ---

# Tag read from the `nativelink-current-tag` runfile; it replaces the
# placeholder in the worker config template further down.
NATIVELINK_TAG=$(cat "$(rlocation nativelink-current-tag/bin/nativelink-current-tag)")
# Runfiles location of the kustomization directory.
KUSTOMIZE_DIR=$(rlocation nativelink/deployment-examples/kubernetes)

#######################################
# Best-effort teardown of everything described by the kustomization.
# Renders the manifests with `kubectl kustomize` and pipes them into
# `kubectl delete`. A failure is reported on stderr but is not propagated.
# Globals:   KUSTOMIZE_DIR (read)
# Arguments: none
# Outputs:   a warning on stderr when the cleanup pipeline fails
# Returns:   0
#######################################
remove_resources() {
  if ! kubectl kustomize --load-restrictor LoadRestrictionsNone "$KUSTOMIZE_DIR" \
    | kubectl delete -f -; then
    echo "Resource cleanup failed. Manually verify your cluster." >&2
  fi
}

# Register the cleanup before anything is applied so that the resources are
# removed again whenever the shell exits, including on a failed deployment.
trap remove_resources EXIT

# Render worker.json from its template by substituting the tag placeholder.
# NOTE(review): the result is written next to the template inside
# KUSTOMIZE_DIR - confirm that this location is writable where the tests run.
sed "s/__NATIVELINK_TOOLCHAIN_TAG__/${NATIVELINK_TAG}/g" \
"$KUSTOMIZE_DIR/worker.json.template" \
> "$KUSTOMIZE_DIR/worker.json"

# Build the manifests and apply them to the cluster.
# NOTE(review): `LoadRestrictionsNone` is presumably required because the
# runfiles resolve to paths outside the kustomization root - confirm.
kubectl kustomize \
--load-restrictor LoadRestrictionsNone \
"$KUSTOMIZE_DIR" \
| kubectl apply -f -

# Wait for each deployment's rollout before handing over to the test.
kubectl rollout status deploy/nativelink-cas
kubectl rollout status deploy/nativelink-scheduler
kubectl rollout status deploy/nativelink-worker

# Application code will run here.
Loading

0 comments on commit f79a26f

Please sign in to comment.