# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# File: .github/workflows/benchmark.yml
#
# Runs the Go benchmarks. On pull requests and manual dispatches the
# benchmarks are only executed (as a smoke test of the scripts); on pushes
# to `main` of apache/arrow-go the results are uploaded through
# ci/scripts/bench_adapt.py, which runs ci/scripts/bench.sh itself.

name: Benchmarks
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
    # Only PRs that touch the benchmark tooling need to exercise it.
    paths:
      - ".github/workflows/benchmark.yml"
      - "ci/scripts/bench.sh"
      - "ci/scripts/bench_adapt.py"
  workflow_dispatch:
permissions:
  contents: read
jobs:
  benchmark:
    runs-on: ubuntu-latest
    container: debian:12
    strategy:
      matrix:
        go: ['1.22.7']
        arch: ['amd64']
    steps:
      - name: Install dependencies
        run: |
          apt-get update
          apt-get install -y git ca-certificates
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'
      - name: Install Go ${{ matrix.go }} for Benchmarks
        uses: actions/setup-go@v5
        with:
          go-version: ${{ matrix.go }}
          cache: true
          cache-dependency-path: go.sum
          check-latest: false
      # Non-push events: run the benchmarks without uploading anything.
      # bench.sh compares its second argument against `-json` (single
      # dash), so that exact spelling is passed here.
      - name: Run Benchmarks
        if: github.event_name != 'push'
        run: bash ci/scripts/bench.sh $(pwd) -json
      # Pushes to main of the upstream repository: bench_adapt.py invokes
      # bench.sh and handles the results, so the step above is skipped.
      - name: Upload results
        if: github.event_name == 'push' && github.repository == 'apache/arrow-go' && github.ref_name == 'main'
        env:
          CONBENCH_URL: https://conbench.ursa.dev
          CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }}
          CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }}
          CONBENCH_REF: ${{ github.ref_name }}
          CONBENCH_MACHINE_INFO_NAME: ${{ matrix.arch }}-debian-12
        run: |
          python3 -m pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python
          python3 ci/scripts/bench_adapt.py
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# File: ci/scripts/bench.sh
#
# Usage: bench.sh <source_dir> [-json|--json]
#
# Runs every Go benchmark below <source_dir> and prints the output to
# STDOUT. If `-json` (or `--json`) is passed as the second argument, a file
# "bench_stats.json" containing a JSON representation of the results is
# additionally created in the directory this script is called from.

# pipefail: without it the exit status of the `go test | tee` pipeline is
# tee's, so a failing benchmark run would silently be reported as success.
set -exo pipefail

# Validate input arguments
if [ -z "$1" ]; then
  echo "Error: Missing source directory argument" >&2
  exit 1
fi

source_dir="$1"

PARQUET_TEST_DATA="${source_dir}/parquet-testing/data"
export PARQUET_TEST_DATA

pushd "${source_dir}"

# lots of benchmarks, they can take a while
# the timeout is for *ALL* benchmarks together,
# not per benchmark
# `-run='^$'` matches no test name, so only benchmarks are executed.
go test -bench=. -benchmem -timeout 40m -run='^$' ./... | tee bench_stat.dat

popd

# Accept both spellings of the flag; callers have used either one.
if [[ "$2" == "-json" || "$2" == "--json" ]]; then
  go install go.bobheadxi.dev/gobenchdata@latest
  PATH="$(go env GOPATH)/bin:${PATH}"
  export PATH
  cat "${source_dir}"/bench_*.dat | gobenchdata --json bench_stats.json
fi

# The raw benchmark output is only an intermediate artifact.
rm "${source_dir}"/bench_*.dat
#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# File: ci/scripts/bench_adapt.py

"""Adapt the Go benchmark output to ``benchadapt`` benchmark results.

The adapter is configured to run ``ci/scripts/bench.sh <repo root> -json``
and converts the resulting ``bench_stats.json`` (read from the current
working directory) into one ``BenchmarkResult`` per Go benchmark.
"""

import json
import logging
import os
import uuid
from pathlib import Path
from typing import List, Tuple

from benchadapt import BenchmarkResult
from benchadapt.adapters import BenchmarkAdapter
from benchadapt.log import log

log.setLevel(logging.DEBUG)

# This file lives in <root>/ci/scripts/, hence three `.parent` hops.
ARROW_ROOT = Path(__file__).parent.parent.parent.resolve()
SCRIPTS_PATH = ARROW_ROOT / "ci" / "scripts"

# `github_commit_info` is meant to communicate GitHub-flavored commit
# information to Conbench. See
# https://github.com/conbench/conbench/blob/cf7931f/benchadapt/python/benchadapt/result.py#L66
# for a specification.
github_commit_info = {"repository": "https://github.com/apache/arrow-go"}

if os.environ.get("CONBENCH_REF") == "main":
    # Assume GitHub Actions CI. The environment variable lookups below are
    # expected to fail when not running in GitHub Actions.
    github_commit_info = {
        "repository": f'{os.environ["GITHUB_SERVER_URL"]}/{os.environ["GITHUB_REPOSITORY"]}',
        "commit": os.environ["GITHUB_SHA"],
        "pr_number": None,  # implying default branch
    }
    run_reason = "commit"
else:
    # Assume that the environment is not GitHub Actions CI. Error out if that
    # assumption seems to be wrong. An explicit raise is used instead of
    # `assert` so the check also holds when Python runs with -O.
    if os.getenv("GITHUB_ACTIONS") is not None:
        raise RuntimeError(
            "GITHUB_ACTIONS is set but CONBENCH_REF is not 'main'; refusing "
            "to report results as a local dev run"
        )

    # This is probably a local dev environment, for testing. In this case, it
    # does usually not make sense to provide commit information (not a
    # controlled CI environment). Explicitly leave out "commit" and
    # "pr_number" to reflect that (to not send commit information).

    # Reflect 'local dev' scenario in run_reason. Allow user to (optionally)
    # inject a custom piece of information into the run reason here, from
    # environment.
    run_reason = "localdev"
    custom_reason_suffix = os.getenv("CONBENCH_CUSTOM_RUN_REASON")
    if custom_reason_suffix is not None:
        run_reason += f" {custom_reason_suffix.strip()}"


def _split_benchmark_name(full_name: str) -> Tuple[str, str, str]:
    """Split a Go benchmark name into ``(name, params, num_cpu)``.

    ``"BenchmarkFoo/size=8/kind=x-16"`` becomes
    ``("Foo", "size=8/kind=x", "16")``.

    Go appends ``-<GOMAXPROCS>`` to the benchmark name only when GOMAXPROCS
    is not 1, so a name without a trailing ``-<digits>`` is reported with
    ``num_cpu == "1"`` and is otherwise left intact. (A sub-benchmark whose
    own name ends in ``-<digits>`` is indistinguishable from that suffix
    when GOMAXPROCS is 1; that ambiguity is inherent to the format.)
    """
    trimmed = full_name.removeprefix("Benchmark")
    head, dash, tail = trimmed.rpartition("-")
    if dash and tail.isdigit():
        trimmed, ncpu = head, tail
    else:
        ncpu = "1"
    # Everything after the first "/" is the sub-benchmark parameter path.
    base, _, params = trimmed.partition("/")
    return base, params, ncpu


class GoAdapter(BenchmarkAdapter):
    """Benchmark adapter for the Go benchmarks of this repository."""

    # Written by bench.sh (via gobenchdata) into the current directory.
    result_file = "bench_stats.json"
    # Plain strings so the command can be passed to any process API.
    command = ["bash", str(SCRIPTS_PATH / "bench.sh"), str(ARROW_ROOT), "-json"]

    def __init__(self, *args, **kwargs) -> None:
        """Forward all arguments to the base adapter, fixing ``command``."""
        super().__init__(*args, command=self.command, **kwargs)

    def _transform_results(self) -> List[BenchmarkResult]:
        """Convert ``result_file`` into a list of ``BenchmarkResult``.

        All results share one ``run_id``; the benchmarks of each suite
        share one ``batch_id``. Benchmarks whose ``NsPerOp`` is not positive
        are skipped because no iterations-per-second value can be derived.

        Raises:
            OSError: if ``result_file`` cannot be opened.
            KeyError / IndexError: if the JSON lacks the expected fields.
        """
        with open(self.result_file, "r", encoding="utf-8") as f:
            raw_results = json.load(f)

        run_id = uuid.uuid4().hex
        parsed_results = []
        # Only the first top-level entry of the JSON document is consumed.
        for suite in raw_results[0]["Suites"]:
            batch_id = uuid.uuid4().hex
            pkg = suite["Pkg"]

            for benchmark in suite["Benchmarks"]:
                ns_per_op = benchmark["NsPerOp"]
                if ns_per_op <= 0:
                    log.warning(
                        "skipping %s in %s: non-positive NsPerOp %r",
                        benchmark["Name"],
                        pkg,
                        ns_per_op,
                    )
                    continue

                data = benchmark["Mem"]["MBPerSec"] * 1e6  # MB/s -> B/s
                time = 1 / ns_per_op * 1e9  # ns per op -> ops per second

                name, params, ncpu = _split_benchmark_name(benchmark["Name"])

                parsed = BenchmarkResult(
                    run_id=run_id,
                    batch_id=batch_id,
                    stats={
                        "data": [data],
                        "unit": "B/s",
                        "times": [time],
                        "time_unit": "i/s",
                        "iterations": benchmark["Runs"],
                    },
                    context={
                        "benchmark_language": "Go",
                        "goos": suite["Goos"],
                        "goarch": suite["Goarch"],
                    },
                    tags={
                        "pkg": pkg,
                        "num_cpu": ncpu,
                        "name": name,
                        "params": params,
                    },
                    run_reason=run_reason,
                    github=github_commit_info,
                )
                parsed.run_name = (
                    f"{parsed.run_reason}: {github_commit_info.get('commit')}"
                )
                parsed_results.append(parsed)

        return parsed_results


if __name__ == "__main__":
    go_adapter = GoAdapter(result_fields_override={"info": {}})
    go_adapter()