feat(llama.cpp): add distributed llama.cpp inferencing #2064

Workflow file for this run

.github/workflows/image-pr.yml at b33224b

	---
	name: 'build container images tests'

	on:
	pull_request:

	concurrency:
	group: ci-${{ github.head_ref \|\| github.ref }}-${{ github.repository }}
	cancel-in-progress: true

	jobs:
	extras-image-build:
	uses: ./.github/workflows/image_build.yml
	with:
	tag-latest: ${{ matrix.tag-latest }}
	tag-suffix: ${{ matrix.tag-suffix }}
	ffmpeg: ${{ matrix.ffmpeg }}
	image-type: ${{ matrix.image-type }}
	build-type: ${{ matrix.build-type }}
	cuda-major-version: ${{ matrix.cuda-major-version }}
	cuda-minor-version: ${{ matrix.cuda-minor-version }}
	platforms: ${{ matrix.platforms }}
	runs-on: ${{ matrix.runs-on }}
	base-image: ${{ matrix.base-image }}
	grpc-base-image: ${{ matrix.grpc-base-image }}
	makeflags: ${{ matrix.makeflags }}
	secrets:
	dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
	dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
	quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
	quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
	strategy:
	# Pushing with all jobs in parallel
	# eats the bandwidth of all the nodes
	max-parallel: ${{ github.event_name != 'pull_request' && 2 \|\| 4 }}
	matrix:
	include:
	- build-type: ''
	platforms: 'linux/amd64'
	tag-latest: 'false'
	tag-suffix: '-ffmpeg'
	ffmpeg: 'true'
	image-type: 'extras'
	runs-on: 'arc-runner-set'
	base-image: "ubuntu:22.04"
	makeflags: "--jobs=3 --output-sync=target"
	- build-type: 'cublas'
	cuda-major-version: "12"
	cuda-minor-version: "1"
	platforms: 'linux/amd64'
	tag-latest: 'false'
	tag-suffix: '-cublas-cuda12-ffmpeg'
	ffmpeg: 'true'
	image-type: 'extras'
	runs-on: 'arc-runner-set'
	base-image: "ubuntu:22.04"
	makeflags: "--jobs=3 --output-sync=target"
	- build-type: 'hipblas'
	platforms: 'linux/amd64'
	tag-latest: 'false'
	tag-suffix: '-hipblas'
	ffmpeg: 'false'
	image-type: 'extras'
	base-image: "rocm/dev-ubuntu-22.04:6.1"
	grpc-base-image: "ubuntu:22.04"
	runs-on: 'arc-runner-set'
	makeflags: "--jobs=3 --output-sync=target"
	- build-type: 'sycl_f16'
	platforms: 'linux/amd64'
	tag-latest: 'false'
	base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
	grpc-base-image: "ubuntu:22.04"
	tag-suffix: 'sycl-f16-ffmpeg'
	ffmpeg: 'true'
	image-type: 'extras'
	runs-on: 'arc-runner-set'
	makeflags: "--jobs=3 --output-sync=target"
	core-image-build:
	uses: ./.github/workflows/image_build.yml
	with:
	tag-latest: ${{ matrix.tag-latest }}
	tag-suffix: ${{ matrix.tag-suffix }}
	ffmpeg: ${{ matrix.ffmpeg }}
	image-type: ${{ matrix.image-type }}
	build-type: ${{ matrix.build-type }}
	cuda-major-version: ${{ matrix.cuda-major-version }}
	cuda-minor-version: ${{ matrix.cuda-minor-version }}
	platforms: ${{ matrix.platforms }}
	runs-on: ${{ matrix.runs-on }}
	base-image: ${{ matrix.base-image }}
	grpc-base-image: ${{ matrix.grpc-base-image }}
	makeflags: ${{ matrix.makeflags }}
	secrets:
	dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
	dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
	quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
	quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
	strategy:
	matrix:
	include:
	- build-type: ''
	platforms: 'linux/amd64'
	tag-latest: 'false'
	tag-suffix: '-ffmpeg-core'
	ffmpeg: 'true'
	image-type: 'core'
	runs-on: 'ubuntu-latest'
	base-image: "ubuntu:22.04"
	makeflags: "--jobs=4 --output-sync=target"
	- build-type: 'sycl_f16'
	platforms: 'linux/amd64'
	tag-latest: 'false'
	base-image: "intel/oneapi-basekit:2024.1.0-devel-ubuntu22.04"
	grpc-base-image: "ubuntu:22.04"
	tag-suffix: 'sycl-f16-ffmpeg-core'
	ffmpeg: 'true'
	image-type: 'core'
	runs-on: 'arc-runner-set'
	makeflags: "--jobs=3 --output-sync=target"
	- build-type: 'cublas'
	cuda-major-version: "12"
	cuda-minor-version: "1"
	platforms: 'linux/amd64'
	tag-latest: 'false'
	tag-suffix: '-cublas-cuda12-ffmpeg-core'
	ffmpeg: 'true'
	image-type: 'core'
	runs-on: 'ubuntu-latest'
	base-image: "ubuntu:22.04"
	makeflags: "--jobs=4 --output-sync=target"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

feat(llama.cpp): add distributed llama.cpp inferencing #2064

Workflow file

feat(llama.cpp): add distributed llama.cpp inferencing #2064

Jobs

Run details

Workflow file for this run