diff --git a/.github/workflows/build_docker_image.yaml b/.github/workflows/build_docker_image.yaml index c26b1d48..c93daf04 100644 --- a/.github/workflows/build_docker_image.yaml +++ b/.github/workflows/build_docker_image.yaml @@ -20,22 +20,24 @@ jobs: fail-fast: false matrix: include: - - dockerfile: docker/reginald/Dockerfile - image: ghcr.io/${{ github.repository }}_reginald + - dockerfile: docker/run_all/Dockerfile + image: ghcr.io/${{ github.repository }}_run_all - dockerfile: docker/slack_bot/Dockerfile image: ghcr.io/${{ github.repository }}_slackbot - dockerfile: docker/create_index/Dockerfile image: ghcr.io/${{ github.repository }}_create_index + - dockerfile: docker/reginald_app/Dockerfile + image: ghcr.io/${{ github.repository }}_app permissions: packages: write contents: read steps: - name: Check out the repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Log in to the Container registry - uses: docker/login-action@v2 + uses: docker/login-action@v3.2.0 with: registry: ghcr.io username: ${{ github.actor }} @@ -43,12 +45,12 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@v4 + uses: docker/metadata-action@v5.5.0 with: images: ${{ matrix.image }} - name: Build and push Docker images - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5.4.0 with: file: ${{ matrix.dockerfile }} push: true diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e21a1257..3de97512 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,7 +16,7 @@ jobs: name: Lint with pre-commit runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: python-version: 3.11 diff --git a/azure/hack_week/__main__.py b/azure/hack_week/__main__.py index ee6c9b2f..3b014530 100644 --- a/azure/hack_week/__main__.py +++ b/azure/hack_week/__main__.py @@ -7,6 +7,8 @@ storage, ) +from reginald.defaults import DEFAULT_ARGS + # Get some configuration variables stack_name = pulumi.get_stack() config = pulumi.Config() @@ -98,7 +100,7 @@ containers=[ # Reginald chat completion container containerinstance.ContainerArgs( - image="ghcr.io/alan-turing-institute/reginald_reginald:main", + image="ghcr.io/alan-turing-institute/reginald_run_all:main", name="reginald-completion", # maximum of 63 characters environment_variables=[ containerinstance.EnvironmentVariableArgs( @@ -141,7 +143,7 @@ ), # Reginald (public) container containerinstance.ContainerArgs( - image="ghcr.io/alan-turing-institute/reginald_reginald:main", + image="ghcr.io/alan-turing-institute/reginald_run_all:main", name="reginald-gpt-azure", # maximum of 63 characters environment_variables=[ containerinstance.EnvironmentVariableArgs( @@ -166,23 +168,23 @@ ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_MAX_INPUT_SIZE", - value="4096", + value=str(DEFAULT_ARGS["max_input_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_K", - value="3", + value=str(DEFAULT_ARGS["k"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_SIZE", - value="512", + value=str(DEFAULT_ARGS["chunk_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", - value="0.1", + value=str(DEFAULT_ARGS["chunk_overlap_ratio"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_NUM_OUTPUT", - value="512", + value=str(DEFAULT_ARGS["num_output"]), ), containerinstance.EnvironmentVariableArgs( name="OPENAI_AZURE_API_BASE", @@ -204,8 +206,8 @@ ports=[], resources=containerinstance.ResourceRequirementsArgs( requests=containerinstance.ResourceRequestsArgs( - cpu=1, - memory_in_gb=12, + cpu=3, + memory_in_gb=14, ), ), volume_mounts=[ @@ -253,30 +255,30 @@ ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_MAX_INPUT_SIZE", - value="4096", + value=str(DEFAULT_ARGS["max_input_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_K", - value="3", + value=str(DEFAULT_ARGS["k"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_SIZE", - value="512", + value=str(DEFAULT_ARGS["chunk_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", - value="0.1", + value=str(DEFAULT_ARGS["chunk_overlap_ratio"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_NUM_OUTPUT", - value="512", + value=str(DEFAULT_ARGS["num_output"]), ), ], ports=[], resources=containerinstance.ResourceRequirementsArgs( requests=containerinstance.ResourceRequestsArgs( - cpu=4, - memory_in_gb=16, + cpu=1, + memory_in_gb=8, ), ), volume_mounts=[ diff --git a/azure/production/__main__.py b/azure/production/__main__.py index 8e5c56aa..1bf31f94 100644 --- a/azure/production/__main__.py +++ b/azure/production/__main__.py @@ -7,6 +7,8 @@ storage, ) +from reginald.defaults import DEFAULT_ARGS + # Get some configuration variables stack_name = pulumi.get_stack() config = pulumi.Config() @@ -142,7 +144,7 @@ # all_data index creation container containerinstance.ContainerArgs( image="ghcr.io/alan-turing-institute/reginald_create_index:main", - name="reginald-create-index", # maximum of 63 characters + name="reginald-create-index-all-data", # maximum of 63 characters environment_variables=[ containerinstance.EnvironmentVariableArgs( name="GITHUB_TOKEN", @@ -154,30 +156,126 @@ ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_MAX_INPUT_SIZE", - value="4096", + value=str(DEFAULT_ARGS["max_input_size"]), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_K", + value=str(DEFAULT_ARGS["k"]), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_CHUNK_SIZE", + value=str(DEFAULT_ARGS["chunk_size"]), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", + value=str(DEFAULT_ARGS["chunk_overlap_ratio"]), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_NUM_OUTPUT", + value=str(DEFAULT_ARGS["num_output"]), + ), + ], + ports=[], + resources=containerinstance.ResourceRequirementsArgs( + requests=containerinstance.ResourceRequestsArgs( + cpu=2, + memory_in_gb=6, + ), + ), + volume_mounts=[ + containerinstance.VolumeMountArgs( + mount_path="/app/data", + name="llama-data", + ), + ], + ), + # reg index creation container + containerinstance.ContainerArgs( + image="ghcr.io/alan-turing-institute/reginald_create_index:main", + name="reginald-create-index-reg", # maximum of 63 characters + environment_variables=[ + containerinstance.EnvironmentVariableArgs( + name="GITHUB_TOKEN", + secure_value=config.get_secret("GITHUB_TOKEN"), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_WHICH_INDEX", + value="reg", + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_MAX_INPUT_SIZE", + value=str(DEFAULT_ARGS["max_input_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_K", - value="3", + value=str(DEFAULT_ARGS["k"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_SIZE", - value="512", + value=str(DEFAULT_ARGS["chunk_size"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", - value="0.1", + value=str(DEFAULT_ARGS["chunk_overlap_ratio"]), ), containerinstance.EnvironmentVariableArgs( name="LLAMA_INDEX_NUM_OUTPUT", - value="512", + value=str(DEFAULT_ARGS["num_output"]), ), ], ports=[], resources=containerinstance.ResourceRequirementsArgs( requests=containerinstance.ResourceRequestsArgs( - cpu=4, - memory_in_gb=16, + cpu=1, + memory_in_gb=5, + ), + ), + volume_mounts=[ + containerinstance.VolumeMountArgs( + mount_path="/app/data", + name="llama-data", + ), + ], + ), + # public index creation container + containerinstance.ContainerArgs( + image="ghcr.io/alan-turing-institute/reginald_create_index:main", + name="reginald-create-index-public", # maximum of 63 characters + environment_variables=[ + containerinstance.EnvironmentVariableArgs( + name="GITHUB_TOKEN", + secure_value=config.get_secret("GITHUB_TOKEN"), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_WHICH_INDEX", + value="public", + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_MAX_INPUT_SIZE", + value=str(DEFAULT_ARGS["max_input_size"]), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_K", + value=str(DEFAULT_ARGS["k"]), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_CHUNK_SIZE", + value=str(DEFAULT_ARGS["chunk_size"]), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_CHUNK_OVERLAP_RATIO", + value=str(DEFAULT_ARGS["chunk_overlap_ratio"]), + ), + containerinstance.EnvironmentVariableArgs( + name="LLAMA_INDEX_NUM_OUTPUT", + value=str(DEFAULT_ARGS["num_output"]), + ), + ], + ports=[], + resources=containerinstance.ResourceRequirementsArgs( + requests=containerinstance.ResourceRequestsArgs( + cpu=1, + memory_in_gb=5, ), ), volume_mounts=[ @@ -190,7 +288,7 @@ ], os_type=containerinstance.OperatingSystemTypes.LINUX, resource_group_name=resource_group.name, - restart_policy=containerinstance.ContainerGroupRestartPolicy.NEVER, + restart_policy=containerinstance.ContainerGroupRestartPolicy.ON_FAILURE, sku=containerinstance.ContainerGroupSku.STANDARD, volumes=[ containerinstance.VolumeArgs( diff --git a/docker/create_index/Dockerfile b/docker/create_index/Dockerfile index e9de7ee7..0840e7e8 100644 --- a/docker/create_index/Dockerfile +++ b/docker/create_index/Dockerfile @@ -1,14 +1,22 @@ -FROM python:3.11.4 +FROM python:3.11.9 WORKDIR /app # Setup Python prerequisites -RUN pip3 install --upgrade pip poetry setuptools wheel +RUN pip3 install --upgrade pip poetry==1.8.3 setuptools wheel + +ENV POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_VIRTUALENVS_CREATE=1 \ + POETRY_CACHE_DIR=/tmp/poetry_cache # Build Python project -COPY reginald reginald COPY pyproject.toml . -COPY README.md . -RUN poetry install +RUN touch README.md +RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR + +COPY reginald reginald + +RUN poetry install --without dev -CMD ["poetry", "run", "reginald_create_index"] +CMD ["poetry", "run", "reginald", "create-index"] diff --git a/docker/reginald/Dockerfile b/docker/reginald/Dockerfile deleted file mode 100644 index 78471694..00000000 --- a/docker/reginald/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM python:3.11.4 - -WORKDIR /app - -# Setup Python prerequisites -RUN pip3 install --upgrade pip poetry setuptools wheel - -# Build Python project -COPY reginald reginald -COPY pyproject.toml . -COPY README.md . -RUN poetry install - -CMD ["poetry", "run", "reginald_run"] diff --git a/docker/reginald_app/Dockerfile b/docker/reginald_app/Dockerfile new file mode 100644 index 00000000..097e58b3 --- /dev/null +++ b/docker/reginald_app/Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.11.9 + +WORKDIR /app + +# Setup Python prerequisites +RUN pip3 install --upgrade pip poetry setuptools wheel + +ENV POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_VIRTUALENVS_CREATE=1 \ + POETRY_CACHE_DIR=/tmp/poetry_cache + +# Build Python project +COPY pyproject.toml . +RUN touch README.md +RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR + +COPY reginald reginald + +RUN poetry install --extras api_bot --without dev + +CMD ["poetry", "run", "reginald", "app"] diff --git a/docker/run_all/Dockerfile b/docker/run_all/Dockerfile new file mode 100644 index 00000000..0bdeaf04 --- /dev/null +++ b/docker/run_all/Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.11.9 + +WORKDIR /app + +# Setup Python prerequisites +RUN pip3 install --upgrade pip poetry setuptools wheel + +ENV POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_VIRTUALENVS_CREATE=1 \ + POETRY_CACHE_DIR=/tmp/poetry_cache + +# Build Python project +COPY pyproject.toml . +RUN touch README.md +RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR + +COPY reginald reginald + +RUN poetry install --without dev + +CMD ["poetry", "run", "reginald", "run-all"] diff --git a/docker/slack_bot/Dockerfile b/docker/slack_bot/Dockerfile index 964c39f7..05c07306 100644 --- a/docker/slack_bot/Dockerfile +++ b/docker/slack_bot/Dockerfile @@ -1,14 +1,22 @@ -FROM python:3.11.4 +FROM python:3.11.9 WORKDIR /app # Setup Python prerequisites RUN pip3 install --upgrade pip poetry setuptools wheel +ENV POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_VIRTUALENVS_CREATE=1 \ + POETRY_CACHE_DIR=/tmp/poetry_cache + # Build Python project -COPY reginald reginald COPY pyproject.toml . -COPY README.md . -RUN poetry install --extras api_bot +RUN touch README.md +RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR + +COPY reginald reginald + +RUN poetry install --extras api_bot --without dev -CMD ["poetry", "run", "reginald_run_api_bot"] +CMD ["poetry", "run", "reginald", "bot"] diff --git a/reginald/models/app.py b/reginald/models/app.py index 04808e40..beedae0a 100644 --- a/reginald/models/app.py +++ b/reginald/models/app.py @@ -43,7 +43,7 @@ async def channel_mention(query: Query): return app -async def run_reginald_app(**kwargs) -> None: +def run_reginald_app(**kwargs) -> None: # set up response model response_model = setup_llm(**kwargs) app: FastAPI = create_reginald_app(response_model) diff --git a/reginald/models/llama_index/llama_utils.py b/reginald/models/llama_index/llama_utils.py index 65e5a593..75209a1b 100644 --- a/reginald/models/llama_index/llama_utils.py +++ b/reginald/models/llama_index/llama_utils.py @@ -91,7 +91,7 @@ def setup_settings( # initialise embedding model to use to create the index vectors embed_model = HuggingFaceEmbedding( model_name="sentence-transformers/all-mpnet-base-v2", - embed_batch_size=128, + embed_batch_size=64, ) # construct the prompt helper diff --git a/reginald/run.py b/reginald/run.py index ef1c6fb1..75a3cd69 100644 --- a/reginald/run.py +++ b/reginald/run.py @@ -40,9 +40,7 @@ def main( elif cli == "app": from reginald.models.app import run_reginald_app - asyncio.run( - run_reginald_app(data_dir=data_dir, which_index=which_index, **kwargs) - ) + run_reginald_app(data_dir=data_dir, which_index=which_index, **kwargs) elif cli == "chat": import warnings