From 03541dd765849ec453b501c83dbf4071b317bce1 Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Sun, 19 Jun 2022 23:42:49 +0200 Subject: [PATCH] feat: add cas server dockerfile (#757) * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * feat: add cas server dockerfile * fix: absolute path in hub image --- .github/workflows/cd.yml | 7 + .github/workflows/force-docker-build-cas.yml | 146 ++++++++++++++++++ .github/workflows/force-release.yml | 7 + Dockerfiles/base.Dockerfile | 20 +-- Dockerfiles/cuda.Dockerfile | 24 +-- Dockerfiles/server.Dockerfile | 50 ++++++ Dockerfiles/tensorrt.Dockerfile | 21 ++- README.md | 4 +- client/clip_client/__init__.py | 4 +- .../cas-on-colab.ipynb | 0 docs/{playground => hosting}/cas-on-colab.svg | 0 docs/{playground => hosting}/colab-banner.png | Bin docs/{playground => hosting}/colab.md | 2 +- .../images => hosting}/jc-deploy.png | Bin docs/hosting/on-jcloud.md | 65 ++++++++ docs/index.md | 10 +- docs/user-guides/client.md | 2 +- docs/user-guides/server.md | 99 ++---------- server/clip_server/__main__.py | 2 +- server/clip_server/executors/clip_hg.py | 2 +- server/clip_server/executors/clip_onnx.py | 6 +- server/clip_server/executors/clip_tensorrt.py | 6 +- server/clip_server/executors/clip_torch.py | 4 +- server/clip_server/executors/helper.py | 2 +- server/clip_server/helper.py | 2 +- server/clip_server/model/clip.py | 4 +- server/clip_server/model/clip_onnx.py | 2 +- server/clip_server/model/clip_trt.py | 6 +- 
server/clip_server/model/simple_tokenizer.py | 6 +- 29 files changed, 367 insertions(+), 136 deletions(-) create mode 100644 .github/workflows/force-docker-build-cas.yml create mode 100644 Dockerfiles/server.Dockerfile rename docs/{playground => hosting}/cas-on-colab.ipynb (100%) rename docs/{playground => hosting}/cas-on-colab.svg (100%) rename docs/{playground => hosting}/colab-banner.png (100%) rename docs/{playground => hosting}/colab.md (95%) rename docs/{user-guides/images => hosting}/jc-deploy.png (100%) create mode 100644 docs/hosting/on-jcloud.md diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index bfca2bf6d..ad96912d2 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -134,6 +134,13 @@ jobs: token: ${{ secrets.JINA_DEV_BOT }} env: release_token: ${{ secrets.CAS_RELEASE_TOKEN }} + - uses: benc-uk/workflow-dispatch@v1 + with: + workflow: Manual CAS Docker Build + inputs: '{ "release_token": "${{ env.release_token }}", "triggered_by": "CD"}' + token: ${{ secrets.JINA_DEV_BOT }} + env: + release_token: ${{ secrets.CAS_RELEASE_TOKEN }} - name: Pre-release hub (.devN) uses: benc-uk/workflow-dispatch@v1 with: diff --git a/.github/workflows/force-docker-build-cas.yml b/.github/workflows/force-docker-build-cas.yml new file mode 100644 index 000000000..618e7fdfa --- /dev/null +++ b/.github/workflows/force-docker-build-cas.yml @@ -0,0 +1,146 @@ +name: Manual CAS Docker Build + +on: + workflow_dispatch: + inputs: + release_token: + description: 'Your release token' + required: true + triggered_by: + description: 'CD | TAG | MANUAL' + required: false + default: MANUAL + +jobs: + token-check: + runs-on: ubuntu-latest + steps: + - run: echo "success!" 
+ if: "${{ github.event.inputs.release_token }} == ${{ env.release_token }}" + env: + release_token: ${{ secrets.CAS_RELEASE_TOKEN }} + + regular-release: + needs: token-check + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + pip_tag: [ "", "onnx", "tensorrt"] # default: "" = core + steps: + - uses: actions/checkout@v2 + - name: Set envs and versions + run: | + VCS_REF=${{ github.ref }} + echo "VCS_REF=$VCS_REF" >> $GITHUB_ENV + echo "Will build $VCS_REF" + echo "BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" >> $GITHUB_ENV + + if [[ "${{ matrix.pip_tag }}" == "perf" ]]; then + echo "JINA_PIP_INSTALL_PERF=1" >> $GITHUB_ENV + fi + + if [[ "${{ matrix.pip_tag }}" == "" ]]; then + echo "JINA_PIP_INSTALL_CORE=1" >> $GITHUB_ENV + fi + + JINA_VERSION=$(sed -n '/^__version__/p' ./server/clip_server/__init__.py | cut -d \' -f2) + V_JINA_VERSION=v${JINA_VERSION} + JINA_MINOR_VERSION=${JINA_VERSION%.*} + JINA_MAJOR_VERSION=${JINA_MINOR_VERSION%.*} + + PY_TAG=${{matrix.py_version}} + if [ -n "${PY_TAG}" ]; then + PY_TAG=-py${PY_TAG//./} + fi + + PIP_TAG=${{ matrix.pip_tag }} + if [ -n "${PIP_TAG}" ]; then + PIP_TAG=-${PIP_TAG} + fi + + if [[ "${{ github.event.inputs.triggered_by }}" == "CD" ]]; then + + if [[ "${{ matrix.py_version }}" == "$DEFAULT_PY_VERSION" ]]; then + echo "TAG_ALIAS=\ + jinaai/clip-as-service:master${PY_TAG}${PIP_TAG}, \ + jinaai/clip-as-service:master${PIP_TAG}" \ + >> $GITHUB_ENV + else + # on every CD + echo "TAG_ALIAS=\ + jinaai/clip-as-service:master${PY_TAG}${PIP_TAG}" \ + >> $GITHUB_ENV + fi + + elif [[ "${{ github.event.inputs.triggered_by }}" == "TAG" ]]; then + # on every tag release + + if [[ "${{ matrix.py_version }}" == "$DEFAULT_PY_VERSION" ]]; then + echo "TAG_ALIAS=\ + jinaai/clip-as-service:latest${PY_TAG}${PIP_TAG}, \ + jinaai/clip-as-service:${JINA_VERSION}${PY_TAG}${PIP_TAG}, \ + jinaai/clip-as-service:${JINA_MINOR_VERSION}${PY_TAG}${PIP_TAG}, \ + jinaai/clip-as-service:${JINA_MAJOR_VERSION}${PY_TAG}${PIP_TAG}, \ + 
jinaai/clip-as-service:latest${PIP_TAG}, \ + jinaai/clip-as-service:${JINA_VERSION}${PIP_TAG}, \ + jinaai/clip-as-service:${JINA_MINOR_VERSION}${PIP_TAG}, \ + jinaai/clip-as-service:${JINA_MAJOR_VERSION}${PIP_TAG} \ + " >> $GITHUB_ENV + else + echo "TAG_ALIAS=\ + jinaai/clip-as-service:latest${PY_TAG}${PIP_TAG}, \ + jinaai/clip-as-service:${JINA_VERSION}${PY_TAG}${PIP_TAG}, \ + jinaai/clip-as-service:${JINA_MINOR_VERSION}${PY_TAG}${PIP_TAG}, \ + jinaai/clip-as-service:${JINA_MAJOR_VERSION}${PY_TAG}${PIP_TAG} \ + " >> $GITHUB_ENV + fi + elif [[ "${{ github.event.inputs.triggered_by }}" == "MANUAL" ]]; then + # on every manual release + if [[ "${{ matrix.py_version }}" == "$DEFAULT_PY_VERSION" ]]; then + echo "TAG_ALIAS=\ + jinaai/clip-as-service:${JINA_VERSION}${PIP_TAG}, \ + jinaai/clip-as-service:${JINA_VERSION}${PY_TAG}${PIP_TAG} \ + " >> $GITHUB_ENV + else + echo "TAG_ALIAS=\ + jinaai/clip-as-service:${JINA_VERSION}${PY_TAG}${PIP_TAG} \ + " >> $GITHUB_ENV + fi + else + echo "Bad triggered_by: ${{ github.event.inputs.triggered_by }}!" + exit 1 + fi + + echo "JINA_VERSION=${JINA_VERSION}" >> $GITHUB_ENV + + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v1 + with: + install: true + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_DEVBOT_USER }} + password: ${{ secrets.DOCKERHUB_DEVBOT_TOKEN }} + - run: | + # https://github.com/docker/buildx/issues/464#issuecomment-741507760 + # https://github.com/kubernetes-sigs/azuredisk-csi-driver/pull/808/files + docker run --privileged --rm tonistiigi/binfmt --uninstall qemu-aarch64 + docker run --rm --privileged tonistiigi/binfmt --install all + - name: Build and push + uses: docker/build-push-action@v2 + with: + context: . 
+ file: Dockerfiles/server.Dockerfile + platforms: linux/amd64 + push: true + tags: ${{env.TAG_ALIAS}} + build-args: | + BUILD_DATE=${{env.BUILD_DATE}} + JINA_VERSION=${{env.JINA_VERSION}} + VCS_REF=${{env.VCS_REF}} + PIP_INSTALL_CORE=${{env.JINA_PIP_INSTALL_CORE}} + PIP_INSTALL_PERF=${{env.JINA_PIP_INSTALL_PERF}} + PIP_TAG=${{matrix.pip_tag}} diff --git a/.github/workflows/force-release.yml b/.github/workflows/force-release.yml index 34d739395..69b7af77a 100644 --- a/.github/workflows/force-release.yml +++ b/.github/workflows/force-release.yml @@ -52,6 +52,13 @@ jobs: needs: token-check runs-on: ubuntu-latest steps: + - uses: benc-uk/workflow-dispatch@v1 + with: + workflow: Manual CAS Docker Build + inputs: '{ "release_token": "${{ env.release_token }}", "triggered_by": "CD"}' + token: ${{ secrets.JINA_DEV_BOT }} + env: + release_token: ${{ secrets.CAS_RELEASE_TOKEN }} - name: Build and push docker uses: benc-uk/workflow-dispatch@v1 with: diff --git a/Dockerfiles/base.Dockerfile b/Dockerfiles/base.Dockerfile index 665320798..8a0d26331 100644 --- a/Dockerfiles/base.Dockerfile +++ b/Dockerfiles/base.Dockerfile @@ -8,7 +8,7 @@ ARG BACKEND_TAG=torch # constant, wont invalidate cache LABEL org.opencontainers.image.vendor="Jina AI Limited" \ org.opencontainers.image.licenses="Apache 2.0" \ - org.opencontainers.image.title="Clip-As-Service" \ + org.opencontainers.image.title="CLIP-as-Service" \ org.opencontainers.image.description="Embed images and sentences into fixed-length vectors with CLIP" \ org.opencontainers.image.authors="hello@jina.ai" \ org.opencontainers.image.url="clip-as-service" \ @@ -17,20 +17,20 @@ LABEL org.opencontainers.image.vendor="Jina AI Limited" \ RUN pip3 install --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu # copy will almost always invalid the cache -COPY . /clip_server/ +COPY . 
/cas/ -RUN echo "\ +WORKDIR /cas + +RUN if [ "${BACKEND_TAG}" != "torch" ]; then python3 -m pip install --no-cache-dir "./[${BACKEND_TAG}]" ; fi \ + && python3 -m pip install --no-cache-dir . + +RUN CLIP_PATH=$(python -c "import clip_server;print(clip_server.__path__[0])") \ + && echo "\ jtype: CLIPEncoder\n\ metas:\n\ py_modules:\n\ - - clip_server/executors/clip_$BACKEND_TAG.py\n\ + - $CLIP_PATH/executors/clip_$BACKEND_TAG.py\n\ " > /tmp/config.yml -RUN cd /clip_server && \ - if [ "$BACKEND_TAG" != "torch" ]; then pip3 install --no-cache-dir "./[$BACKEND_TAG]" ; fi && \ - pip3 install --no-cache-dir . - -WORKDIR /clip_server - ENTRYPOINT ["jina", "executor", "--uses", "/tmp/config.yml"] diff --git a/Dockerfiles/cuda.Dockerfile b/Dockerfiles/cuda.Dockerfile index ba870a2c2..56ef96bcb 100644 --- a/Dockerfiles/cuda.Dockerfile +++ b/Dockerfiles/cuda.Dockerfile @@ -6,18 +6,29 @@ ENV DEBIAN_FRONTEND=noninteractive ARG JINA_VERSION=3.6.0 ARG BACKEND_TAG=torch +# constant, wont invalidate cache +LABEL org.opencontainers.image.vendor="Jina AI Limited" \ + org.opencontainers.image.licenses="Apache 2.0" \ + org.opencontainers.image.title="CLIP-as-Service" \ + org.opencontainers.image.description="Embed images and sentences into fixed-length vectors with CLIP" \ + org.opencontainers.image.authors="hello@jina.ai" \ + org.opencontainers.image.url="clip-as-service" \ + org.opencontainers.image.documentation="https://clip-as-service.jina.ai/" + RUN apt-get update && apt-get install -y --no-install-recommends \ python3-setuptools python3-wheel python3-pip \ && apt-get clean && rm -rf /var/lib/apt/lists/*; -RUN python3 -m pip install --default-timeout=1000 --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113 +RUN python3 -m pip install --default-timeout=1000 --no-cache-dir torch torchvision torchaudio nvidia-pyindex --extra-index-url https://download.pytorch.org/whl/cu113 RUN python3 -m pip install --default-timeout=1000 
--no-cache-dir "jina[standard]==${JINA_VERSION}" -RUN python3 -m pip install nvidia-pyindex - # copy will almost always invalid the cache -COPY . /clip_server/ +COPY . /cas/ +WORKDIR /cas + +RUN if [ "${BACKEND_TAG}" != "torch" ]; then python3 -m pip install --no-cache-dir "./[${BACKEND_TAG}]" ; fi \ + && python3 -m pip install --no-cache-dir . RUN echo "\ jtype: CLIPEncoder\n\ @@ -28,11 +39,6 @@ metas:\n\ - clip_server/executors/clip_$BACKEND_TAG.py\n\ " > /tmp/config.yml -RUN cd /clip_server && \ - if [ "${BACKEND_TAG}" != "torch" ]; then python3 -m pip install --no-cache-dir "./[${BACKEND_TAG}]" ; fi && \ - python3 -m pip install --no-cache-dir . - -WORKDIR /clip_server ENTRYPOINT ["jina", "executor", "--uses", "/tmp/config.yml"] diff --git a/Dockerfiles/server.Dockerfile b/Dockerfiles/server.Dockerfile new file mode 100644 index 000000000..7183b2111 --- /dev/null +++ b/Dockerfiles/server.Dockerfile @@ -0,0 +1,50 @@ +ARG CUDA_VERSION=11.6.0 + +FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 + +ARG CAS_NAME=cas +WORKDIR /${CAS_NAME} + +ENV PIP_NO_CACHE_DIR=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 + +# constant, wont invalidate cache +LABEL org.opencontainers.image.vendor="Jina AI Limited" \ + org.opencontainers.image.licenses="Apache 2.0" \ + org.opencontainers.image.title="CLIP-as-Service" \ + org.opencontainers.image.description="Embed images and sentences into fixed-length vectors with CLIP" \ + org.opencontainers.image.authors="hello@jina.ai" \ + org.opencontainers.image.url="clip-as-service" \ + org.opencontainers.image.documentation="https://clip-as-service.jina.ai/" + + +RUN apt-get update \ + && apt-get install -y --no-install-recommends python3 python3-pip wget \ + && ln -sf python3 /usr/bin/python \ + && ln -sf pip3 /usr/bin/pip \ + && pip install --upgrade pip \ + && pip install wheel setuptools nvidia-pyindex \ + && pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113 + +COPY server ./server +# given by 
builder +ARG PIP_TAG +RUN pip install --default-timeout=1000 --compile ./server/ \ + && if [ -n "${PIP_TAG}" ]; then pip install --default-timeout=1000 --compile "./server[${PIP_TAG}]" ; fi + +ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64 + +ARG USER_ID=1000 +ARG GROUP_ID=1000 +ARG USER_NAME=${CAS_NAME} +ARG GROUP_NAME=${CAS_NAME} + +RUN groupadd -g ${GROUP_ID} ${USER_NAME} &&\ + useradd -l -u ${USER_ID} -g ${USER_NAME} ${GROUP_NAME} &&\ + mkdir /home/${USER_NAME} &&\ + chown ${USER_NAME}:${GROUP_NAME} /home/${USER_NAME} &&\ + chown -R ${USER_NAME}:${GROUP_NAME} /${CAS_NAME}/ + +USER ${USER_NAME} + +ENTRYPOINT ["python", "-m", "clip_server"] \ No newline at end of file diff --git a/Dockerfiles/tensorrt.Dockerfile b/Dockerfiles/tensorrt.Dockerfile index 8e4e234f9..aff8db526 100644 --- a/Dockerfiles/tensorrt.Dockerfile +++ b/Dockerfiles/tensorrt.Dockerfile @@ -7,22 +7,33 @@ FROM nvcr.io/nvidia/tensorrt:${TENSORRT_VERSION}-py3 ARG JINA_VERSION=3.6.0 ARG BACKEND_TAG=tensorrt +# constant, wont invalidate cache +LABEL org.opencontainers.image.vendor="Jina AI Limited" \ + org.opencontainers.image.licenses="Apache 2.0" \ + org.opencontainers.image.title="CLIP-as-Service" \ + org.opencontainers.image.description="Embed images and sentences into fixed-length vectors with CLIP" \ + org.opencontainers.image.authors="hello@jina.ai" \ + org.opencontainers.image.url="clip-as-service" \ + org.opencontainers.image.documentation="https://clip-as-service.jina.ai/" + RUN pip3 install --default-timeout=1000 --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113 RUN pip3 -m pip install --default-timeout=1000 --no-cache-dir "jina[standard]==${JINA_VERSION}" # copy will almost always invalid the cache -COPY . /clip_server/ +COPY . 
/cas/ +WORKDIR /cas + +RUN python3 -m pip install --no-cache-dir "./[$BACKEND_TAG]" -RUN echo '\ +RUN CLIP_PATH=$(python -c "import clip_server;print(clip_server.__path__[0])") \ + && echo '\ jtype: CLIPEncoder\n\ metas:\n\ py_modules:\n\ - - clip_server/executors/clip_$BACKEND_TAG.py\n\ + - $CLIP_PATH/executors/clip_$BACKEND_TAG.py\n\ ' > /tmp/config.yml -RUN cd /clip_server/ && python3 -m pip install --no-cache-dir "./[$BACKEND_TAG]" -WORKDIR /clip_server ENTRYPOINT ["jina", "executor", "--uses", "/tmp/config.yml"] diff --git a/README.md b/README.md index 1e764a165..507b64ba8 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ PyPI Codecov branch -Host on Google Colab with GPU/TPU support +Host on Google Colab with GPU/TPU support

@@ -240,7 +240,7 @@ pip install "clip-server[tensorrt]" -You can also [host the server on Google Colab](https://clip-as-service.jina.ai/playground/colab/), leveraging its free GPU/TPU. +You can also [host the server on Google Colab](https://clip-as-service.jina.ai/hosting/colab/), leveraging its free GPU/TPU. ### Install client diff --git a/client/clip_client/__init__.py b/client/clip_client/__init__.py index 2fd5ca10e..b5c877006 100644 --- a/client/clip_client/__init__.py +++ b/client/clip_client/__init__.py @@ -2,9 +2,9 @@ import os -from .client import Client +from clip_client.client import Client if 'NO_VERSION_CHECK' not in os.environ: - from .helper import is_latest_version + from clip_client.helper import is_latest_version is_latest_version(github_repo='clip-as-service') diff --git a/docs/playground/cas-on-colab.ipynb b/docs/hosting/cas-on-colab.ipynb similarity index 100% rename from docs/playground/cas-on-colab.ipynb rename to docs/hosting/cas-on-colab.ipynb diff --git a/docs/playground/cas-on-colab.svg b/docs/hosting/cas-on-colab.svg similarity index 100% rename from docs/playground/cas-on-colab.svg rename to docs/hosting/cas-on-colab.svg diff --git a/docs/playground/colab-banner.png b/docs/hosting/colab-banner.png similarity index 100% rename from docs/playground/colab-banner.png rename to docs/hosting/colab-banner.png diff --git a/docs/playground/colab.md b/docs/hosting/colab.md similarity index 95% rename from docs/playground/colab.md rename to docs/hosting/colab.md index e799acbe1..e66cce0b4 100644 --- a/docs/playground/colab.md +++ b/docs/hosting/colab.md @@ -19,7 +19,7 @@ Specifically, the architecture is illustrated below: :width: 70% ``` -```{button-link} https://colab.research.google.com/github/jina-ai/clip-as-service/blob/main/docs/playground/cas-on-colab.ipynb +```{button-link} https://colab.research.google.com/github/jina-ai/clip-as-service/blob/main/docs/hosting/cas-on-colab.ipynb :color: primary :align: center diff --git 
a/docs/user-guides/images/jc-deploy.png b/docs/hosting/jc-deploy.png similarity index 100% rename from docs/user-guides/images/jc-deploy.png rename to docs/hosting/jc-deploy.png diff --git a/docs/hosting/on-jcloud.md b/docs/hosting/on-jcloud.md new file mode 100644 index 000000000..e8afac16b --- /dev/null +++ b/docs/hosting/on-jcloud.md @@ -0,0 +1,65 @@ +# Host on JCloud + +```{warning} +JCloud does not support GPU hosting yet. Hence `clip_server` deployed on JCloud will be run on CPU. +``` + +Essentially `clip_server` is a Jina [Flow](https://docs.jina.ai/fundamentals/flow/). Any Jina Flow can be hosted on [JCloud](https://docs.jina.ai/fundamentals/jcloud/), hence `clip_server` can be hosted on JCloud as well. Learn more about [JCloud here](https://docs.jina.ai/fundamentals/jcloud/). + + +First, you need a Flow YAML file for deploy. A minimum YAML file is as follows: + +````{tab} torch-flow.yml + +```yaml +jtype: Flow +executors: + - uses: jinahub+docker://CLIPTorchEncoder +``` + +```` +````{tab} onnx-flow.yml + +```yaml +jtype: Flow +executors: + - uses: jinahub+docker://CLIPOnnxEncoder +``` + +```` + +Note that, `port` is unnecessary here as JCloud will assign a new URL for any deployed service. + +Executors now must start with `jinahub+docker://` as it is required by JCloud. We currently provide containerized executors [`jinahub+docker://CLIPTorchEncoder`](https://hub.jina.ai/executor/gzpbl8jh) and [`jinahub+docker://CLIPOnnxEncoder`](https://hub.jina.ai/executor/2a7auwg2) on Jina Hub. They are automatically synced on the new release of `clip_server` module. 
+ +To deploy, + +````{tab} PyTorch-backed +```bash +jc deploy torch-flow.yml +``` +```` + +````{tab} ONNX-backed +```bash +jc deploy onnx-flow.yml +``` +```` + + +If Flow is successfully deployed you will see: + +```{figure} jc-deploy.png +:width: 60% +``` + +You can now connect to it via client by setting `server` as the URL given by JCloud: + +```python +from clip_client import Client + +c = Client( + 'grpcs://174eb69ba3.wolf.jina.ai' +) # This is the URL you get from previous step +c.profile() +``` diff --git a/docs/index.md b/docs/index.md index 3d97c79da..6887fb6f2 100644 --- a/docs/index.md +++ b/docs/index.md @@ -88,7 +88,7 @@ pip install "clip_server[tensorrt]" ````{tab} Server on Google Colab -```{button-link} https://colab.research.google.com/github/jina-ai/clip-as-service/blob/main/docs/playground/cas-on-colab.ipynb +```{button-link} https://colab.research.google.com/github/jina-ai/clip-as-service/blob/main/docs/hosting/cas-on-colab.ipynb :color: primary :align: center @@ -181,6 +181,13 @@ user-guides/faq ``` +```{toctree} +:caption: Hosting +:hidden: + +hosting/colab +hosting/on-jcloud +``` ```{toctree} :caption: Playground @@ -188,7 +195,6 @@ user-guides/faq playground/embedding playground/reasoning -playground/colab ``` diff --git a/docs/user-guides/client.md b/docs/user-guides/client.md index bd9a0d9d0..9f136ec3a 100644 --- a/docs/user-guides/client.md +++ b/docs/user-guides/client.md @@ -435,7 +435,7 @@ curl -X POST https://demo-cas.jina.ai:8443/post \ jq -c '.data[] | .embedding' ``` -```json +```text [-0.022064208984375,0.1044921875,...] [-0.0750732421875,-0.166015625,...] ``` \ No newline at end of file diff --git a/docs/user-guides/server.md b/docs/user-guides/server.md index 14329f331..9fce40647 100644 --- a/docs/user-guides/server.md +++ b/docs/user-guides/server.md @@ -11,6 +11,7 @@ This chapter introduces the API of the server. You will need to install server first in Python 3.7+: `pip install clip-server`. 
``` +(start-server)= ## Start server @@ -487,102 +488,34 @@ r = c.encode( ) ``` -## Deploy on JCloud +## Serve in Docker Container -The `clip_server` can be smoothly deployed and hosted as a [Flow](https://docs.jina.ai/fundamentals/flow/) on [JCloud](https://docs.jina.ai/fundamentals/jcloud/) to utilize the free computational and storage resources provided by Jina. +You can run the server inside a Docker container. We provide a Dockerfile in the repository, which is CUDA-enabled with optimized package installation. -You need a YAML file to config the `clip_server` executor in the Flow in order to deploy. -The executors are hosted on [Jina Hub](https://hub.jina.ai) and are automatically sync with `clip_server` Python module. -We currently support [PyTorch-backed CLIP](https://hub.jina.ai/executor/gzpbl8jh) and [ONNX-backed CLIP](https://hub.jina.ai/executor/2a7auwg2). +### Build -A minimum YAML file is as follows. You can also config the same parameters in executors via `with` [described here](#clip-model-config). - -````{tab} pytorch-flow.yml - -```yaml ---- -emphasize-lines: 5 ---- - -jtype: Flow -executors: - - name: CLIPTorchEncoder - uses: jinahub+docker://CLIPTorchEncoder - with: +```bash +git clone https://github.com/jina-ai/clip-as-service.git +docker build . -f Dockerfiles/server.Dockerfile --build-arg GROUP_ID=$(id -g ${USER}) --build-arg USER_ID=$(id -u ${USER}) -t jinaai/clip-as-service ``` -```` -````{tab} onnx-flow.yml ---- -emphasize-lines: 5 ---- -```yaml -jtype: Flow -executors: - - name: CLIPOnnxEncoder - uses: jinahub+docker://CLIPOnnxEncoder - with: +```{tip} +The build argument `--build-arg GROUP_ID=$(id -g ${USER}) --build-arg USER_ID=$(id -u ${USER})` is optional, but having them is highly recommended as it allows you to reuse host's cache with the correct access. ``` -```` -```{warning} -All Executors' `uses` must follow the format `jinahub+docker://MyExecutor` (from [Jina Hub](https://hub.jina.ai)) to avoid any local file dependencies. 
-``` +### Run -To deploy, - -````{tab} PyTorch-backed ```bash -$ jc deploy pytorch-flow.yml +docker run -p 51009:51000 -v $HOME/.cache:/home/cas/.cache --gpus all jinaai/clip-as-service ``` -```` -````{tab} ONNX-backed -```bash -$ jc deploy onnx-flow.yml -``` -```` -Here `jc deploy` is the command to deploy a Jina project to JCloud. -Learn more about [JCloud usage](https://docs.jina.ai/fundamentals/jcloud/). +Here, `51009` is the public port on the host and `51000` is the {ref}`in-container port defined inside YAML`. The argument `-v $HOME/.cache:/home/cas/.cache` leverages the host's cache and prevents you from downloading the same model again on the next start. -The Flow is successfully deployed when you see: +Due to the limitation of the terminal inside the Docker container, you will **not** see the classic Jina progress bar on start. Instead, you will face a few minutes of awkward silence while the model is downloading and then see the "Flow is ready to serve" dialog. -```{figure} images/jc-deploy.png -:width: 60% -``` - -After deploying on jcloud, you can connect to it via client by setting `grpcs://` generated from previous step as follows: +The CLI usage is the same {ref}`as described here <start-server>`. -```python -from clip_client import Client - -c = Client( - 'grpcs://174eb69ba3.wolf.jina.ai' -) # This is the URL you get from previous step - -r = c.encode( - [ - 'First do it', - 'then do it right', - 'then do it better', - 'https://picsum.photos/200', - ] -) -print(r) -``` - -will give you: - -```text -[[ 0.03480401 -0.23519686 0.01041038 ... -0.5229086 -0.10081214 - -0.08695138] - [-0.0683605 -0.00324154 0.01490371 ... -0.50309485 -0.06193433 - -0.08574048] - [ 0.15041807 -0.07933374 -0.06650036 ... -0.46410388 -0.08535041 - 0.04270519] - [-0.16183889 0.10636599 -0.2062868 ... -0.41244072 0.19485454 - 0.05658712]] +```{tip} +You can enable debug logging via: `docker run --env JINA_LOG_LEVEL=debug ...` ``` - -It means the client and the JCloud server are now connected. Well done! 
\ No newline at end of file diff --git a/server/clip_server/__main__.py b/server/clip_server/__main__.py index 28340fdf8..41ccfd2c2 100644 --- a/server/clip_server/__main__.py +++ b/server/clip_server/__main__.py @@ -4,7 +4,7 @@ if __name__ == '__main__': if 'NO_VERSION_CHECK' not in os.environ: - from .helper import is_latest_version + from clip_server.helper import is_latest_version is_latest_version(github_repo='clip-as-service') diff --git a/server/clip_server/executors/clip_hg.py b/server/clip_server/executors/clip_hg.py index bc98266d2..a5edab62e 100644 --- a/server/clip_server/executors/clip_hg.py +++ b/server/clip_server/executors/clip_hg.py @@ -5,7 +5,7 @@ import numpy as np import torch from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTokenizer -from .helper import ( +from clip_server.executors.helper import ( split_img_txt_da, set_rank, ) diff --git a/server/clip_server/executors/clip_onnx.py b/server/clip_server/executors/clip_onnx.py index db38cb647..deed22328 100644 --- a/server/clip_server/executors/clip_onnx.py +++ b/server/clip_server/executors/clip_onnx.py @@ -4,14 +4,14 @@ from typing import Optional, Dict import onnxruntime as ort -from .helper import ( +from clip_server.executors.helper import ( split_img_txt_da, preproc_image, preproc_text, set_rank, ) -from ..model import clip -from ..model.clip_onnx import CLIPOnnxModel +from clip_server.model import clip +from clip_server.model.clip_onnx import CLIPOnnxModel from jina import Executor, requests, DocumentArray diff --git a/server/clip_server/executors/clip_tensorrt.py b/server/clip_server/executors/clip_tensorrt.py index c8dba1873..5dc9af251 100644 --- a/server/clip_server/executors/clip_tensorrt.py +++ b/server/clip_server/executors/clip_tensorrt.py @@ -2,14 +2,14 @@ from typing import Dict import numpy as np -from .helper import ( +from clip_server.executors.helper import ( split_img_txt_da, preproc_image, preproc_text, set_rank, ) -from ..model import clip -from 
..model.clip_trt import CLIPTensorRTModel +from clip_server.model import clip +from clip_server.model.clip_trt import CLIPTensorRTModel from jina import Executor, requests, DocumentArray diff --git a/server/clip_server/executors/clip_torch.py b/server/clip_server/executors/clip_torch.py index b123ac138..a4701004a 100644 --- a/server/clip_server/executors/clip_torch.py +++ b/server/clip_server/executors/clip_torch.py @@ -5,13 +5,13 @@ import numpy as np import torch -from .helper import ( +from clip_server.executors.helper import ( split_img_txt_da, preproc_image, preproc_text, set_rank, ) -from ..model import clip +from clip_server.model import clip from jina import Executor, requests, DocumentArray diff --git a/server/clip_server/executors/helper.py b/server/clip_server/executors/helper.py index 36d6e3194..4e1ddecb3 100644 --- a/server/clip_server/executors/helper.py +++ b/server/clip_server/executors/helper.py @@ -4,7 +4,7 @@ from docarray import Document, DocumentArray from docarray.math.distance.numpy import cosine -from ..model import clip +from clip_server.model import clip def numpy_softmax(x: 'np.ndarray', axis: int = -1) -> 'np.ndarray': diff --git a/server/clip_server/helper.py b/server/clip_server/helper.py index e4b20d266..16983c1de 100644 --- a/server/clip_server/helper.py +++ b/server/clip_server/helper.py @@ -49,7 +49,7 @@ def _version_check(package: str = None, github_repo: str = None): width=50, ) ) - except Exception: + except Exception: # no network, too slow, PyPi is down pass diff --git a/server/clip_server/model/clip.py b/server/clip_server/model/clip.py index 35a01e498..315003b99 100644 --- a/server/clip_server/model/clip.py +++ b/server/clip_server/model/clip.py @@ -11,8 +11,8 @@ from PIL import Image from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize -from .model import build_model -from .simple_tokenizer import SimpleTokenizer as _Tokenizer +from clip_server.model.model import build_model +from 
clip_server.model.simple_tokenizer import SimpleTokenizer as _Tokenizer try: from torchvision.transforms import InterpolationMode diff --git a/server/clip_server/model/clip_onnx.py b/server/clip_server/model/clip_onnx.py index aa022e19a..9326bcaaa 100644 --- a/server/clip_server/model/clip_onnx.py +++ b/server/clip_server/model/clip_onnx.py @@ -1,6 +1,6 @@ import os -from .clip import _download, available_models +from clip_server.model.clip import _download, available_models _S3_BUCKET = 'https://clip-as-service.s3.us-east-2.amazonaws.com/models/onnx/' _MODELS = { diff --git a/server/clip_server/model/clip_trt.py b/server/clip_server/model/clip_trt.py index bfcb7c7c6..c1e945a2a 100644 --- a/server/clip_server/model/clip_trt.py +++ b/server/clip_server/model/clip_trt.py @@ -4,7 +4,7 @@ import tensorrt as trt from tensorrt.tensorrt import Logger, Runtime - from .trt_utils import load_engine, build_engine, save_engine + from clip_server.model.trt_utils import load_engine, build_engine, save_engine except ImportError: raise ImportError( "It seems that TensorRT is not yet installed. " @@ -13,7 +13,7 @@ "https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html" ) -from .clip import _download, MODEL_SIZE +from clip_server.model.clip import _download, MODEL_SIZE _S3_BUCKET = 'https://clip-as-service.s3.us-east-2.amazonaws.com/models/tensorrt/' _MODELS = { @@ -55,7 +55,7 @@ def start_engines(self): f'The engine plan file is generated on an incompatible device, expecting compute {compute_capacity} ' 'got compute 8.6, will rebuild the TensorRT engine.' 
) - from .clip_onnx import CLIPOnnxModel + from clip_server.model.clip_onnx import CLIPOnnxModel onnx_model = CLIPOnnxModel(self._name) diff --git a/server/clip_server/model/simple_tokenizer.py b/server/clip_server/model/simple_tokenizer.py index de2a86659..a5f6a5478 100644 --- a/server/clip_server/model/simple_tokenizer.py +++ b/server/clip_server/model/simple_tokenizer.py @@ -8,7 +8,7 @@ import ftfy -from ..helper import __resources_path__ +from clip_server.helper import __resources_path__ @lru_cache() @@ -34,10 +34,10 @@ def bytes_to_unicode(): ) cs = bs[:] n = 0 - for b in range(2 ** 8): + for b in range(2**8): if b not in bs: bs.append(b) - cs.append(2 ** 8 + n) + cs.append(2**8 + n) n += 1 cs = [chr(n) for n in cs] return dict(zip(bs, cs))