From 4d069a84ac0414059acce322f00815bf0cd12536 Mon Sep 17 00:00:00 2001 From: Ziniu Yu Date: Wed, 15 Jun 2022 18:00:14 +0800 Subject: [PATCH] feat: upload torch executor (#723) * feat: add hub push runner * fix: hub push yaml * fix: hub push yaml * fix: debug script * fix: debug script * fix: debug script * fix: debug script * fix: debug script * fix: debug script * fix: debug script * fix: debug script * fix: debug script * fix: comment manifest * fix: revert manifest * fix: use relative import * fix: change base folder * fix: hub push * fix: bumb jina version * fix: get requirments.txt * fix: turnon workflow on PR * fix: update dockerfile * fix: error * fix: executor name * fix: use jinahub auth token * fix: test torch upload * fix: docker * fix: upload gpu executor * fix: gpu tag * fix: gpu tag * feat: upload onnx executor * fix: debug onnx upload * fix: debug onnx upload * fix: minor revision * fix: add torch exec readme * fix: add onnx exec readme * chore: update exec readme * fix: update readme * chore: update readme * chore: onnx readme * chore: update readme * docs: fix batch_size * docs: fix batch_size * chore: updates * chore: upload pytorch and onnx runtime based executors * fix: use relative imports Co-authored-by: numb3r3 --- .github/README-exec/onnx.readme.md | 177 ++++++++++++++++ .github/README-exec/torch.readme.md | 179 ++++++++++++++++ .github/workflows/force-docker-build.yml | 8 +- .github/workflows/force-hub-push.yml | 197 ++++++++++++++++++ .github/workflows/force-release.yml | 15 +- Dockerfiles/base.Dockerfile | 15 +- Dockerfiles/cuda.Dockerfile | 15 +- Dockerfiles/tensorrt.Dockerfile | 14 +- scripts/get-requirements.py | 12 ++ server/clip_server/executors/clip_hg.py | 2 +- server/clip_server/executors/clip_onnx.py | 6 +- server/clip_server/executors/clip_tensorrt.py | 6 +- server/clip_server/executors/clip_torch.py | 4 +- server/clip_server/executors/helper.py | 2 +- server/clip_server/model/clip_trt.py | 4 +- 15 files changed, 614 
insertions(+), 42 deletions(-) create mode 100644 .github/README-exec/onnx.readme.md create mode 100644 .github/README-exec/torch.readme.md create mode 100644 .github/workflows/force-hub-push.yml create mode 100644 scripts/get-requirements.py diff --git a/.github/README-exec/onnx.readme.md b/.github/README-exec/onnx.readme.md new file mode 100644 index 000000000..958e034b1 --- /dev/null +++ b/.github/README-exec/onnx.readme.md @@ -0,0 +1,177 @@ +# CLIPOnnxEncoder + +**CLIPOnnxEncoder** is the executor implemented in [clip-as-service](https://github.com/jina-ai/clip-as-service). +It serves OpenAI-released [CLIP](https://github.com/openai/CLIP) models with the ONNX runtime (🚀 **3x** speed-up). +The introduction of the CLIP model [can be found here](https://openai.com/blog/clip/). + +- 🔀 **Automatic**: Auto-detect image and text documents depending on their content. +- ⚡ **Efficiency**: Faster CLIP model inference on CPU and GPU via the ONNX runtime. +- 📈 **Observability**: Monitor the serving via Prometheus and Grafana (see [Usage Guide](https://docs.jina.ai/how-to/monitoring/#deploying-locally)). + + +## Model support + +OpenAI has released 9 models so far. `ViT-B/32` is used as the default model. Please also note that different models give **different output dimensions**. 
+ +| Model | ONNX | Output dimension | +|----------------|-----| --- | +| RN50 | ✅ | 1024 | +| RN101 | ✅ | 512 | +| RN50x4 | ✅ | 640 | +| RN50x16 | ✅ | 768 | +| RN50x64 | ✅ | 1024 | +| ViT-B/32 | ✅ | 512 | +| ViT-B/16 | ✅ | 512 | +| ViT-L/14 | ✅ | 768 | +| ViT-L/14@336px | ✅ | 768 | + +## Usage + +### Use in Jina Flow + +- **via Docker image (recommended)** + +```python +from jina import Flow +from docarray import Document +import numpy as np + +f = Flow().add( + uses='jinahub+docker://CLIPOnnxEncoder', +) +``` + +- **via source code** + +```python +from jina import Flow +from docarray import Document +import numpy as np + +f = Flow().add( + uses='jinahub://CLIPOnnxEncoder', +) +``` + +You can set the following parameters via `with`: + +| Parameter | Description | +|-----------|-------------------------------------------------------------------------------------------------------------------------------| +| `name` | Model weights, default is `ViT-B/32`. Supports all pretrained models released by OpenAI. | +| `num_worker_preprocess` | The number of CPU workers for image & text preprocessing, default 4. | +| `minibatch_size` | The size of a minibatch for CPU preprocessing and GPU encoding, default 16. Reduce the size of it if you encounter OOM on GPU. | +| `device` | `cuda` or `cpu`. Default is `None`, which means auto-detect. | + +### Encoding + +Encoding here means getting the fixed-length vector representation of a sentence or image. 
+ +```python +from jina import Flow +from docarray import Document, DocumentArray + +da = DocumentArray( + [ + Document(text='she smiled, with pain'), + Document(uri='apple.png'), + Document(uri='apple.png').load_uri_to_image_tensor(), + Document(blob=open('apple.png', 'rb').read()), + Document(uri='https://clip-as-service.jina.ai/_static/favicon.png'), + Document( + uri='' + ), + ] +) + +f = Flow().add( + uses='jinahub+docker://CLIPOnnxEncoder', +) +with f: + f.post(on='/', inputs=da) + da.summary() +``` + +From the output, you will see all the text and image docs have `embedding` attached. + +```text +╭──────────────────────────── Documents Summary ─────────────────────────────╮ +│ │ +│ Length 6 │ +│ Homogenous Documents False │ +│ 4 Documents have attributes ('id', 'mime_type', 'uri', 'embedding') │ +│ 1 Document has attributes ('id', 'mime_type', 'text', 'embedding') │ +│ 1 Document has attributes ('id', 'embedding') │ +│ │ +╰────────────────────────────────────────────────────────────────────────────╯ +╭────────────────────── Attributes Summary ───────────────────────╮ +│ │ +│ Attribute Data type #Unique values Has empty value │ +│ ───────────────────────────────────────────────────────────── │ +│ embedding ('ndarray',) 6 False │ +│ id ('str',) 6 False │ +│ mime_type ('str',) 5 False │ +│ text ('str',) 2 False │ +│ uri ('str',) 4 False │ +│ │ 
+╰─────────────────────────────────────────────────────────────────╯ +``` + +👉 Access the embedding playground in **clip-as-service** [doc](https://clip-as-service.jina.ai/playground/embedding), type a sentence or an image URL and see the **live embedding**! + +### Ranking + +One can also rank cross-modal matches via the `/rank` endpoint. +First construct a *cross-modal* Document where the root contains an image and `.matches` contain sentences to rerank. + +```python +from docarray import Document + +d = Document( + uri='rerank.png', + matches=[ + Document(text=f'a photo of a {p}') + for p in ( + 'control room', + 'lecture room', + 'conference room', + 'podium indoor', + 'television studio', + ) + ], +) +``` + +Then send the request via the `/rank` endpoint: + +```python +f = Flow().add( + uses='jinahub+docker://CLIPOnnxEncoder', +) +with f: + r = f.post(on='/rank', inputs=d) + print(r['@m', ['text', 'scores__clip_score__value']]) +``` + +Finally, in the response you can see that the matches are re-ranked according to `.scores['clip_score']`: + +```bash +[['a photo of a television studio', 'a photo of a conference room', 'a photo of a lecture room', 'a photo of a control room', 'a photo of a podium indoor'], +[0.9920725226402283, 0.006038925610482693, 0.0009973491542041302, 0.00078492151806131, 0.00010626466246321797]] +``` + +One can also construct a `text-to-image` rerank as below: + +```python +from docarray import Document + +d = Document( + text='a photo of conference room', + matches=[ + Document(uri='https://picsum.photos/300'), + Document(uri='https://picsum.photos/id/331/50'), + Document(uri='https://clip-as-service.jina.ai/_static/favicon.png'), + ], +) +``` + +👉 Access the ranking playground in **clip-as-service** [doc](https://clip-as-service.jina.ai/playground/reasoning/). 
Just input the reasoning texts as prompts; the server will rank the prompts and return them sorted with scores. \ No newline at end of file diff --git a/.github/README-exec/torch.readme.md b/.github/README-exec/torch.readme.md new file mode 100644 index 000000000..ae6e99a6f --- /dev/null +++ b/.github/README-exec/torch.readme.md @@ -0,0 +1,179 @@ +# CLIPTorchEncoder + +**CLIPTorchEncoder** is the executor implemented in [clip-as-service](https://github.com/jina-ai/clip-as-service). +It serves OpenAI-released [CLIP](https://github.com/openai/CLIP) models with the PyTorch runtime. +The introduction of the CLIP model [can be found here](https://openai.com/blog/clip/). + +- 🔀 **Automatic**: Auto-detect image and text documents depending on their content. +- ⚡ **Efficiency**: Faster CLIP model inference on CPU and GPU by leveraging best practices. +- 📈 **Observability**: Monitor the serving via Prometheus and Grafana (see [Usage Guide](https://docs.jina.ai/how-to/monitoring/#deploying-locally)). + +Thanks to advances in the ONNX runtime, you can use `CLIPOnnxEncoder` (see [link](https://hub.jina.ai/executor/2a7auwg2)) instead to achieve a **3x** model inference speed-up. + +## Model support + +OpenAI has released **9 models** so far. `ViT-B/32` is used as the default model. Please also note that different models give **different output dimensions**. 
+ +| Model | PyTorch | Output dimension | +|----------------|---------|------------------| +| RN50 | ✅ | 1024 | +| RN101 | ✅ | 512 | +| RN50x4 | ✅ | 640 | +| RN50x16 | ✅ | 768 | +| RN50x64 | ✅ | 1024 | +| ViT-B/32 | ✅ | 512 | +| ViT-B/16 | ✅ | 512 | +| ViT-L/14 | ✅ | 768 | +| ViT-L/14@336px | ✅ | 768 | + +## Usage + +### Use in Jina Flow + +- **via Docker image (recommended)** + +```python +from jina import Flow +from docarray import Document +import numpy as np + +f = Flow().add( + uses='jinahub+docker://CLIPTorchEncoder', +) +``` + +- **via source code** + +```python +from jina import Flow +from docarray import Document +import numpy as np + +f = Flow().add( + uses='jinahub://CLIPTorchEncoder', +) +``` + +You can set the following parameters via `with`: + +| Parameter | Description | +|-------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| `name` | Model weights, default is `ViT-B/32`. Supports all pretrained models released by OpenAI. | +| `num_worker_preprocess` | The number of CPU workers for image & text preprocessing, default 4. | +| `minibatch_size` | The size of a minibatch for CPU preprocessing and GPU encoding, default 32. Reduce the size of it if you encounter OOM on GPU. | +| `device` | `cuda` or `cpu`. Default is `None`, which means auto-detect. | +| `jit` | Whether to enable TorchScript JIT, default is `False`. | + +### Encoding + +Encoding here means getting the fixed-length vector representation of a sentence or image. 
+ +```python +from jina import Flow +from docarray import Document, DocumentArray + +da = DocumentArray( + [ + Document(text='she smiled, with pain'), + Document(uri='apple.png'), + Document(uri='apple.png').load_uri_to_image_tensor(), + Document(blob=open('apple.png', 'rb').read()), + Document(uri='https://clip-as-service.jina.ai/_static/favicon.png'), + Document( + uri='' + ), + ] +) + +f = Flow().add( + uses='jinahub+docker://CLIPTorchEncoder', +) +with f: + f.post(on='/', inputs=da) + da.summary() +``` + +From the output, you will see all the text and image docs have `embedding` attached. + +```text +╭──────────────────────────── Documents Summary ─────────────────────────────╮ +│ │ +│ Length 6 │ +│ Homogenous Documents False │ +│ 4 Documents have attributes ('id', 'mime_type', 'uri', 'embedding') │ +│ 1 Document has attributes ('id', 'mime_type', 'text', 'embedding') │ +│ 1 Document has attributes ('id', 'embedding') │ +│ │ +╰────────────────────────────────────────────────────────────────────────────╯ +╭────────────────────── Attributes Summary ───────────────────────╮ +│ │ +│ Attribute Data type #Unique values Has empty value │ +│ ───────────────────────────────────────────────────────────── │ +│ embedding ('ndarray',) 6 False │ +│ id ('str',) 6 False │ +│ mime_type ('str',) 5 False │ +│ text ('str',) 2 False │ +│ uri ('str',) 4 False │ +│ │ 
+╰─────────────────────────────────────────────────────────────────╯ +``` + +👉 Access the embedding playground in **clip-as-service** [doc](https://clip-as-service.jina.ai/playground/embedding), type a sentence or an image URL and see the **live embedding**! + +### Ranking + +One can also rank cross-modal matches via the `/rank` endpoint. +First construct a *cross-modal* Document where the root contains an image and `.matches` contain sentences to rerank. + +```python +from docarray import Document + +d = Document( + uri='rerank.png', + matches=[ + Document(text=f'a photo of a {p}') + for p in ( + 'control room', + 'lecture room', + 'conference room', + 'podium indoor', + 'television studio', + ) + ], +) +``` + +Then send the request via the `/rank` endpoint: + +```python +f = Flow().add( + uses='jinahub+docker://CLIPTorchEncoder', +) +with f: + r = f.post(on='/rank', inputs=d) + print(r['@m', ['text', 'scores__clip_score__value']]) +``` + +Finally, you can see that the matches are re-ranked based on `.scores['clip_score']`: + +```bash +[['a photo of a television studio', 'a photo of a conference room', 'a photo of a lecture room', 'a photo of a control room', 'a photo of a podium indoor'], +[0.9920725226402283, 0.006038925610482693, 0.0009973491542041302, 0.00078492151806131, 0.00010626466246321797]] +``` + +One can also construct a `text-to-image` rerank as below: + +```python +from docarray import Document + +d = Document( + text='a photo of conference room', + matches=[ + Document(uri='https://picsum.photos/300'), + Document(uri='https://picsum.photos/id/331/50'), + Document(uri='https://clip-as-service.jina.ai/_static/favicon.png'), + ], +) +``` + +👉 Access the ranking playground in **clip-as-service** [doc](https://clip-as-service.jina.ai/playground/reasoning/). 
Just input the reasoning texts as prompts, the server will rank the prompts and return sorted prompts with scores. \ No newline at end of file diff --git a/.github/workflows/force-docker-build.yml b/.github/workflows/force-docker-build.yml index e30aba6eb..eb9a8f9d1 100644 --- a/.github/workflows/force-docker-build.yml +++ b/.github/workflows/force-docker-build.yml @@ -21,7 +21,7 @@ jobs: env: release_token: ${{ secrets.CAS_RELEASE_TOKEN }} - regular-release: + docker-release: needs: token-check runs-on: ubuntu-latest strategy: @@ -104,7 +104,7 @@ jobs: if: ${{ matrix.engine_tag == '' && matrix.pip_tag != 'tensorrt' }} uses: docker/build-push-action@v2 with: - context: . + context: server file: Dockerfiles/base.Dockerfile platforms: linux/amd64 cache-from: type=registry,ref=jinaai/clip_executor:latest @@ -116,13 +116,12 @@ jobs: CAS_VERSION=${{env.CAS_VERSION}} VCS_REF=${{env.VCS_REF}} BACKEND_TAG=${{env.BACKEND_TAG}} - PIP_TAG=${{matrix.pip_tag}} - name: CUDA Build and push id: cuda_docker_build if: ${{ matrix.engine_tag == 'cuda' }} uses: docker/build-push-action@v2 with: - context: . + context: server file: Dockerfiles/cuda.Dockerfile platforms: linux/amd64 cache-from: type=registry,ref=jinaai/clip_executor:latest-cuda @@ -134,4 +133,3 @@ jobs: CAS_VERSION=${{env.CAS_VERSION}} VCS_REF=${{env.VCS_REF}} BACKEND_TAG=${{env.BACKEND_TAG}} - PIP_TAG=${{matrix.pip_tag}} diff --git a/.github/workflows/force-hub-push.yml b/.github/workflows/force-hub-push.yml new file mode 100644 index 000000000..ad3bfec99 --- /dev/null +++ b/.github/workflows/force-hub-push.yml @@ -0,0 +1,197 @@ +name: Manual Hub Push + +on: + workflow_dispatch: + inputs: + release_token: + description: 'Your release token' + required: true + triggered_by: + description: 'CD | TAG | MANUAL' + required: false + default: MANUAL + +#on: +# pull_request: + +jobs: + token-check: + runs-on: ubuntu-latest + steps: + - run: echo "success!" 
+ if: "${{ github.event.inputs.release_token }} == ${{ env.release_token }}" + env: + release_token: ${{ secrets.CAS_RELEASE_TOKEN }} + + hub-release: + needs: token-check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set envs and versions + run: | + VCS_REF=${{ github.ref }} + echo "VCS_REF=$VCS_REF" >> $GITHUB_ENV + echo "Will push $VCS_REF" + + CAS_VERSION=$(sed -n '/^__version__/p' ./server/clip_server/__init__.py | cut -d \' -f2) + V_CAS_VERSION=v${CAS_VERSION} + CAS_MINOR_VERSION=${CAS_VERSION%.*} + CAS_MAJOR_VERSION=${CAS_MINOR_VERSION%.*} + + if [[ "${{ github.event.inputs.triggered_by }}" == "CD" ]]; then + # on every CD release + echo "TAG_ALIAS=\ + -t latest \ + " >> $GITHUB_ENV + echo "GPU_TAG_ALIAS=\ + -t latest-gpu \ + " >> $GITHUB_ENV + + elif [[ "${{ github.event.inputs.triggered_by }}" == "TAG" ]]; then + # on every tag release + echo "TAG_ALIAS=\ + -t latest \ + -t ${CAS_VERSION} \ + -t ${CAS_MINOR_VERSION} \ + -t ${CAS_MAJOR_VERSION} \ + " >> $GITHUB_ENV + echo "GPU_TAG_ALIAS=\ + -t latest-gpu \ + -t ${CAS_VERSION}-gpu \ + -t ${CAS_MINOR_VERSION}-gpu \ + -t ${CAS_MAJOR_VERSION}-gpu \ + " >> $GITHUB_ENV + + elif [[ "${{ github.event.inputs.triggered_by }}" == "MANUAL" ]]; then + # on every manual release + echo "TAG_ALIAS=\ + -t ${CAS_VERSION} \ + " >> $GITHUB_ENV + echo "GPU_TAG_ALIAS=\ + -t ${CAS_VERSION}-gpu \ + " >> $GITHUB_ENV + else + echo "TAG_ALIAS=\ + -t latest \ + " >> $GITHUB_ENV + echo "GPU_TAG_ALIAS=\ + -t latest-gpu \ + " >> $GITHUB_ENV + fi + + echo "CAS_VERSION=${CAS_VERSION}" >> $GITHUB_ENV + + - name: Prepare enviroment + run: | + python -m pip install --upgrade jina yq + + - name: Push Torch Executor + id: push_torch_executor + run: | + # FIX the import issue + echo -e "\ + __version__ = '$CAS_VERSION' + from .executors.clip_torch import CLIPEncoder\n\ + " > server/clip_server/__init__.py + + echo -e "\ + jtype: CLIPEncoder\n\ + metas:\n\ + py_modules:\n\ + - clip_server/__init__.py\n\ + " > 
server/config.yml + + echo -e "\ + manifest_version: 1\n\ + name: CLIPTorchEncoder\n\ + description: Embed images and sentences into fixed-length vectors with CLIP\n\ + url: https://github.com/jina-ai/clip-as-service\n\ + keywords: [clip, clip-model, clip-as-service, pytorch]\n\ + " > server/manifest.yml + + python scripts/get-requirements.py "" server/requirements.txt + + cp .github/README-exec/torch.readme.md server/README.md + + exec_name=`yq -r .name server/manifest.yml` + echo executor name is $exec_name + + cp Dockerfiles/base.Dockerfile server/Dockerfile + JINA_AUTH_TOKEN=${{secrets.JINAHUB_TOKEN}} jina hub push --force $exec_name --secret ${{secrets.TORCH_EXEC_SECRET}} server ${{env.TAG_ALIAS}} + + cp Dockerfiles/cuda.Dockerfile server/Dockerfile + JINA_AUTH_TOKEN=${{secrets.JINAHUB_TOKEN}} jina hub push --force $exec_name --secret ${{secrets.TORCH_EXEC_SECRET}} server ${{env.GPU_TAG_ALIAS}} + + - name: Push Onnx Executor + id: push_onnx_executor + run: | + # FIX the import issue + echo -e "\ + __version__ = '$CAS_VERSION' + from .executors.clip_onnx import CLIPEncoder\n\ + " > server/clip_server/__init__.py + + echo -e "\ + jtype: CLIPEncoder\n\ + metas:\n\ + py_modules:\n\ + - clip_server/__init__.py\n\ + " > server/config.yml + + echo -e "\ + manifest_version: 1\n\ + name: CLIPOnnxEncoder\n\ + description: Embed images and sentences into fixed-length vectors with CLIP\n\ + url: https://github.com/jina-ai/clip-as-service\n\ + keywords: [clip, clip-model, clip-as-service, onnx, onnx-runtime]\n\ + " > server/manifest.yml + + python scripts/get-requirements.py onnx server/requirements.txt + + cp .github/README-exec/onnx.readme.md server/README.md + + exec_name=`yq -r .name server/manifest.yml` + echo executor name is $exec_name + + cp Dockerfiles/base.Dockerfile server/Dockerfile + sed -i 's/ARG BACKEND_TAG=torch/ARG BACKEND_TAG=onnx/g' server/Dockerfile + JINA_AUTH_TOKEN=${{secrets.JINAHUB_TOKEN}} jina hub push --force $exec_name --secret 
${{secrets.ONNX_EXEC_SECRET}} server ${{env.TAG_ALIAS}} + + cp Dockerfiles/cuda.Dockerfile server/Dockerfile + sed -i 's/ARG BACKEND_TAG=torch/ARG BACKEND_TAG=onnx/g' server/Dockerfile + JINA_AUTH_TOKEN=${{secrets.JINAHUB_TOKEN}} jina hub push --force $exec_name --secret ${{secrets.ONNX_EXEC_SECRET}} server ${{env.GPU_TAG_ALIAS}} + + - name: Push TensorRT Executor + id: push_tensorrt_executor + run: | + # FIX the import issue + echo -e "\ + __version__ = '$CAS_VERSION' + from .executors.clip_tensorrt import CLIPEncoder\n\ + " > server/clip_server/__init__.py + + echo -e "\ + jtype: CLIPEncoder\n\ + metas:\n\ + py_modules:\n\ + - clip_server/__init__.py\n\ + " > server/config.yml + + echo -e "\ + manifest_version: 1\n\ + name: CLIPTensorRTEncoder\n\ + description: Embed images and sentences into fixed-length vectors with CLIP\n\ + url: https://github.com/jina-ai/clip-as-service\n\ + keywords: [clip, clip-model, clip-as-service, onnx, tensorrt]\n\ + " > server/manifest.yml + + python scripts/get-requirements.py tensorrt server/requirements.txt + + cp Dockerfiles/tensorrt.Dockerfile server/Dockerfile + + exec_name=`yq -r .name server/manifest.yml` + echo executor name is $exec_name + + # FIXME: disable uploading at debugging + # JINA_AUTH_TOKEN=${{secrets.JINAHUB_TOKEN}} jina hub push --force $exec_name --secret ${{secrets.TENSORRT_EXEC_SECRET}} server ${{env.TAG_ALIAS}} diff --git a/.github/workflows/force-release.yml b/.github/workflows/force-release.yml index 96ea1488a..e16d287f8 100644 --- a/.github/workflows/force-release.yml +++ b/.github/workflows/force-release.yml @@ -56,7 +56,20 @@ jobs: uses: benc-uk/workflow-dispatch@v1 with: workflow: Manual Docker Build - inputs: '{ "release_token": "${{ env.release_token }}", "triggered_by": "MANUAL"}' + inputs: '{ "release_token": "${{ env.release_token }}", "triggered_by": "CD"}' token: ${{ secrets.JINA_DEV_BOT }} env: release_token: ${{ secrets.CAS_RELEASE_TOKEN }} + + hub-release: + needs: token-check + runs-on: 
ubuntu-latest + steps: + - name: upload executors to hub + uses: benc-uk/workflow-dispatch@v1 + with: + workflow: Manual Hub Push + inputs: '{ "release_token": "${{ env.release_token }}", "triggered_by": "CD"}' + token: ${{ secrets.JINA_DEV_BOT }} + env: + release_token: ${{ secrets.CAS_RELEASE_TOKEN }} \ No newline at end of file diff --git a/Dockerfiles/base.Dockerfile b/Dockerfiles/base.Dockerfile index afbdc69b1..665320798 100644 --- a/Dockerfiles/base.Dockerfile +++ b/Dockerfiles/base.Dockerfile @@ -1,9 +1,8 @@ # !!! An ARG declared before a FROM is outside of a build stage, so it canโ€™t be used in any instruction after a FROM -ARG JINA_VERSION=3.3.25 +ARG JINA_VERSION=3.6.0 FROM jinaai/jina:${JINA_VERSION}-py38-standard -ARG PIP_TAG ARG BACKEND_TAG=torch # constant, wont invalidate cache @@ -18,20 +17,20 @@ LABEL org.opencontainers.image.vendor="Jina AI Limited" \ RUN pip3 install --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu # copy will almost always invalid the cache -COPY . /clip-as-service/ +COPY . /clip_server/ RUN echo "\ jtype: CLIPEncoder\n\ metas:\n\ py_modules:\n\ - - server/clip_server/executors/clip_$BACKEND_TAG.py\n\ + - clip_server/executors/clip_$BACKEND_TAG.py\n\ " > /tmp/config.yml -RUN cd /clip-as-service && \ - if [ -n "$PIP_TAG" ]; then pip3 install --no-cache-dir server/"[$PIP_TAG]" ; fi && \ - pip3 install --no-cache-dir "server/" +RUN cd /clip_server && \ + if [ "$BACKEND_TAG" != "torch" ]; then pip3 install --no-cache-dir "./[$BACKEND_TAG]" ; fi && \ + pip3 install --no-cache-dir . 
-WORKDIR /clip-as-service +WORKDIR /clip_server ENTRYPOINT ["jina", "executor", "--uses", "/tmp/config.yml"] diff --git a/Dockerfiles/cuda.Dockerfile b/Dockerfiles/cuda.Dockerfile index dc6261936..ba870a2c2 100644 --- a/Dockerfiles/cuda.Dockerfile +++ b/Dockerfiles/cuda.Dockerfile @@ -3,8 +3,7 @@ ARG CUDA_VERSION=11.4.2 FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-cudnn8-runtime-ubuntu20.04 ENV DEBIAN_FRONTEND=noninteractive -ARG JINA_VERSION=3.3.25 -ARG PIP_TAG +ARG JINA_VERSION=3.6.0 ARG BACKEND_TAG=torch RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -17,7 +16,7 @@ RUN python3 -m pip install --default-timeout=1000 --no-cache-dir "jina[standard] RUN python3 -m pip install nvidia-pyindex # copy will almost always invalid the cache -COPY . /clip-as-service/ +COPY . /clip_server/ RUN echo "\ @@ -26,14 +25,14 @@ with:\n\ device: cuda\n\ metas:\n\ py_modules:\n\ - - server/clip_server/executors/clip_$BACKEND_TAG.py\n\ + - clip_server/executors/clip_$BACKEND_TAG.py\n\ " > /tmp/config.yml -RUN cd /clip-as-service && \ - if [ -n "${PIP_TAG}" ]; then python3 -m pip install --no-cache-dir server/"[${PIP_TAG}]" ; fi && \ - python3 -m pip install --no-cache-dir "server/" +RUN cd /clip_server && \ + if [ "${BACKEND_TAG}" != "torch" ]; then python3 -m pip install --no-cache-dir "./[${BACKEND_TAG}]" ; fi && \ + python3 -m pip install --no-cache-dir . 
-WORKDIR /clip-as-service +WORKDIR /clip_server ENTRYPOINT ["jina", "executor", "--uses", "/tmp/config.yml"] diff --git a/Dockerfiles/tensorrt.Dockerfile b/Dockerfiles/tensorrt.Dockerfile index a2a7fbf9c..8e4e234f9 100644 --- a/Dockerfiles/tensorrt.Dockerfile +++ b/Dockerfiles/tensorrt.Dockerfile @@ -4,27 +4,25 @@ ARG TENSORRT_VERSION=22.04 FROM nvcr.io/nvidia/tensorrt:${TENSORRT_VERSION}-py3 -ARG JINA_VERSION=3.3.25 -ARG PIP_VERSION +ARG JINA_VERSION=3.6.0 +ARG BACKEND_TAG=tensorrt RUN pip3 install --default-timeout=1000 --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113 RUN pip3 -m pip install --default-timeout=1000 --no-cache-dir "jina[standard]==${JINA_VERSION}" - # copy will almost always invalid the cache -COPY . /clip-as-service/ - -RUN pip3 install --no-cache-dir "server/[tensorrt]" +COPY . /clip_server/ RUN echo '\ jtype: CLIPEncoder\n\ metas:\n\ py_modules:\n\ - - server/clip_server/executors/clip_${{ env.ENGINE }}.py\n\ + - clip_server/executors/clip_$BACKEND_TAG.py\n\ ' > /tmp/config.yml +RUN cd /clip_server/ && python3 -m pip install --no-cache-dir "./[$BACKEND_TAG]" -WORKDIR /clip-as-service +WORKDIR /clip_server ENTRYPOINT ["jina", "executor", "--uses", "/tmp/config.yml"] diff --git a/scripts/get-requirements.py b/scripts/get-requirements.py new file mode 100644 index 000000000..c17066694 --- /dev/null +++ b/scripts/get-requirements.py @@ -0,0 +1,12 @@ +## under clip-as-service root dir +# python scripts/get-requirments.py $PIP_TAG /path/to/requirements.txt + +import sys +from distutils.core import run_setup + +result = run_setup("./server/setup.py", stop_after="init") + +with open(sys.argv[2], 'w') as fp: + fp.write('\n'.join(result.install_requires) + '\n') + if sys.argv[1]: + fp.write('\n'.join(result.extras_require[sys.argv[1]]) + '\n') diff --git a/server/clip_server/executors/clip_hg.py b/server/clip_server/executors/clip_hg.py index a5edab62e..bc98266d2 100644 --- 
a/server/clip_server/executors/clip_hg.py +++ b/server/clip_server/executors/clip_hg.py @@ -5,7 +5,7 @@ import numpy as np import torch from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTokenizer -from clip_server.executors.helper import ( +from .helper import ( split_img_txt_da, set_rank, ) diff --git a/server/clip_server/executors/clip_onnx.py b/server/clip_server/executors/clip_onnx.py index deed22328..db38cb647 100644 --- a/server/clip_server/executors/clip_onnx.py +++ b/server/clip_server/executors/clip_onnx.py @@ -4,14 +4,14 @@ from typing import Optional, Dict import onnxruntime as ort -from clip_server.executors.helper import ( +from .helper import ( split_img_txt_da, preproc_image, preproc_text, set_rank, ) -from clip_server.model import clip -from clip_server.model.clip_onnx import CLIPOnnxModel +from ..model import clip +from ..model.clip_onnx import CLIPOnnxModel from jina import Executor, requests, DocumentArray diff --git a/server/clip_server/executors/clip_tensorrt.py b/server/clip_server/executors/clip_tensorrt.py index 5dc9af251..c8dba1873 100644 --- a/server/clip_server/executors/clip_tensorrt.py +++ b/server/clip_server/executors/clip_tensorrt.py @@ -2,14 +2,14 @@ from typing import Dict import numpy as np -from clip_server.executors.helper import ( +from .helper import ( split_img_txt_da, preproc_image, preproc_text, set_rank, ) -from clip_server.model import clip -from clip_server.model.clip_trt import CLIPTensorRTModel +from ..model import clip +from ..model.clip_trt import CLIPTensorRTModel from jina import Executor, requests, DocumentArray diff --git a/server/clip_server/executors/clip_torch.py b/server/clip_server/executors/clip_torch.py index a4701004a..b123ac138 100644 --- a/server/clip_server/executors/clip_torch.py +++ b/server/clip_server/executors/clip_torch.py @@ -5,13 +5,13 @@ import numpy as np import torch -from clip_server.executors.helper import ( +from .helper import ( split_img_txt_da, preproc_image, preproc_text, 
set_rank, ) -from clip_server.model import clip +from ..model import clip from jina import Executor, requests, DocumentArray diff --git a/server/clip_server/executors/helper.py b/server/clip_server/executors/helper.py index 4e1ddecb3..36d6e3194 100644 --- a/server/clip_server/executors/helper.py +++ b/server/clip_server/executors/helper.py @@ -4,7 +4,7 @@ from docarray import Document, DocumentArray from docarray.math.distance.numpy import cosine -from clip_server.model import clip +from ..model import clip def numpy_softmax(x: 'np.ndarray', axis: int = -1) -> 'np.ndarray': diff --git a/server/clip_server/model/clip_trt.py b/server/clip_server/model/clip_trt.py index 0ae5f6b7c..bfcb7c7c6 100644 --- a/server/clip_server/model/clip_trt.py +++ b/server/clip_server/model/clip_trt.py @@ -4,7 +4,7 @@ import tensorrt as trt from tensorrt.tensorrt import Logger, Runtime - from clip_server.model.trt_utils import load_engine, build_engine, save_engine + from .trt_utils import load_engine, build_engine, save_engine except ImportError: raise ImportError( "It seems that TensorRT is not yet installed. " @@ -55,7 +55,7 @@ def start_engines(self): f'The engine plan file is generated on an incompatible device, expecting compute {compute_capacity} ' 'got compute 8.6, will rebuild the TensorRT engine.' ) - from clip_server.model.clip_onnx import CLIPOnnxModel + from .clip_onnx import CLIPOnnxModel onnx_model = CLIPOnnxModel(self._name)