From 4d069a84ac0414059acce322f00815bf0cd12536 Mon Sep 17 00:00:00 2001 From: Ziniu Yu Date: Wed, 15 Jun 2022 18:00:14 +0800 Subject: [PATCH] feat: upload torch executor (#723) * feat: add hub push runner * fix: hub push yaml * fix: hub push yaml * fix: debug script * fix: debug script * fix: debug script * fix: debug script * fix: debug script * fix: debug script * fix: debug script * fix: debug script * fix: debug script * fix: comment manifest * fix: revert manifest * fix: use relative import * fix: change base folder * fix: hub push * fix: bumb jina version * fix: get requirments.txt * fix: turnon workflow on PR * fix: update dockerfile * fix: error * fix: executor name * fix: use jinahub auth token * fix: test torch upload * fix: docker * fix: upload gpu executor * fix: gpu tag * fix: gpu tag * feat: upload onnx executor * fix: debug onnx upload * fix: debug onnx upload * fix: minor revision * fix: add torch exec readme * fix: add onnx exec readme * chore: update exec readme * fix: update readme * chore: update readme * chore: onnx readme * chore: update readme * docs: fix batch_size * docs: fix batch_size * chore: updates * chore: upload pytorch and onnx runtime based executors * fix: use relative imports Co-authored-by: numb3r3 --- .github/README-exec/onnx.readme.md | 177 ++++++++++++++++ .github/README-exec/torch.readme.md | 179 ++++++++++++++++ .github/workflows/force-docker-build.yml | 8 +- .github/workflows/force-hub-push.yml | 197 ++++++++++++++++++ .github/workflows/force-release.yml | 15 +- Dockerfiles/base.Dockerfile | 15 +- Dockerfiles/cuda.Dockerfile | 15 +- Dockerfiles/tensorrt.Dockerfile | 14 +- scripts/get-requirements.py | 12 ++ server/clip_server/executors/clip_hg.py | 2 +- server/clip_server/executors/clip_onnx.py | 6 +- server/clip_server/executors/clip_tensorrt.py | 6 +- server/clip_server/executors/clip_torch.py | 4 +- server/clip_server/executors/helper.py | 2 +- server/clip_server/model/clip_trt.py | 4 +- 15 files changed, 614 insertions(+), 42 deletions(-) create mode 100644 .github/README-exec/onnx.readme.md create mode 100644 .github/README-exec/torch.readme.md create mode 100644 .github/workflows/force-hub-push.yml create mode 100644 scripts/get-requirements.py diff --git a/.github/README-exec/onnx.readme.md b/.github/README-exec/onnx.readme.md new file mode 100644 index 000000000..958e034b1 --- /dev/null +++ b/.github/README-exec/onnx.readme.md @@ -0,0 +1,177 @@ +# CLIPOnnxEncoder + +**CLIPOnnxEncoder** is the executor implemented in [clip-as-service](https://github.com/jina-ai/clip-as-service). +It serves OpenAI released [CLIP](https://github.com/openai/CLIP) models with ONNX runtime (๐Ÿš€ **3x** speed up). +The introduction of the CLIP model [can be found here](https://openai.com/blog/clip/). + +- ๐Ÿ”€ **Automatic**: Auto-detect image and text documents depending on their content. +- โšก **Efficiency**: Faster CLIP model inference on CPU and GPU via ONNX runtime. +- ๐Ÿ“ˆ **Observability**: Monitoring the serving via Prometheus and Grafana (see [Usage Guide](https://docs.jina.ai/how-to/monitoring/#deploying-locally)). + + +## Model support + +Open AI has released 9 models so far. `ViT-B/32` is used as default model. Please also note that different model give **different size of output dimensions**. 

| Model | ONNX | Output dimension |
|----------------|------|------------------|
| RN50 | ✅ | 1024 |
| RN101 | ✅ | 512 |
| RN50x4 | ✅ | 640 |
| RN50x16 | ✅ | 768 |
| RN50x64 | ✅ | 1024 |
| ViT-B/32 | ✅ | 512 |
| ViT-B/16 | ✅ | 512 |
| ViT-L/14 | ✅ | 768 |
| ViT-L/14@336px | ✅ | 768 |

## Usage

### Use in Jina Flow

- **via Docker image (recommended)**

```python
from jina import Flow
from docarray import Document
import numpy as np

f = Flow().add(
    uses='jinahub+docker://CLIPOnnxEncoder',
)
```

- **via source code**

```python
from jina import Flow
from docarray import Document
import numpy as np

f = Flow().add(
    uses='jinahub://CLIPOnnxEncoder',
)
```

You can set the following parameters via `with`:

| Parameter | Description |
|-----------|-------------------------------------------------------------------------------------------------------------------------------|
| `name` | Model weights, default is `ViT-B/32`. Supports all OpenAI released pretrained models. |
| `num_worker_preprocess` | The number of CPU workers for image & text preprocessing, default 4. |
| `minibatch_size` | The size of a minibatch for CPU preprocessing and GPU encoding, default 16. Reduce it if you encounter OOM on the GPU. |
| `device` | `cuda` or `cpu`. Default is `None`, which means auto-detect. |

### Encoding

Encoding here means getting the fixed-length vector representation of a sentence or image.

```python
from jina import Flow
from docarray import Document, DocumentArray

da = DocumentArray(
    [
        Document(text='she smiled, with pain'),
        Document(uri='apple.png'),
        Document(uri='apple.png').load_uri_to_image_tensor(),
        Document(blob=open('apple.png', 'rb').read()),
        Document(uri='https://clip-as-service.jina.ai/_static/favicon.png'),
        Document(
            uri='data:image/gif;base64,R0lGODlhEAAQAMQAAORHHOVSKudfOulrSOp3WOyDZu6QdvCchPGolfO0o/XBs/fNwfjZ0frl3/zy7////wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAkAABAALAAAAAAQABAAAAVVICSOZGlCQAosJ6mu7fiyZeKqNKToQGDsM8hBADgUXoGAiqhSvp5QAnQKGIgUhwFUYLCVDFCrKUE1lBavAViFIDlTImbKC5Gm2hB0SlBCBMQiB0UjIQA7'
        ),
    ]
)

f = Flow().add(
    uses='jinahub+docker://CLIPOnnxEncoder',
)
with f:
    f.post(on='/', inputs=da)
    da.summary()
```

From the output, you will see that all the text and image docs have an `embedding` attached.

```text
╭──────────────────────────── Documents Summary ─────────────────────────────╮
│                                                                             │
│   Length                 6                                                  │
│   Homogenous Documents   False                                              │
│   4 Documents have attributes ('id', 'mime_type', 'uri', 'embedding')       │
│   1 Document has attributes ('id', 'mime_type', 'text', 'embedding')        │
│   1 Document has attributes ('id', 'embedding')                             │
│                                                                             │
╰─────────────────────────────────────────────────────────────────────────────╯
╭──────────────────────── Attributes Summary ────────────────────────╮
│                                                                     │
│   Attribute   Data type      #Unique values   Has empty value      │
│  ─────────────────────────────────────────────────────────────────  │
│   embedding   ('ndarray',)   6                False                │
│   id          ('str',)       6                False                │
│   mime_type   ('str',)       5                False                │
│   text        ('str',)       2                False                │
│   uri         ('str',)       4                False                │
│                                                                     │
╰─────────────────────────────────────────────────────────────────────╯
```

👉 Access the embedding playground in the **clip-as-service** [doc](https://clip-as-service.jina.ai/playground/embedding), type a sentence or an image URL and see the **live embedding**!

### Ranking

One can also rank cross-modal matches via the `/rank` endpoint.
First construct a *cross-modal* Document where the root contains an image and `.matches` contain the sentences to rerank.

```python
from docarray import Document

d = Document(
    uri='rerank.png',
    matches=[
        Document(text=f'a photo of a {p}')
        for p in (
            'control room',
            'lecture room',
            'conference room',
            'podium indoor',
            'television studio',
        )
    ],
)
```

Then send the request via the `/rank` endpoint:

```python
f = Flow().add(
    uses='jinahub+docker://CLIPOnnxEncoder',
)
with f:
    r = f.post(on='/rank', inputs=d)
    print(r['@m', ['text', 'scores__clip_score__value']])
```

Finally, in the response you can observe that the matches are re-ranked according to `.scores['clip_score']`:

```bash
[['a photo of a television studio', 'a photo of a conference room', 'a photo of a lecture room', 'a photo of a control room', 'a photo of a podium indoor'],
[0.9920725226402283, 0.006038925610482693, 0.0009973491542041302, 0.00078492151806131, 0.00010626466246321797]]
```

One can also construct a `text-to-image` rerank request as below:

```python
from docarray import Document

d = Document(
    text='a photo of conference room',
    matches=[
        Document(uri='https://picsum.photos/300'),
        Document(uri='https://picsum.photos/id/331/50'),
        Document(uri='https://clip-as-service.jina.ai/_static/favicon.png'),
    ],
)
```

👉 Access the ranking playground in the **clip-as-service** [doc](https://clip-as-service.jina.ai/playground/reasoning/). Just input the reasoning texts as prompts; the server will rank the prompts and return the sorted prompts with scores.
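If you prefer to stay in code rather than use the playground, the snippet below is a minimal sketch of sending such a text-to-image rerank request. It assumes the same `CLIPOnnxEncoder` Flow and the same `scores__clip_score__value` selector used in the examples above; the picsum URLs are only placeholder images.

```python
# Minimal sketch (assumes the same CLIPOnnxEncoder Flow as in the examples above):
# rerank the image matches of a text Document via the /rank endpoint.
from jina import Flow
from docarray import Document

d = Document(
    text='a photo of conference room',
    matches=[
        Document(uri='https://picsum.photos/300'),
        Document(uri='https://picsum.photos/id/331/50'),
        Document(uri='https://clip-as-service.jina.ai/_static/favicon.png'),
    ],
)

f = Flow().add(uses='jinahub+docker://CLIPOnnxEncoder')
with f:
    r = f.post(on='/rank', inputs=d)
    # each match now carries a clip_score; print the image URIs with their scores
    print(r['@m', ['uri', 'scores__clip_score__value']])
```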
\ No newline at end of file
diff --git a/.github/README-exec/torch.readme.md b/.github/README-exec/torch.readme.md
new file mode 100644
index 000000000..ae6e99a6f
--- /dev/null
+++ b/.github/README-exec/torch.readme.md
@@ -0,0 +1,179 @@
# CLIPTorchEncoder

**CLIPTorchEncoder** is the executor implemented in [clip-as-service](https://github.com/jina-ai/clip-as-service).
It serves OpenAI released [CLIP](https://github.com/openai/CLIP) models with the PyTorch runtime.
An introduction to the CLIP model [can be found here](https://openai.com/blog/clip/).

- 🔀 **Automatic**: Auto-detect image and text documents depending on their content.
- ⚡ **Efficiency**: Faster CLIP model inference on CPU and GPU by leveraging best practices.
- 📈 **Observability**: Monitor the serving via Prometheus and Grafana (see the [Usage Guide](https://docs.jina.ai/how-to/monitoring/#deploying-locally)).

With the ONNX runtime, you can instead use `CLIPOnnxEncoder` (see [link](https://hub.jina.ai/executor/2a7auwg2)) to achieve a **3x** speed-up in model inference.

## Model support

OpenAI has released **9 models** so far. `ViT-B/32` is used as the default model. Please also note that different models give **different output dimensions**.

| Model          | PyTorch | Output dimension |
|----------------|---------|------------------|
| RN50           | ✅      | 1024             |
| RN101          | ✅      | 512              |
| RN50x4         | ✅      | 640              |
| RN50x16        | ✅      | 768              |
| RN50x64        | ✅      | 1024             |
| ViT-B/32       | ✅      | 512              |
| ViT-B/16       | ✅      | 512              |
| ViT-L/14       | ✅      | 768              |
| ViT-L/14@336px | ✅      | 768              |

## Usage

### Use in Jina Flow

- **via Docker image (recommended)**

```python
from jina import Flow
from docarray import Document
import numpy as np

f = Flow().add(
    uses='jinahub+docker://CLIPTorchEncoder',
)
```

- **via source code**

```python
from jina import Flow
from docarray import Document
import numpy as np

f = Flow().add(
    uses='jinahub://CLIPTorchEncoder',
)
```

You can set the following parameters via `with`:

| Parameter | Description |
|-------------------------|--------------------------------------------------------------------------------------------------------------------------------|
| `name` | Model weights, default is `ViT-B/32`. Supports all OpenAI released pretrained models. |
| `num_worker_preprocess` | The number of CPU workers for image & text preprocessing, default 4. |
| `minibatch_size` | The size of a minibatch for CPU preprocessing and GPU encoding, default 32. Reduce it if you encounter OOM on the GPU. |
| `device` | `cuda` or `cpu`. Default is `None`, which means auto-detect. |
| `jit` | Whether to enable TorchScript JIT, default is `False`. |

### Encoding

Encoding here means getting the fixed-length vector representation of a sentence or image.
+ +```python +from jina import Flow +from docarray import Document, DocumentArray + +da = DocumentArray( + [ + Document(text='she smiled, with pain'), + Document(uri='apple.png'), + Document(uri='apple.png').load_uri_to_image_tensor(), + Document(blob=open('apple.png', 'rb').read()), + Document(uri='https://clip-as-service.jina.ai/_static/favicon.png'), + Document( + uri='data:image/gif;base64,R0lGODlhEAAQAMQAAORHHOVSKudfOulrSOp3WOyDZu6QdvCchPGolfO0o/XBs/fNwfjZ0frl3/zy7////wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAkAABAALAAAAAAQABAAAAVVICSOZGlCQAosJ6mu7fiyZeKqNKToQGDsM8hBADgUXoGAiqhSvp5QAnQKGIgUhwFUYLCVDFCrKUE1lBavAViFIDlTImbKC5Gm2hB0SlBCBMQiB0UjIQA7' + ), + ] +) + +f = Flow().add( + uses='jinahub+docker://CLIPTorchEncoder', +) +with f: + f.post(on='/', inputs=da) + da.summary() +``` + +From the output, you will see all the text and image docs have `embedding` attached. + +```text +โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Documents Summary โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ +โ”‚ โ”‚ +โ”‚ Length 6 โ”‚ +โ”‚ Homogenous Documents False โ”‚ +โ”‚ 4 Documents have attributes ('id', 'mime_type', 'uri', 'embedding') โ”‚ +โ”‚ 1 Document has attributes ('id', 'mime_type', 'text', 'embedding') โ”‚ +โ”‚ 1 Document has attributes ('id', 'embedding') โ”‚ +โ”‚ โ”‚ +โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ +โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Attributes Summary โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎ +โ”‚ โ”‚ +โ”‚ Attribute Data type #Unique values Has empty value โ”‚ +โ”‚ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”‚ +โ”‚ embedding ('ndarray',) 6 False โ”‚ +โ”‚ id ('str',) 6 False โ”‚ +โ”‚ mime_type ('str',) 5 False โ”‚ +โ”‚ text ('str',) 2 False โ”‚ +โ”‚ uri ('str',) 4 False โ”‚ +โ”‚ โ”‚ +โ•ฐโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฏ +``` + +๐Ÿ‘‰ Access the embedding playground in **clip-as-service** [doc](https://clip-as-service.jina.ai/playground/embedding), type sentence or image URL and see **live embedding**! + +### Ranking + +One can also rank cross-modal matches via `/rank` endpoint. +First construct a *cross-modal* Document where the root contains an image and `.matches` contain sentences to rerank. 
+ +```python +from docarray import Document + +d = Document( + uri='rerank.png', + matches=[ + Document(text=f'a photo of a {p}') + for p in ( + 'control room', + 'lecture room', + 'conference room', + 'podium indoor', + 'television studio', + ) + ], +) +``` + +Then send the request via `/rank` endpoint: + +```python +f = Flow().add( + uses='jinahub+docker://CLIPTorchEncoder', +) +with f: + r = f.post(on='/rank', inputs=da) + print(r['@m', ['text', 'scores__clip_score__value']]) +``` + +Finally, you can observe the matches are re-ranked based on `.scores['clip_score']`: + +```bash +[['a photo of a television studio', 'a photo of a conference room', 'a photo of a lecture room', 'a photo of a control room', 'a photo of a podium indoor'], +[0.9920725226402283, 0.006038925610482693, 0.0009973491542041302, 0.00078492151806131, 0.00010626466246321797]] +``` + +One can also construct `text-to-image` rerank as below: + +```python +from docarray import Document + +d = Document( + text='a photo of conference room', + matches=[ + Document(uri='https://picsum.photos/300'), + Document(uri='https://picsum.photos/id/331/50'), + Document(uri='https://clip-as-service.jina.ai/_static/favicon.png'), + ], +) +``` + +๐Ÿ‘‰ Access the ranking playground in **clip-as-service** [doc](https://clip-as-service.jina.ai/playground/reasoning/). Just input the reasoning texts as prompts, the server will rank the prompts and return sorted prompts with scores. \ No newline at end of file diff --git a/.github/workflows/force-docker-build.yml b/.github/workflows/force-docker-build.yml index e30aba6eb..eb9a8f9d1 100644 --- a/.github/workflows/force-docker-build.yml +++ b/.github/workflows/force-docker-build.yml @@ -21,7 +21,7 @@ jobs: env: release_token: ${{ secrets.CAS_RELEASE_TOKEN }} - regular-release: + docker-release: needs: token-check runs-on: ubuntu-latest strategy: @@ -104,7 +104,7 @@ jobs: if: ${{ matrix.engine_tag == '' && matrix.pip_tag != 'tensorrt' }} uses: docker/build-push-action@v2 with: - context: . + context: server file: Dockerfiles/base.Dockerfile platforms: linux/amd64 cache-from: type=registry,ref=jinaai/clip_executor:latest @@ -116,13 +116,12 @@ jobs: CAS_VERSION=${{env.CAS_VERSION}} VCS_REF=${{env.VCS_REF}} BACKEND_TAG=${{env.BACKEND_TAG}} - PIP_TAG=${{matrix.pip_tag}} - name: CUDA Build and push id: cuda_docker_build if: ${{ matrix.engine_tag == 'cuda' }} uses: docker/build-push-action@v2 with: - context: . + context: server file: Dockerfiles/cuda.Dockerfile platforms: linux/amd64 cache-from: type=registry,ref=jinaai/clip_executor:latest-cuda @@ -134,4 +133,3 @@ jobs: CAS_VERSION=${{env.CAS_VERSION}} VCS_REF=${{env.VCS_REF}} BACKEND_TAG=${{env.BACKEND_TAG}} - PIP_TAG=${{matrix.pip_tag}} diff --git a/.github/workflows/force-hub-push.yml b/.github/workflows/force-hub-push.yml new file mode 100644 index 000000000..ad3bfec99 --- /dev/null +++ b/.github/workflows/force-hub-push.yml @@ -0,0 +1,197 @@ +name: Manual Hub Push + +on: + workflow_dispatch: + inputs: + release_token: + description: 'Your release token' + required: true + triggered_by: + description: 'CD | TAG | MANUAL' + required: false + default: MANUAL + +#on: +# pull_request: + +jobs: + token-check: + runs-on: ubuntu-latest + steps: + - run: echo "success!" 
+ if: "${{ github.event.inputs.release_token }} == ${{ env.release_token }}" + env: + release_token: ${{ secrets.CAS_RELEASE_TOKEN }} + + hub-release: + needs: token-check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set envs and versions + run: | + VCS_REF=${{ github.ref }} + echo "VCS_REF=$VCS_REF" >> $GITHUB_ENV + echo "Will push $VCS_REF" + + CAS_VERSION=$(sed -n '/^__version__/p' ./server/clip_server/__init__.py | cut -d \' -f2) + V_CAS_VERSION=v${CAS_VERSION} + CAS_MINOR_VERSION=${CAS_VERSION%.*} + CAS_MAJOR_VERSION=${CAS_MINOR_VERSION%.*} + + if [[ "${{ github.event.inputs.triggered_by }}" == "CD" ]]; then + # on every CD release + echo "TAG_ALIAS=\ + -t latest \ + " >> $GITHUB_ENV + echo "GPU_TAG_ALIAS=\ + -t latest-gpu \ + " >> $GITHUB_ENV + + elif [[ "${{ github.event.inputs.triggered_by }}" == "TAG" ]]; then + # on every tag release + echo "TAG_ALIAS=\ + -t latest \ + -t ${CAS_VERSION} \ + -t ${CAS_MINOR_VERSION} \ + -t ${CAS_MAJOR_VERSION} \ + " >> $GITHUB_ENV + echo "GPU_TAG_ALIAS=\ + -t latest-gpu \ + -t ${CAS_VERSION}-gpu \ + -t ${CAS_MINOR_VERSION}-gpu \ + -t ${CAS_MAJOR_VERSION}-gpu \ + " >> $GITHUB_ENV + + elif [[ "${{ github.event.inputs.triggered_by }}" == "MANUAL" ]]; then + # on every manual release + echo "TAG_ALIAS=\ + -t ${CAS_VERSION} \ + " >> $GITHUB_ENV + echo "GPU_TAG_ALIAS=\ + -t ${CAS_VERSION}-gpu \ + " >> $GITHUB_ENV + else + echo "TAG_ALIAS=\ + -t latest \ + " >> $GITHUB_ENV + echo "GPU_TAG_ALIAS=\ + -t latest-gpu \ + " >> $GITHUB_ENV + fi + + echo "CAS_VERSION=${CAS_VERSION}" >> $GITHUB_ENV + + - name: Prepare enviroment + run: | + python -m pip install --upgrade jina yq + + - name: Push Torch Executor + id: push_torch_executor + run: | + # FIX the import issue + echo -e "\ + __version__ = '$CAS_VERSION' + from .executors.clip_torch import CLIPEncoder\n\ + " > server/clip_server/__init__.py + + echo -e "\ + jtype: CLIPEncoder\n\ + metas:\n\ + py_modules:\n\ + - clip_server/__init__.py\n\ + " > server/config.yml + + echo -e "\ + manifest_version: 1\n\ + name: CLIPTorchEncoder\n\ + description: Embed images and sentences into fixed-length vectors with CLIP\n\ + url: https://github.com/jina-ai/clip-as-service\n\ + keywords: [clip, clip-model, clip-as-service, pytorch]\n\ + " > server/manifest.yml + + python scripts/get-requirements.py "" server/requirements.txt + + cp .github/README-exec/torch.readme.md server/README.md + + exec_name=`yq -r .name server/manifest.yml` + echo executor name is $exec_name + + cp Dockerfiles/base.Dockerfile server/Dockerfile + JINA_AUTH_TOKEN=${{secrets.JINAHUB_TOKEN}} jina hub push --force $exec_name --secret ${{secrets.TORCH_EXEC_SECRET}} server ${{env.TAG_ALIAS}} + + cp Dockerfiles/cuda.Dockerfile server/Dockerfile + JINA_AUTH_TOKEN=${{secrets.JINAHUB_TOKEN}} jina hub push --force $exec_name --secret ${{secrets.TORCH_EXEC_SECRET}} server ${{env.GPU_TAG_ALIAS}} + + - name: Push Onnx Executor + id: push_onnx_executor + run: | + # FIX the import issue + echo -e "\ + __version__ = '$CAS_VERSION' + from .executors.clip_onnx import CLIPEncoder\n\ + " > server/clip_server/__init__.py + + echo -e "\ + jtype: CLIPEncoder\n\ + metas:\n\ + py_modules:\n\ + - clip_server/__init__.py\n\ + " > server/config.yml + + echo -e "\ + manifest_version: 1\n\ + name: CLIPOnnxEncoder\n\ + description: Embed images and sentences into fixed-length vectors with CLIP\n\ + url: https://github.com/jina-ai/clip-as-service\n\ + keywords: [clip, clip-model, clip-as-service, onnx, onnx-runtime]\n\ + " > server/manifest.yml + + 
python scripts/get-requirements.py onnx server/requirements.txt + + cp .github/README-exec/onnx.readme.md server/README.md + + exec_name=`yq -r .name server/manifest.yml` + echo executor name is $exec_name + + cp Dockerfiles/base.Dockerfile server/Dockerfile + sed -i 's/ARG BACKEND_TAG=torch/ARG BACKEND_TAG=onnx/g' server/Dockerfile + JINA_AUTH_TOKEN=${{secrets.JINAHUB_TOKEN}} jina hub push --force $exec_name --secret ${{secrets.ONNX_EXEC_SECRET}} server ${{env.TAG_ALIAS}} + + cp Dockerfiles/cuda.Dockerfile server/Dockerfile + sed -i 's/ARG BACKEND_TAG=torch/ARG BACKEND_TAG=onnx/g' server/Dockerfile + JINA_AUTH_TOKEN=${{secrets.JINAHUB_TOKEN}} jina hub push --force $exec_name --secret ${{secrets.ONNX_EXEC_SECRET}} server ${{env.GPU_TAG_ALIAS}} + + - name: Push TensorRT Executor + id: push_tensorrt_executor + run: | + # FIX the import issue + echo -e "\ + __version__ = '$CAS_VERSION' + from .executors.clip_tensorrt import CLIPEncoder\n\ + " > server/clip_server/__init__.py + + echo -e "\ + jtype: CLIPEncoder\n\ + metas:\n\ + py_modules:\n\ + - clip_server/__init__.py\n\ + " > server/config.yml + + echo -e "\ + manifest_version: 1\n\ + name: CLIPTensorRTEncoder\n\ + description: Embed images and sentences into fixed-length vectors with CLIP\n\ + url: https://github.com/jina-ai/clip-as-service\n\ + keywords: [clip, clip-model, clip-as-service, onnx, tensorrt]\n\ + " > server/manifest.yml + + python scripts/get-requirements.py tensorrt server/requirements.txt + + cp Dockerfiles/tensorrt.Dockerfile server/Dockerfile + + exec_name=`yq -r .name server/manifest.yml` + echo executor name is $exec_name + + # FIXME: disable uploading at debugging + # JINA_AUTH_TOKEN=${{secrets.JINAHUB_TOKEN}} jina hub push --force $exec_name --secret ${{secrets.TENSORRT_EXEC_SECRET}} server ${{env.TAG_ALIAS}} diff --git a/.github/workflows/force-release.yml b/.github/workflows/force-release.yml index 96ea1488a..e16d287f8 100644 --- a/.github/workflows/force-release.yml +++ b/.github/workflows/force-release.yml @@ -56,7 +56,20 @@ jobs: uses: benc-uk/workflow-dispatch@v1 with: workflow: Manual Docker Build - inputs: '{ "release_token": "${{ env.release_token }}", "triggered_by": "MANUAL"}' + inputs: '{ "release_token": "${{ env.release_token }}", "triggered_by": "CD"}' token: ${{ secrets.JINA_DEV_BOT }} env: release_token: ${{ secrets.CAS_RELEASE_TOKEN }} + + hub-release: + needs: token-check + runs-on: ubuntu-latest + steps: + - name: upload executors to hub + uses: benc-uk/workflow-dispatch@v1 + with: + workflow: Manual Hub Push + inputs: '{ "release_token": "${{ env.release_token }}", "triggered_by": "CD"}' + token: ${{ secrets.JINA_DEV_BOT }} + env: + release_token: ${{ secrets.CAS_RELEASE_TOKEN }} \ No newline at end of file diff --git a/Dockerfiles/base.Dockerfile b/Dockerfiles/base.Dockerfile index afbdc69b1..665320798 100644 --- a/Dockerfiles/base.Dockerfile +++ b/Dockerfiles/base.Dockerfile @@ -1,9 +1,8 @@ # !!! An ARG declared before a FROM is outside of a build stage, so it canโ€™t be used in any instruction after a FROM -ARG JINA_VERSION=3.3.25 +ARG JINA_VERSION=3.6.0 FROM jinaai/jina:${JINA_VERSION}-py38-standard -ARG PIP_TAG ARG BACKEND_TAG=torch # constant, wont invalidate cache @@ -18,20 +17,20 @@ LABEL org.opencontainers.image.vendor="Jina AI Limited" \ RUN pip3 install --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu # copy will almost always invalid the cache -COPY . /clip-as-service/ +COPY . 
/clip_server/ RUN echo "\ jtype: CLIPEncoder\n\ metas:\n\ py_modules:\n\ - - server/clip_server/executors/clip_$BACKEND_TAG.py\n\ + - clip_server/executors/clip_$BACKEND_TAG.py\n\ " > /tmp/config.yml -RUN cd /clip-as-service && \ - if [ -n "$PIP_TAG" ]; then pip3 install --no-cache-dir server/"[$PIP_TAG]" ; fi && \ - pip3 install --no-cache-dir "server/" +RUN cd /clip_server && \ + if [ "$BACKEND_TAG" != "torch" ]; then pip3 install --no-cache-dir "./[$BACKEND_TAG]" ; fi && \ + pip3 install --no-cache-dir . -WORKDIR /clip-as-service +WORKDIR /clip_server ENTRYPOINT ["jina", "executor", "--uses", "/tmp/config.yml"] diff --git a/Dockerfiles/cuda.Dockerfile b/Dockerfiles/cuda.Dockerfile index dc6261936..ba870a2c2 100644 --- a/Dockerfiles/cuda.Dockerfile +++ b/Dockerfiles/cuda.Dockerfile @@ -3,8 +3,7 @@ ARG CUDA_VERSION=11.4.2 FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-cudnn8-runtime-ubuntu20.04 ENV DEBIAN_FRONTEND=noninteractive -ARG JINA_VERSION=3.3.25 -ARG PIP_TAG +ARG JINA_VERSION=3.6.0 ARG BACKEND_TAG=torch RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -17,7 +16,7 @@ RUN python3 -m pip install --default-timeout=1000 --no-cache-dir "jina[standard] RUN python3 -m pip install nvidia-pyindex # copy will almost always invalid the cache -COPY . /clip-as-service/ +COPY . /clip_server/ RUN echo "\ @@ -26,14 +25,14 @@ with:\n\ device: cuda\n\ metas:\n\ py_modules:\n\ - - server/clip_server/executors/clip_$BACKEND_TAG.py\n\ + - clip_server/executors/clip_$BACKEND_TAG.py\n\ " > /tmp/config.yml -RUN cd /clip-as-service && \ - if [ -n "${PIP_TAG}" ]; then python3 -m pip install --no-cache-dir server/"[${PIP_TAG}]" ; fi && \ - python3 -m pip install --no-cache-dir "server/" +RUN cd /clip_server && \ + if [ "${BACKEND_TAG}" != "torch" ]; then python3 -m pip install --no-cache-dir "./[${BACKEND_TAG}]" ; fi && \ + python3 -m pip install --no-cache-dir . -WORKDIR /clip-as-service +WORKDIR /clip_server ENTRYPOINT ["jina", "executor", "--uses", "/tmp/config.yml"] diff --git a/Dockerfiles/tensorrt.Dockerfile b/Dockerfiles/tensorrt.Dockerfile index a2a7fbf9c..8e4e234f9 100644 --- a/Dockerfiles/tensorrt.Dockerfile +++ b/Dockerfiles/tensorrt.Dockerfile @@ -4,27 +4,25 @@ ARG TENSORRT_VERSION=22.04 FROM nvcr.io/nvidia/tensorrt:${TENSORRT_VERSION}-py3 -ARG JINA_VERSION=3.3.25 -ARG PIP_VERSION +ARG JINA_VERSION=3.6.0 +ARG BACKEND_TAG=tensorrt RUN pip3 install --default-timeout=1000 --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113 RUN pip3 -m pip install --default-timeout=1000 --no-cache-dir "jina[standard]==${JINA_VERSION}" - # copy will almost always invalid the cache -COPY . /clip-as-service/ - -RUN pip3 install --no-cache-dir "server/[tensorrt]" +COPY . 
/clip_server/ RUN echo '\ jtype: CLIPEncoder\n\ metas:\n\ py_modules:\n\ - - server/clip_server/executors/clip_${{ env.ENGINE }}.py\n\ + - clip_server/executors/clip_$BACKEND_TAG.py\n\ ' > /tmp/config.yml +RUN cd /clip_server/ && python3 -m pip install --no-cache-dir "./[$BACKEND_TAG]" -WORKDIR /clip-as-service +WORKDIR /clip_server ENTRYPOINT ["jina", "executor", "--uses", "/tmp/config.yml"] diff --git a/scripts/get-requirements.py b/scripts/get-requirements.py new file mode 100644 index 000000000..c17066694 --- /dev/null +++ b/scripts/get-requirements.py @@ -0,0 +1,12 @@ +## under clip-as-service root dir +# python scripts/get-requirments.py $PIP_TAG /path/to/requirements.txt + +import sys +from distutils.core import run_setup + +result = run_setup("./server/setup.py", stop_after="init") + +with open(sys.argv[2], 'w') as fp: + fp.write('\n'.join(result.install_requires) + '\n') + if sys.argv[1]: + fp.write('\n'.join(result.extras_require[sys.argv[1]]) + '\n') diff --git a/server/clip_server/executors/clip_hg.py b/server/clip_server/executors/clip_hg.py index a5edab62e..bc98266d2 100644 --- a/server/clip_server/executors/clip_hg.py +++ b/server/clip_server/executors/clip_hg.py @@ -5,7 +5,7 @@ import numpy as np import torch from transformers import CLIPFeatureExtractor, CLIPModel, CLIPTokenizer -from clip_server.executors.helper import ( +from .helper import ( split_img_txt_da, set_rank, ) diff --git a/server/clip_server/executors/clip_onnx.py b/server/clip_server/executors/clip_onnx.py index deed22328..db38cb647 100644 --- a/server/clip_server/executors/clip_onnx.py +++ b/server/clip_server/executors/clip_onnx.py @@ -4,14 +4,14 @@ from typing import Optional, Dict import onnxruntime as ort -from clip_server.executors.helper import ( +from .helper import ( split_img_txt_da, preproc_image, preproc_text, set_rank, ) -from clip_server.model import clip -from clip_server.model.clip_onnx import CLIPOnnxModel +from ..model import clip +from ..model.clip_onnx import CLIPOnnxModel from jina import Executor, requests, DocumentArray diff --git a/server/clip_server/executors/clip_tensorrt.py b/server/clip_server/executors/clip_tensorrt.py index 5dc9af251..c8dba1873 100644 --- a/server/clip_server/executors/clip_tensorrt.py +++ b/server/clip_server/executors/clip_tensorrt.py @@ -2,14 +2,14 @@ from typing import Dict import numpy as np -from clip_server.executors.helper import ( +from .helper import ( split_img_txt_da, preproc_image, preproc_text, set_rank, ) -from clip_server.model import clip -from clip_server.model.clip_trt import CLIPTensorRTModel +from ..model import clip +from ..model.clip_trt import CLIPTensorRTModel from jina import Executor, requests, DocumentArray diff --git a/server/clip_server/executors/clip_torch.py b/server/clip_server/executors/clip_torch.py index a4701004a..b123ac138 100644 --- a/server/clip_server/executors/clip_torch.py +++ b/server/clip_server/executors/clip_torch.py @@ -5,13 +5,13 @@ import numpy as np import torch -from clip_server.executors.helper import ( +from .helper import ( split_img_txt_da, preproc_image, preproc_text, set_rank, ) -from clip_server.model import clip +from ..model import clip from jina import Executor, requests, DocumentArray diff --git a/server/clip_server/executors/helper.py b/server/clip_server/executors/helper.py index 4e1ddecb3..36d6e3194 100644 --- a/server/clip_server/executors/helper.py +++ b/server/clip_server/executors/helper.py @@ -4,7 +4,7 @@ from docarray import Document, DocumentArray from docarray.math.distance.numpy import 
cosine -from clip_server.model import clip +from ..model import clip def numpy_softmax(x: 'np.ndarray', axis: int = -1) -> 'np.ndarray': diff --git a/server/clip_server/model/clip_trt.py b/server/clip_server/model/clip_trt.py index 0ae5f6b7c..bfcb7c7c6 100644 --- a/server/clip_server/model/clip_trt.py +++ b/server/clip_server/model/clip_trt.py @@ -4,7 +4,7 @@ import tensorrt as trt from tensorrt.tensorrt import Logger, Runtime - from clip_server.model.trt_utils import load_engine, build_engine, save_engine + from .trt_utils import load_engine, build_engine, save_engine except ImportError: raise ImportError( "It seems that TensorRT is not yet installed. " @@ -55,7 +55,7 @@ def start_engines(self): f'The engine plan file is generated on an incompatible device, expecting compute {compute_capacity} ' 'got compute 8.6, will rebuild the TensorRT engine.' ) - from clip_server.model.clip_onnx import CLIPOnnxModel + from .clip_onnx import CLIPOnnxModel onnx_model = CLIPOnnxModel(self._name)