From f96ce5433dc1ec473ae89e22f01520b93abc6071 Mon Sep 17 00:00:00 2001
From: Ziniu Yu <ziniuyu@gmail.com>
Date: Wed, 9 Nov 2022 12:22:58 +0800
Subject: [PATCH] fix: install transformers for executor docker images (#851)

* fix: install transformers for torch executor docker images

* docs: fix typo

* docs: fix typo

* fix: typo

* fix: bump jina version and install transformers in onnx

* fix: upper case onnx
---
 .github/README-exec/onnx.readme.md  | 36 +++++++++++++++++------------
 .github/README-exec/torch.readme.md |  4 +++-
 Dockerfiles/base.Dockerfile         |  4 ++--
 Dockerfiles/cuda.Dockerfile         |  4 ++--
 4 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/.github/README-exec/onnx.readme.md b/.github/README-exec/onnx.readme.md
index f4a3d29ab..54c4676cc 100644
--- a/.github/README-exec/onnx.readme.md
+++ b/.github/README-exec/onnx.readme.md
@@ -13,18 +13,24 @@ The introduction of the CLIP model [can be found here](https://openai.com/blog/c
 
  `ViT-B-32::openai` is used as the default model. To use specific pretrained models provided by `open_clip`, please use `::` to separate model name and pretrained weight name, e.g. `ViT-B-32::laion2b_e16`. Please also note that **different models give different sizes of output dimensions**.
 
-| Model             | ONNX | Output dimension | 
-|-------------------|------|------------------|
-| RN50              | ✅    | 1024             | 
-| RN101             | ✅    | 512              | 
-| RN50x4            | ✅    | 640              |
-| RN50x16           | ✅    | 768              |
-| RN50x64           | ✅    | 1024             |
-| ViT-B-32          | ✅    | 512              |
-| ViT-B-16          | ✅    | 512              |
-| ViT-B-16-plus-240 | ✅    | 640              |
-| ViT-L-14          | ✅    | 768              |
-| ViT-L-14-336      | ✅    | 768              |
+| Model                                 | ONNX | Output dimension | 
+|---------------------------------------|------|------------------|
+| RN50                                  | ✅    | 1024             | 
+| RN101                                 | ✅    | 512              | 
+| RN50x4                                | ✅    | 640              |
+| RN50x16                               | ✅    | 768              |
+| RN50x64                               | ✅    | 1024             |
+| ViT-B-32                              | ✅    | 512              |
+| ViT-B-16                              | ✅    | 512              |
+| ViT-B-16-plus-240                     | ✅    | 640              |
+| ViT-L-14                              | ✅    | 768              |
+| ViT-L-14-336                          | ✅    | 768              |
+| ViT-H-14                              | ✅    | 1024             |
+| ViT-g-14                              | ✅    | 1024             |
+| M-CLIP/XLM-Roberta-Large-Vit-B-32     | ✅    | 512              |
+| M-CLIP/XLM-Roberta-Large-Vit-L-14     | ✅    | 768              |
+| M-CLIP/XLM-Roberta-Large-Vit-B-16Plus | ✅    | 640              |
+| M-CLIP/LABSE-Vit-L-14                 | ✅    | 768              |
 
 ✅ = First class support 
 
@@ -93,7 +99,7 @@ da = DocumentArray(
 )
 
 f = Flow().add(
-    uses='jinahub+docker://CLIPTorchEncoder',
+    uses='jinahub+docker://CLIPOnnxEncoder',
 )
 with f:
     f.post(on='/', inputs=da)
@@ -154,10 +160,10 @@ Then send the request via `/rank` endpoint:
 
 ```python
 f = Flow().add(
-    uses='jinahub+docker://CLIPTorchEncoder',
+    uses='jinahub+docker://CLIPOnnxEncoder',
 )
 with f:
-    r = f.post(on='/rank', inputs=da)
+    r = f.post(on='/rank', inputs=[d])
     print(r['@m', ['text', 'scores__clip_score__value']])
 ```
 
diff --git a/.github/README-exec/torch.readme.md b/.github/README-exec/torch.readme.md
index 18e8549d8..21712dc53 100644
--- a/.github/README-exec/torch.readme.md
+++ b/.github/README-exec/torch.readme.md
@@ -26,6 +26,8 @@ With advances of ONNX runtime, you can use `CLIPOnnxEncoder` (see [link](https:/
 | ViT-B-16-plus-240                     | ✅       | 640              |
 | ViT-L-14                              | ✅       | 768              |
 | ViT-L-14-336                          | ✅       | 768              |
+| ViT-H-14                              | ✅       | 1024             |
+| ViT-g-14                              | ✅       | 1024             |
 | M-CLIP/XLM_Roberta-Large-Vit-B-32     | ✅       | 512              |
 | M-CLIP/XLM-Roberta-Large-Vit-L-14     | ✅       | 768              |
 | M-CLIP/XLM-Roberta-Large-Vit-B-16Plus | ✅       | 640              |
@@ -165,7 +167,7 @@ f = Flow().add(
     uses='jinahub+docker://CLIPTorchEncoder',
 )
 with f:
-    r = f.post(on='/rank', inputs=da)
+    r = f.post(on='/rank', inputs=[d])
     print(r['@m', ['text', 'scores__clip_score__value']])
 ```
 
diff --git a/Dockerfiles/base.Dockerfile b/Dockerfiles/base.Dockerfile
index 30121a27f..10a298ac6 100644
--- a/Dockerfiles/base.Dockerfile
+++ b/Dockerfiles/base.Dockerfile
@@ -1,5 +1,5 @@
 # !!! An ARG declared before a FROM is outside of a build stage, so it can’t be used in any instruction after a FROM
-ARG JINA_VERSION=3.7.0
+ARG JINA_VERSION=3.11.0
 
 FROM jinaai/jina:${JINA_VERSION}-py38-standard
 
@@ -14,7 +14,7 @@ LABEL org.opencontainers.image.vendor="Jina AI Limited" \
       org.opencontainers.image.url="clip-as-service" \
       org.opencontainers.image.documentation="https://clip-as-service.jina.ai/"
 
-RUN pip3 install --no-cache-dir torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
+RUN pip3 install --no-cache-dir torch torchvision torchaudio transformers --extra-index-url https://download.pytorch.org/whl/cpu
 
 # copy will almost always invalid the cache
 COPY . /cas/
diff --git a/Dockerfiles/cuda.Dockerfile b/Dockerfiles/cuda.Dockerfile
index 85c16c7f0..33090d84d 100644
--- a/Dockerfiles/cuda.Dockerfile
+++ b/Dockerfiles/cuda.Dockerfile
@@ -3,7 +3,7 @@ ARG CUDA_VERSION=11.4.2
 FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-cudnn8-runtime-ubuntu20.04
 ENV DEBIAN_FRONTEND=noninteractive
 
-ARG JINA_VERSION=3.7.0
+ARG JINA_VERSION=3.11.0
 ARG BACKEND_TAG=torch
 
 # constant, wont invalidate cache
@@ -19,7 +19,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     python3-setuptools python3-wheel python3-pip \
     && apt-get clean && rm -rf /var/lib/apt/lists/*;
 
-RUN python3 -m pip install --default-timeout=1000 --no-cache-dir torch torchvision torchaudio nvidia-pyindex --extra-index-url https://download.pytorch.org/whl/cu113
+RUN python3 -m pip install --default-timeout=1000 --no-cache-dir torch torchvision torchaudio nvidia-pyindex transformers --extra-index-url https://download.pytorch.org/whl/cu113
 RUN python3 -m pip install --default-timeout=1000 --no-cache-dir "jina[standard]==${JINA_VERSION}"
 
 # copy will almost always invalid the cache