feat: support openclip&mclip models + refactor model loader (#774)
* fix: draft commits

* feat: add openclip+mclip support

* feat: add openclip+mclip support

* fix: update openclip model list

* fix: import error

* fix: import error

* fix: import error

* fix: remove executor clip_oc

* fix: recovered procs in helper for older version onnx&trt executors

* fix: recovered procs in helper for older version onnx&trt executors

* fix: add openclip requirement

* fix: recover helper.py

* fix: add openclip requirement

* fix: add openclip requirement

* fix: recover helper.py

* fix: refactor preprocessor (#776)

* fix: refactor preprocessor

* fix: error

* fix: missing functions

* fix: revert commit

* fix: tests

* fix: simple tokenizer

* fix: clip model mixin

* feat: support customized download

* feat: support customized download

* feat: support customized download

* fix: init kwargs

* fix: minor revision

* fix: errors

* f

* fix: error

* fix: update open ai model loading

* fix: clean codes

* fix: clean codes

* fix: open clip base model name

* fix: minor revision

* fix: clean unused codes

* fix: mclip image size

* fix: mclip tokenizer error

* fix: set padding

* fix: set truncation

* fix: update tokenizer api

* fix: add unittest

* fix: update license

* fix: unittest

Co-authored-by: numb3r3 <[email protected]>
Co-authored-by: felix-wang <[email protected]>
3 people authored Jul 21, 2022
1 parent 32b11cd commit fa62d8e
Showing 17 changed files with 505 additions and 795 deletions.
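
The user-facing change across these files is the model `name` convention: the torch executor's default moves from `ViT-B/32` to the `<model>::<pretrained-weights>` format (`ViT-B-32-quickgelu::openai`), which is how open_clip and M-CLIP checkpoints are now selected. Below is a minimal sketch of serving the refactored loader through a jina Flow; the port, the direct class import, and the M-CLIP name in the comment are illustrative assumptions, not part of this diff.

# A sketch, assuming jina is installed alongside the clip_server package.
from jina import Flow
from clip_server.executors.clip_torch import CLIPEncoder

f = Flow(port=51000).add(
    uses=CLIPEncoder,
    # new '<model>::<weights>' naming; an M-CLIP name such as
    # 'M-CLIP/XLM-Roberta-Large-Vit-B-32' should also resolve (assumption)
    uses_with={'name': 'ViT-B-32-quickgelu::openai'},
)

with f:
    f.block()  # serve until interrupted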
3 changes: 1 addition & 2 deletions LICENSE

@@ -1,8 +1,7 @@
 Copyright 2020-2022 Jina AI Limited. All rights reserved.
 
-The following three files are licensed under MIT License via https://github.com/openai/CLIP Copyright (c) 2021 OpenAI
+The following two files are licensed under MIT License via https://github.com/openai/CLIP Copyright (c) 2021 OpenAI
 server/clip_server/model/clip.py
-server/clip_server/model/model.py
 server/clip_server/model/simple_tokenizer.py
 
9 changes: 6 additions & 3 deletions server/clip_server/executors/clip_onnx.py

@@ -12,6 +12,7 @@
 )
 from clip_server.model import clip
 from clip_server.model.clip_onnx import CLIPOnnxModel
+from clip_server.model.tokenization import Tokenizer
 from jina import Executor, requests, DocumentArray
 
 
@@ -31,10 +32,12 @@ def __init__(
         self._minibatch_size = minibatch_size
         self._traversal_paths = traversal_paths
 
-        self._preprocess_tensor = clip._transform_ndarray(clip.MODEL_SIZE[name])
         self._pool = ThreadPool(processes=num_worker_preprocess)
 
         self._model = CLIPOnnxModel(name, model_path)
+        self._tokenizer = Tokenizer(name)
+
+        self._image_transform = clip._transform_ndarray(clip.MODEL_SIZE[name])
 
         import torch
 
@@ -84,15 +87,15 @@ def _preproc_images(self, docs: 'DocumentArray'):
             documentation='images preprocess time in seconds',
         ):
             return preproc_image(
-                docs, preprocess_fn=self._preprocess_tensor, return_np=True
+                docs, preprocess_fn=self._image_transform, return_np=True
             )
 
     def _preproc_texts(self, docs: 'DocumentArray'):
         with self.monitor(
             name='preprocess_texts_seconds',
             documentation='texts preprocess time in seconds',
         ):
-            return preproc_text(docs, return_np=True)
+            return preproc_text(docs, tokenizer=self._tokenizer, return_np=True)
 
     @requests(on='/rank')
     async def rank(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
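
The change above sets the pattern repeated in every executor: the module-level `clip.tokenize` call is replaced by a per-model `Tokenizer` built once in `__init__` and injected into `preproc_text`. A rough sketch of the new call boundary follows; the model name and sample text are illustrative assumptions.

# Sketch of the tokenizer-injection boundary (names below are assumptions).
from docarray import Document, DocumentArray
from clip_server.executors.helper import preproc_text
from clip_server.model.tokenization import Tokenizer

tokenizer = Tokenizer('ViT-B-32::openai')  # one tokenizer per model

da = DocumentArray([Document(text='a photo of a cat')])
da, batch = preproc_text(da, tokenizer=tokenizer, return_np=True)
# batch holds the tokenized arrays (e.g. batch['input_ids']) for the ONNX session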
11 changes: 8 additions & 3 deletions server/clip_server/executors/clip_tensorrt.py

@@ -9,6 +9,7 @@
     set_rank,
 )
 from clip_server.model import clip
+from clip_server.model.tokenization import Tokenizer
 from clip_server.model.clip_trt import CLIPTensorRTModel
 from jina import Executor, requests, DocumentArray
 
@@ -25,7 +26,6 @@ def __init__(
     ):
         super().__init__(**kwargs)
 
-        self._preprocess_tensor = clip._transform_ndarray(clip.MODEL_SIZE[name])
         self._pool = ThreadPool(processes=num_worker_preprocess)
 
         self._minibatch_size = minibatch_size
@@ -48,14 +48,17 @@ def __init__(
 
         self._model.start_engines()
 
+        self._tokenizer = Tokenizer(name)
+        self._image_transform = clip._transform_ndarray(clip.MODEL_SIZE[name])
+
     def _preproc_images(self, docs: 'DocumentArray'):
         with self.monitor(
             name='preprocess_images_seconds',
             documentation='images preprocess time in seconds',
         ):
             return preproc_image(
                 docs,
-                preprocess_fn=self._preprocess_tensor,
+                preprocess_fn=self._image_transform,
                 device=self._device,
                 return_np=False,
             )
@@ -65,7 +68,9 @@ def _preproc_texts(self, docs: 'DocumentArray'):
             name='preprocess_texts_seconds',
             documentation='texts preprocess time in seconds',
         ):
-            return preproc_text(docs, device=self._device, return_np=False)
+            return preproc_text(
+                docs, tokenizer=self._tokenizer, device=self._device, return_np=False
+            )
 
     @requests(on='/rank')
     async def rank(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
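
Note the split between runtimes that this diff preserves: the ONNX executor requests numpy arrays (`return_np=True`), while the TensorRT and torch executors keep torch tensors on the target device (`return_np=False` plus `device=self._device`). A small sketch, assuming a CUDA device is available; the model name and text are illustrative.

# GPU runtimes keep tensors on-device; this assumes CUDA is present.
from docarray import Document, DocumentArray
from clip_server.executors.helper import preproc_text
from clip_server.model.tokenization import Tokenizer

tokenizer = Tokenizer('ViT-B-32::openai')
da = DocumentArray([Document(text='ein Foto einer Katze')])
da, batch = preproc_text(da, tokenizer=tokenizer, device='cuda:0', return_np=False)
# batch['input_ids'] is now a torch.Tensor on cuda:0, ready for the TRT engine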
22 changes: 13 additions & 9 deletions server/clip_server/executors/clip_torch.py

@@ -12,13 +12,15 @@
     set_rank,
 )
 from clip_server.model import clip
+from clip_server.model.clip_model import CLIPModel
+from clip_server.model.tokenization import Tokenizer
 from jina import Executor, requests, DocumentArray
 
 
 class CLIPEncoder(Executor):
     def __init__(
         self,
-        name: str = 'ViT-B/32',
+        name: str = 'ViT-B-32-quickgelu::openai',
         device: Optional[str] = None,
         jit: bool = False,
         num_worker_preprocess: int = 4,
@@ -53,12 +55,12 @@ def __init__(
             # For more details, please see https://pytorch.org/docs/stable/generated/torch.set_num_threads.html
             torch.set_num_threads(max(num_threads, 1))
             torch.set_num_interop_threads(1)
-        self._pool = ThreadPool(processes=num_worker_preprocess)
 
-        self._model, self._preprocess_tensor = clip.load(
-            name, device=self._device, jit=jit
-        )
+        self._model = CLIPModel(name, device=self._device, jit=jit, **kwargs)
+        self._tokenizer = Tokenizer(name)
 
+        self._pool = ThreadPool(processes=num_worker_preprocess)
+        self._image_transform = clip._transform_ndarray(self._model.image_size)
 
     def _preproc_images(self, docs: 'DocumentArray'):
         with self.monitor(
@@ -67,7 +69,7 @@ def _preproc_images(self, docs: 'DocumentArray'):
         ):
             return preproc_image(
                 docs,
-                preprocess_fn=self._preprocess_tensor,
+                preprocess_fn=self._image_transform,
                 device=self._device,
                 return_np=False,
             )
@@ -77,7 +79,9 @@ def _preproc_texts(self, docs: 'DocumentArray'):
             name='preprocess_texts_seconds',
             documentation='texts preprocess time in seconds',
         ):
-            return preproc_text(docs, device=self._device, return_np=False)
+            return preproc_text(
+                docs, tokenizer=self._tokenizer, device=self._device, return_np=False
+            )
 
     @requests(on='/rank')
     async def rank(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
@@ -108,7 +112,7 @@ async def encode(self, docs: 'DocumentArray', parameters: Dict = {}, **kwargs):
                     documentation='images encode time in seconds',
                 ):
                     minibatch.embeddings = (
-                        self._model.encode_image(batch_data['pixel_values'])
+                        self._model.encode_image(**batch_data)
                         .cpu()
                         .numpy()
                         .astype(np.float32)
@@ -126,7 +130,7 @@ async def encode(self, docs: 'DocumentArray', parameters: Dict = {}, **kwargs):
                     documentation='texts encode time in seconds',
                 ):
                     minibatch.embeddings = (
-                        self._model.encode_text(batch_data['input_ids'])
+                        self._model.encode_text(**batch_data)
                        .cpu()
                         .numpy()
                         .astype(np.float32)
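
The switch from `encode_image(batch_data['pixel_values'])` and `encode_text(batch_data['input_ids'])` to `encode_*(**batch_data)` is what lets one executor drive several model families: each wrapper declares the tensors it needs and ignores the rest. The classes below are hypothetical stand-ins for the commit's real wrappers under `clip_server/model/`, shown only to illustrate the dispatch.

# Hypothetical wrappers illustrating why the executor now splats the batch.
import torch

class OpenAICLIPWrapper:
    def encode_text(self, input_ids: torch.Tensor, **_):
        ...  # OpenAI CLIP consumes only the token ids

class MCLIPWrapper:
    def encode_text(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, **_):
        ...  # a transformer-based multilingual tower may also need the mask

# the executor stays model-agnostic either way:
#     minibatch.embeddings = self._model.encode_text(**batch_data).cpu().numpy()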
16 changes: 13 additions & 3 deletions server/clip_server/executors/helper.py

@@ -4,7 +4,8 @@
 from docarray import Document, DocumentArray
 from docarray.math.distance.numpy import cosine
 
-from clip_server.model import clip
+
+from clip_server.model.tokenization import Tokenizer
 
 
 def numpy_softmax(x: 'np.ndarray', axis: int = -1) -> 'np.ndarray':
@@ -49,10 +50,13 @@ def preproc_image(
 
 
 def preproc_text(
-    da: 'DocumentArray', device: str = 'cpu', return_np: bool = False
+    da: 'DocumentArray',
+    tokenizer: 'Tokenizer',
+    device: str = 'cpu',
+    return_np: bool = False,
 ) -> Tuple['DocumentArray', Dict]:
 
-    inputs = clip.tokenize(da.texts)
+    inputs = tokenizer(da.texts)
     inputs['input_ids'] = inputs['input_ids'].detach()
 
     if return_np:
@@ -113,3 +117,9 @@ def set_rank(docs, _logit_scale=np.exp(4.60517)):
         )
 
     q.matches = final
+
+
+def get_image_size(name: str):
+    from clip_server.model.pretrained_models import _VISUAL_MODEL_IMAGE_SIZE
+
+    return _VISUAL_MODEL_IMAGE_SIZE[name]
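
The new `get_image_size` helper lets callers size the image transform from the pretrained-model registry without loading any weights. A sketch follows; whether the key is 'ViT-B-32' or the full 'name::weights' string depends on the `_VISUAL_MODEL_IMAGE_SIZE` registry, so the key below is an assumption.

# Assumption: 'ViT-B-32' is a key in _VISUAL_MODEL_IMAGE_SIZE.
from clip_server.executors.helper import get_image_size
from clip_server.model import clip

size = get_image_size('ViT-B-32')  # e.g. 224 for the ViT-B/32 visual tower
transform = clip._transform_ndarray(size)  # matching ndarray image transform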