Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add traversal paths #750

Merged
merged 15 commits into from
Jun 13, 2022
16 changes: 9 additions & 7 deletions client/clip_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def _gather_result(self, r):

@property
def _unboxed_result(self):
if self._results.embeddings is None:
if self._return_plain and self._results.embeddings is None:
raise ValueError(
'empty embedding returned from the server. '
'This often due to a mis-config of the server, '
Expand All @@ -158,14 +158,11 @@ def _iter_doc(self, content) -> Generator['Document', None, None]:
else:
yield Document(text=c)
elif isinstance(c, Document):
if c.content_type in ('text', 'blob'):
self._return_plain = False
self._return_plain = False
if c.content_type in ('text', 'blob', 'tensor'):
yield c
elif not c.blob and c.uri:
c.load_uri_to_blob()
self._return_plain = False
yield c
elif c.tensor is not None:
yield c
else:
raise TypeError(f'unsupported input type {c!r} {c.content_type}')
Expand All @@ -184,10 +181,15 @@ def _iter_doc(self, content) -> Generator['Document', None, None]:
)

def _get_post_payload(self, content, kwargs):
parameters = {}
if 'batch_size' in kwargs:
parameters['minibatch_size'] = kwargs['batch_size']
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I disagree with exposing minibatch_size to the public client. It can easily overload a CAS server. Imagine that users now have the capability of controlling both request_size and minibatch_size: a user could easily occupy the full GPU on our Berlin GPU server, and could easily make our GPU go OOM by setting a large request_size and minibatch_size.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In a client–server architecture, one should not aim to expose every server arg to the client; it is very risky.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, that makes sense. Then we need to update the documentation about how to control the batch size.


return dict(
on='/',
inputs=self._iter_doc(content),
request_size=kwargs.get('batch_size', 8),
request_size=kwargs.get('batch_size', 32),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
request_size=kwargs.get('batch_size', 32),
request_size=kwargs.get('batch_size', 8),

parameters=parameters,
numb3r3 marked this conversation as resolved.
Show resolved Hide resolved
total_docs=len(content) if hasattr(content, '__len__') else None,
)

Expand Down
50 changes: 32 additions & 18 deletions server/clip_server/executors/clip_hg.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ def __init__(
use_default_preprocessing: bool = True,
max_length: int = 77,
device: str = 'cpu',
overwrite_embeddings: bool = False,
num_worker_preprocess: int = 4,
minibatch_size: int = 32,
traversal_paths: str = '@r',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
traversal_paths: str = '@r',

*args,
**kwargs,
):
Expand Down Expand Up @@ -52,12 +52,15 @@ def __init__(
:param num_worker_preprocess: Number of cpu processes used in preprocessing step.
:param minibatch_size: Default batch size for encoding, used if the
batch size is not passed as a parameter with the request.
:param traversal_paths: Default traversal paths for encoding, used if
the traversal path is not passed as a parameter with the request.
Comment on lines +55 to +56
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
:param traversal_paths: Default traversal paths for encoding, used if
the traversal path is not passed as a parameter with the request.

"""
super().__init__(*args, **kwargs)
self._minibatch_size = minibatch_size

self._use_default_preprocessing = use_default_preprocessing
self._max_length = max_length
self._traversal_paths = traversal_paths
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self._traversal_paths = traversal_paths


# self.device = device
if not device:
Expand Down Expand Up @@ -110,32 +113,36 @@ def _preproc_images(self, docs: 'DocumentArray'):
name='preprocess_images_seconds',
documentation='images preprocess time in seconds',
):
tensors_batch = []
if self._use_default_preprocessing:
tensors_batch = []

for d in docs:
content = d.content
for d in docs:
content = d.content

if d.blob:
d.convert_blob_to_image_tensor()
elif d.uri:
d.load_uri_to_image_tensor()
if d.blob:
d.convert_blob_to_image_tensor()
elif d.tensor is None and d.uri:
# in case user uses HTTP protocol and send data via curl not using .blob (base64), but in .uri
d.load_uri_to_image_tensor()

tensors_batch.append(d.tensor)
tensors_batch.append(d.tensor)

# recover content
d.content = content
# recover content
d.content = content

if self._use_default_preprocessing:
batch_data = self._vision_preprocessor(
images=tensors_batch,
return_tensors='pt',
)
batch_data = {k: v.to(self._device) for k, v in batch_data.items()}
batch_data = {
k: v.type(torch.float32).to(self._device)
for k, v in batch_data.items()
}

else:
batch_data = {
'pixel_values': torch.tensor(
tensors_batch, dtype=torch.float32, device=self._device
docs.tensors, dtype=torch.float32, device=self._device
)
}

Expand Down Expand Up @@ -163,7 +170,7 @@ async def rank(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
set_rank(docs)

@requests
async def encode(self, docs: DocumentArray, **kwargs):
async def encode(self, docs: DocumentArray, parameters: Dict = {}, **kwargs):
"""
Encode all documents with `text` or image content using the corresponding CLIP
encoder. Store the embeddings in the `embedding` attribute.
Expand All @@ -181,18 +188,25 @@ async def encode(self, docs: DocumentArray, **kwargs):
the CLIP model was trained on images of the size ``224 x 224``, and that
they are of the shape ``[3, H, W]`` with ``dtype=float32``. They should
also be normalized (values between 0 and 1).
:param parameters: A dictionary that contains parameters to control encoding.
The accepted keys are ``traversal_paths`` and ``minibatch_size`` - in their
absence their corresponding default values are used.
"""

traversal_paths = parameters.get('traversal_paths', self._traversal_paths)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
traversal_paths = parameters.get('traversal_paths', self._traversal_paths)
traversal_paths = parameters.get('traversal_paths', '@r')

Copy link
Member Author

@numb3r3 numb3r3 Jun 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hanxiao I don't agree with this suggestion. It will break the following use case:

gateway -> encoder #1 (work on root_level) -> encoder #2(work on chunk_level)

It is impossible to pass the proper parameters:

client.post(on='/', parameters={'traversal_paths': '?????'})

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By defining the default traversal path in __init__, client.post(on='/') works

gateway -> encoder #1 (traversal_paths='@r') -> encoder #2(traversal_paths='@c')

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is it impossible? I don't get it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hanxiao In the flow example above, there are two encoders working on documents at different levels (one at the root level, the other at the chunk level).

  • use @r on request parameter at client: client.post(on='/', parameters={'traversal_paths': '@r'})
    -> encoder 2 cannot work
  • use @c on request parameter at client: client.post(on='/', parameters={'traversal_paths': '@c'})
    -> encoder 1 cannot work

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But you should be able to send the parameter to one particular Executor.

minibatch_size = parameters.get('minibatch_size', self._minibatch_size)

_img_da = DocumentArray()
_txt_da = DocumentArray()
for d in docs:
for d in docs[traversal_paths]:
split_img_txt_da(d, _img_da, _txt_da)

with torch.inference_mode():
# for image
if _img_da:
for minibatch, batch_data in _img_da.map_batch(
self._preproc_images,
batch_size=self._minibatch_size,
batch_size=minibatch_size,
pool=self._pool,
):
with self.monitor(
Expand All @@ -210,7 +224,7 @@ async def encode(self, docs: DocumentArray, **kwargs):
if _txt_da:
for minibatch, batch_data in _txt_da.map_batch(
self._preproc_texts,
batch_size=self._minibatch_size,
batch_size=minibatch_size,
pool=self._pool,
):
with self.monitor(
Expand Down
51 changes: 19 additions & 32 deletions server/clip_server/executors/clip_onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,18 @@ def __init__(
name: str = 'ViT-B/32',
device: Optional[str] = None,
num_worker_preprocess: int = 4,
minibatch_size: int = 16,
minibatch_size: int = 32,
traversal_paths: str = '@r',
**kwargs,
):
super().__init__(**kwargs)

self._minibatch_size = minibatch_size
self._traversal_paths = traversal_paths

self._preprocess_tensor = clip._transform_ndarray(clip.MODEL_SIZE[name])
self._pool = ThreadPool(processes=num_worker_preprocess)

self._minibatch_size = minibatch_size

self._model = CLIPOnnxModel(name)

import torch
Expand Down Expand Up @@ -59,7 +61,7 @@ def __init__(
and hasattr(self.runtime_args, 'replicas')
):
replicas = getattr(self.runtime_args, 'replicas', 1)
num_threads = max(1, torch.get_num_threads() // replicas)
num_threads = max(1, torch.get_num_threads() * 2 // replicas)
if num_threads < 2:
warnings.warn(
f'Too many replicas ({replicas}) vs too few threads {num_threads} may result in '
Expand Down Expand Up @@ -98,55 +100,40 @@ async def rank(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
set_rank(docs)

@requests
async def encode(self, docs: 'DocumentArray', **kwargs):
async def encode(self, docs: 'DocumentArray', parameters: Dict = {}, **kwargs):

traversal_paths = parameters.get('traversal_paths', self._traversal_paths)
minibatch_size = parameters.get('minibatch_size', self._minibatch_size)

_img_da = DocumentArray()
_txt_da = DocumentArray()
for d in docs:
for d in docs[traversal_paths]:
split_img_txt_da(d, _img_da, _txt_da)

# for image
if _img_da:
for minibatch, _contents in _img_da.map_batch(
for minibatch, batch_data in _img_da.map_batch(
self._preproc_images,
batch_size=self._minibatch_size,
batch_size=minibatch_size,
pool=self._pool,
):
with self.monitor(
name='encode_images_seconds',
documentation='images encode time in seconds',
):
minibatch.embeddings = self._model.encode_image(minibatch.tensors)

# recover original content
try:
_ = iter(_contents)
for _d, _ct in zip(minibatch, _contents):
_d.content = _ct
except TypeError:
pass
minibatch.embeddings = self._model.encode_image(batch_data)

# for text
# for text
if _txt_da:
for minibatch, _contents in _txt_da.map_batch(
for minibatch, batch_data in _txt_da.map_batch(
self._preproc_texts,
batch_size=self._minibatch_size,
batch_size=minibatch_size,
pool=self._pool,
):
with self.monitor(
name='encode_texts_seconds',
documentation='texts encode time in seconds',
):
minibatch.embeddings = self._model.encode_text(minibatch.tensors)

# recover original content
try:
_ = iter(_contents)
for _d, _ct in zip(minibatch, _contents):
_d.content = _ct
except TypeError:
pass

# drop tensors
docs.tensors = None
minibatch.embeddings = self._model.encode_text(batch_data)

return docs
43 changes: 15 additions & 28 deletions server/clip_server/executors/clip_tensorrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def __init__(
name: str = 'ViT-B/32',
device: str = 'cuda',
num_worker_preprocess: int = 4,
minibatch_size: int = 64,
minibatch_size: int = 32,
traversal_paths: str = '@r',
**kwargs,
):
super().__init__(**kwargs)
Expand All @@ -28,6 +29,8 @@ def __init__(
self._pool = ThreadPool(processes=num_worker_preprocess)

self._minibatch_size = minibatch_size
self._traversal_paths = traversal_paths

self._device = device

import torch
Expand Down Expand Up @@ -71,67 +74,51 @@ async def rank(self, docs: 'DocumentArray', parameters: Dict, **kwargs):
set_rank(docs)

@requests
async def encode(self, docs: 'DocumentArray', **kwargs):
async def encode(self, docs: 'DocumentArray', parameters: Dict = {}, **kwargs):
traversal_paths = parameters.get('traversal_paths', self._traversal_paths)
minibatch_size = parameters.get('minibatch_size', self._minibatch_size)

_img_da = DocumentArray()
_txt_da = DocumentArray()
for d in docs:
for d in docs[traversal_paths]:
split_img_txt_da(d, _img_da, _txt_da)

# for image
if _img_da:
for minibatch, _contents in _img_da.map_batch(
for minibatch, batch_data in _img_da.map_batch(
self._preproc_images,
batch_size=self._minibatch_size,
batch_size=minibatch_size,
pool=self._pool,
):
with self.monitor(
name='encode_images_seconds',
documentation='images encode time in seconds',
):
minibatch.embeddings = (
self._model.encode_image(minibatch.tensors)
self._model.encode_image(batch_data)
.detach()
.cpu()
.numpy()
.astype(np.float32)
)

# recover original content
try:
_ = iter(_contents)
for _d, _ct in zip(minibatch, _contents):
_d.content = _ct
except TypeError:
pass

# for text
if _txt_da:
for minibatch, _contents in _txt_da.map_batch(
for minibatch, batch_data in _txt_da.map_batch(
self._preproc_texts,
batch_size=self._minibatch_size,
batch_size=minibatch_size,
pool=self._pool,
):
with self.monitor(
name='encode_texts_seconds',
documentation='texts encode time in seconds',
):
minibatch.embeddings = (
self._model.encode_text(minibatch.tensors)
self._model.encode_text(batch_data)
.detach()
.cpu()
.numpy()
.astype(np.float32)
)

# recover original content
try:
_ = iter(_contents)
for _d, _ct in zip(minibatch, _contents):
_d.content = _ct
except TypeError:
pass

# drop tensors
docs.tensors = None

return docs
Loading