refactor: parameter rename and set default batch_size
hanxiao committed Jun 13, 2022
1 parent 2331d91 commit ada03bc
Showing 3 changed files with 10 additions and 8 deletions.
4 changes: 3 additions & 1 deletion client/clip_client/client.py
@@ -184,7 +184,9 @@ def _get_post_payload(self, content, kwargs):
         return dict(
             on='/',
             inputs=self._iter_doc(content),
-            request_size=kwargs.get('batch_size', 32),
+            request_size=kwargs.get(
+                'batch_size', 8
+            ),  # the default `batch_size` is very subjective; 8 is chosen for two reasons: (1) play safe on most GPUs, (2) ease the load on our demo server
             total_docs=len(content) if hasattr(content, '__len__') else None,
         )
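Since the client reads `batch_size` from the call's keyword arguments, the new default of 8 only applies when the caller does not pass one. A minimal sketch of overriding it from `clip_client` (the server address is a placeholder):

from clip_client import Client

c = Client('grpc://0.0.0.0:51000')  # placeholder address; point at your own server

# rely on the new default of 8 documents per request
emb_default = c.encode(['hello world', 'a photo of a cat'])

# or override it explicitly, e.g. on a GPU with more headroom
emb_large = c.encode(['hello world', 'a photo of a cat'], batch_size=32)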

12 changes: 6 additions & 6 deletions server/clip_server/executors/clip_hg.py
@@ -19,7 +19,7 @@ def __init__(
         finetuned_checkpoint_path: Optional[str] = None,
         base_feature_extractor: Optional[str] = None,
         base_tokenizer_model: Optional[str] = None,
-        use_default_preprocessing: bool = True,
+        preprocessing: bool = True,
         max_length: int = 77,
         device: str = 'cpu',
         num_worker_preprocess: int = 4,
@@ -41,7 +41,7 @@ def __init__(
             Defaults to ``pretrained_model_name_or_path`` if None.
         :param base_tokenizer_model: Base tokenizer model.
             Defaults to ``pretrained_model_name_or_path`` if None.
-        :param use_default_preprocessing: Whether to use the `base_feature_extractor`
+        :param preprocessing: Whether to use the `base_feature_extractor`
             on images (tensors) before encoding them. If you disable this, you must
             ensure that the images you pass in have the correct format, see the
             ``encode`` method for details.
@@ -58,7 +58,7 @@ def __init__(
         super().__init__(*args, **kwargs)
         self._minibatch_size = minibatch_size

-        self._use_default_preprocessing = use_default_preprocessing
+        self._preprocessing = preprocessing
         self._max_length = max_length
         self._traversal_paths = traversal_paths

@@ -113,7 +113,7 @@ def _preproc_images(self, docs: 'DocumentArray'):
             name='preprocess_images_seconds',
             documentation='images preprocess time in seconds',
         ):
-            if self._use_default_preprocessing:
+            if self._preprocessing:
                 tensors_batch = []

                 for d in docs:
@@ -179,10 +179,10 @@ async def encode(self, docs: DocumentArray, parameters: Dict = {}, **kwargs):
             ``tensor`` of the
             shape ``Height x Width x 3``. By default, the input ``tensor`` must
             be an ``ndarray`` with ``dtype=uint8`` or ``dtype=float32``.
-            If you set ``use_default_preprocessing=True`` when creating this encoder,
+            If you set ``preprocessing=True`` when creating this encoder,
             then the ``tensor`` arrays should have the shape ``[H, W, 3]``, and be in
             the RGB color format with ``dtype=uint8``.
-            If you set ``use_default_preprocessing=False`` when creating this encoder,
+            If you set ``preprocessing=False`` when creating this encoder,
             then you need to ensure that the images you pass in are already
             pre-processed. This means that they are all the same size (for batching) -
             the CLIP model was trained on images of the size ``224 x 224``, and that
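To make the renamed flag concrete, a minimal sketch of the default path (`preprocessing=True`), where callers only need to supply an ``[H, W, 3]`` ``uint8`` RGB tensor as described above; the random image stands in for a real photo:

import numpy as np
from docarray import Document, DocumentArray

# with preprocessing=True (the default), any H x W x 3 uint8 RGB tensor is accepted;
# resizing and normalization happen inside the executor via the feature extractor
raw = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
docs = DocumentArray([Document(tensor=raw)])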
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -64,7 +64,7 @@ def make_hg_flow_no_default(port_generator, request):
     f = Flow(port=port_generator()).add(
         name=request.param,
         uses=CLIPEncoder,
-        uses_with={'use_default_preprocessing': False},
+        uses_with={'preprocessing': False},
     )
     with f:
         yield f
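For the `preprocessing=False` fixture above, the caller must ship tensors that are already pre-processed. A rough sketch under stated assumptions: 224 x 224 inputs, the usual CLIP mean/std normalization constants, and a channels-first float32 layout (verify the exact expected layout against the ``encode`` docstring):

import numpy as np
from docarray import Document, DocumentArray

CLIP_MEAN = np.array([0.48145466, 0.4578275, 0.40821073], dtype=np.float32)
CLIP_STD = np.array([0.26862954, 0.26130258, 0.27577711], dtype=np.float32)

def preprocess(image: np.ndarray) -> np.ndarray:
    # image: 224 x 224 x 3 uint8, already resized/cropped by the caller
    x = image.astype(np.float32) / 255.0
    x = (x - CLIP_MEAN) / CLIP_STD
    return np.transpose(x, (2, 0, 1))  # assumed channels-first layout

img = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)  # stand-in for a real image
docs = DocumentArray([Document(tensor=preprocess(img))])

# the fixture's Flow can then be queried directly inside a test:
# result = f.post(on='/', inputs=docs)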
