refactor: parameter rename and set default batch_size
hanxiao committed Jun 13, 2022
1 parent 2331d91 commit ada03bc
Showing 3 changed files with 10 additions and 8 deletions.
4 changes: 3 additions & 1 deletion client/clip_client/client.py
@@ -184,7 +184,9 @@ def _get_post_payload(self, content, kwargs):
         return dict(
             on='/',
             inputs=self._iter_doc(content),
-            request_size=kwargs.get('batch_size', 32),
+            request_size=kwargs.get(
+                'batch_size', 8
+            ),  # the default `batch_size` is very subjective; 8 is chosen for two reasons: (1) play safe on most GPUs, (2) ease the load on our demo server
             total_docs=len(content) if hasattr(content, '__len__') else None,
         )
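Since the client reads `batch_size` from the call's keyword arguments, the new default of 8 only applies when the caller does not pass one. A minimal sketch of overriding it from `clip_client` (the server address is a placeholder):

from clip_client import Client

c = Client('grpc://0.0.0.0:51000')  # placeholder address; point at your own server

# rely on the new default of 8 documents per request
emb_default = c.encode(['hello world', 'a photo of a cat'])

# or override it explicitly, e.g. on a GPU with more headroom
emb_large = c.encode(['hello world', 'a photo of a cat'], batch_size=32)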

12 changes: 6 additions & 6 deletions server/clip_server/executors/clip_hg.py
@@ -19,7 +19,7 @@ def __init__(
         finetuned_checkpoint_path: Optional[str] = None,
         base_feature_extractor: Optional[str] = None,
         base_tokenizer_model: Optional[str] = None,
-        use_default_preprocessing: bool = True,
+        preprocessing: bool = True,
         max_length: int = 77,
         device: str = 'cpu',
         num_worker_preprocess: int = 4,
@@ -41,7 +41,7 @@ def __init__(
             Defaults to ``pretrained_model_name_or_path`` if None.
         :param base_tokenizer_model: Base tokenizer model.
             Defaults to ``pretrained_model_name_or_path`` if None.
-        :param use_default_preprocessing: Whether to use the `base_feature_extractor`
+        :param preprocessing: Whether to use the `base_feature_extractor`
             on images (tensors) before encoding them. If you disable this, you must
             ensure that the images you pass in have the correct format, see the
             ``encode`` method for details.
@@ -58,7 +58,7 @@ def __init__(
         super().__init__(*args, **kwargs)
         self._minibatch_size = minibatch_size

-        self._use_default_preprocessing = use_default_preprocessing
+        self._preprocessing = preprocessing
         self._max_length = max_length
         self._traversal_paths = traversal_paths

@@ -113,7 +113,7 @@ def _preproc_images(self, docs: 'DocumentArray'):
             name='preprocess_images_seconds',
             documentation='images preprocess time in seconds',
         ):
-            if self._use_default_preprocessing:
+            if self._preprocessing:
                 tensors_batch = []

                 for d in docs:
@@ -179,10 +179,10 @@ async def encode(self, docs: DocumentArray, parameters: Dict = {}, **kwargs):
             ``tensor`` of the
             shape ``Height x Width x 3``. By default, the input ``tensor`` must
             be an ``ndarray`` with ``dtype=uint8`` or ``dtype=float32``.
-            If you set ``use_default_preprocessing=True`` when creating this encoder,
+            If you set ``preprocessing=True`` when creating this encoder,
             then the ``tensor`` arrays should have the shape ``[H, W, 3]``, and be in
             the RGB color format with ``dtype=uint8``.
-            If you set ``use_default_preprocessing=False`` when creating this encoder,
+            If you set ``preprocessing=False`` when creating this encoder,
             then you need to ensure that the images you pass in are already
             pre-processed. This means that they are all the same size (for batching) -
             the CLIP model was trained on images of the size ``224 x 224``, and that
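To make the renamed flag concrete, a minimal sketch of the default path (`preprocessing=True`), where callers only need to supply an ``[H, W, 3]`` ``uint8`` RGB tensor as described above; the random image stands in for a real photo:

import numpy as np
from docarray import Document, DocumentArray

# with preprocessing=True (the default), any H x W x 3 uint8 RGB tensor is accepted;
# resizing and normalization happen inside the executor via the feature extractor
raw = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
docs = DocumentArray([Document(tensor=raw)])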
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -64,7 +64,7 @@ def make_hg_flow_no_default(port_generator, request):
     f = Flow(port=port_generator()).add(
         name=request.param,
         uses=CLIPEncoder,
-        uses_with={'use_default_preprocessing': False},
+        uses_with={'preprocessing': False},
     )
     with f:
         yield f
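For the `preprocessing=False` fixture above, the caller must ship tensors that are already pre-processed. A rough sketch under stated assumptions: 224 x 224 inputs, the usual CLIP mean/std normalization constants, and a channels-first float32 layout (verify the exact expected layout against the ``encode`` docstring):

import numpy as np
from docarray import Document, DocumentArray

CLIP_MEAN = np.array([0.48145466, 0.4578275, 0.40821073], dtype=np.float32)
CLIP_STD = np.array([0.26862954, 0.26130258, 0.27577711], dtype=np.float32)

def preprocess(image: np.ndarray) -> np.ndarray:
    # image: 224 x 224 x 3 uint8, already resized/cropped by the caller
    x = image.astype(np.float32) / 255.0
    x = (x - CLIP_MEAN) / CLIP_STD
    return np.transpose(x, (2, 0, 1))  # assumed channels-first layout

img = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)  # stand-in for a real image
docs = DocumentArray([Document(tensor=preprocess(img))])

# the fixture's Flow can then be queried directly inside a test:
# result = f.post(on='/', inputs=docs)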
