[CT-1878] add custom headers (#100)

* add header option * rename to custom headers --------- Co-authored-by: Patrick <[email protected]>
brighter-ai · Feb 9, 2024 · 29cdd8a · 29cdd8a
1 parent 33ea482
commit 29cdd8a
Show file tree

Hide file tree

Showing 18 changed files with 224 additions and 17 deletions.
diff --git a/redact/commons/utils.py b/redact/commons/utils.py
@@ -103,3 +103,26 @@ def setup_logging(verbose_logging: bool) -> None:
     level = logging.DEBUG if verbose_logging else Settings().log_level
 
     logging.basicConfig(format=format, level=level)
+
+
+def parse_key_value_pairs(kv_pairs: List[str]) -> dict:
+    """Convert a list of ``key=value`` strings into a dictionary.
+
+    Each item is split at its first ``=`` only, so any later ``=`` stays part
+    of the value. If the same key occurs more than once, the last item wins.
+
+    Args:
+        kv_pairs: Strings of the form ``key=value``.
+
+    Returns:
+        A dict mapping every key to its value; empty if ``kv_pairs`` is empty.
+
+    Raises:
+        ValueError: If an item has no ``=``, or if its key or value is empty.
+    """
+    parsed = {}
+    for item in kv_pairs:
+        # partition() cuts at the first "=" only; an empty separator means the
+        # item did not contain "=" at all.
+        name, separator, content = item.partition("=")
+        if not separator:
+            raise ValueError(
+                f"Invalid key-value pair: {item}. Expected format: key=value"
+            )
+        if not name:
+            raise ValueError(f"Empty key in pair: {item}")
+        if not content:
+            raise ValueError(f"Empty value in pair: {item}")
+        parsed[name] = content
+    return parsed
diff --git a/redact/tools/v3.py b/redact/tools/v3.py
@@ -1,8 +1,8 @@
-from typing import Optional
+from typing import List, Optional
 
 import typer
 
-from redact.commons.utils import setup_logging
+from redact.commons.utils import parse_key_value_pairs, setup_logging
 from redact.settings import Settings
 from redact.v3 import InputType, JobArguments, OutputType, Region, ServiceType
 from redact.v3.tools.redact_file import redact_file as rdct_file
@@ -99,9 +99,15 @@ def redact_file(
         True, help="Specify whether to automatically delete the job from the backend"
     ),
     verbose_logging: bool = typer.Option(False, help="Enable very noisy logging."),
+    custom_headers: List[str] = typer.Option(
+        [],
+        help="Key-value pairs in the format key=value which will be added to all request headers",
+    ),
 ):
     setup_logging(verbose_logging)
 
+    parsed_header = parse_key_value_pairs(custom_headers)
+
     job_args = JobArguments(
         region=region,
         face=face,
@@ -127,6 +133,7 @@ def redact_file(
         skip_existing=skip_existing,
         save_labels=save_labels,
         auto_delete_job=auto_delete_job,
+        custom_headers=parsed_header,
     )
 
 
@@ -220,9 +227,15 @@ def redact_folder(
         "from the input folder after processing of a file completed.",
     ),
     verbose_logging: bool = typer.Option(False, help="Enable very noisy logging."),
+    custom_headers: List[str] = typer.Option(
+        [],
+        help="Key-value pairs in the format key=value which will be added to all request headers",
+    ),
 ):
     setup_logging(verbose_logging)
 
+    parsed_header = parse_key_value_pairs(custom_headers)
+
     job_args = JobArguments(
         region=region,
         face=face,
@@ -250,4 +263,5 @@ def redact_folder(
         skip_existing=skip_existing,
         auto_delete_job=auto_delete_job,
         auto_delete_input_file=auto_delete_input_file,
+        custom_headers=parsed_header,
     )
diff --git a/redact/tools/v4.py b/redact/tools/v4.py
@@ -2,7 +2,7 @@
 
 import typer
 
-from redact.commons.utils import setup_logging
+from redact.commons.utils import parse_key_value_pairs, setup_logging
 from redact.settings import Settings
 from redact.v4 import InputType, JobArguments, OutputType, Region, ServiceType
 from redact.v4.tools.redact_file import redact_file as rdct_file
@@ -133,9 +133,15 @@ def redact_file(
         ),
         show_default=False,
     ),
+    custom_headers: List[str] = typer.Option(
+        [],
+        help="Key-value pairs in the format key=value which will be added to all request headers",
+    ),
 ):
     setup_logging(verbose_logging)
 
+    parsed_header = parse_key_value_pairs(custom_headers)
+
     job_args = JobArguments(
         region=region,
         face=face,
@@ -163,6 +169,7 @@ def redact_file(
         ignore_warnings=ignore_warnings,
         skip_existing=skip_existing,
         auto_delete_job=auto_delete_job,
+        custom_headers=parsed_header,
     )
 
 
@@ -283,9 +290,15 @@ def redact_folder(
             f"{EXPERIMENTAL_WARNING}"
         ),
     ),
+    custom_headers: List[str] = typer.Option(
+        [],
+        help="Key-value pairs in the format key=value which will be added to all request headers",
+    ),
 ):
     setup_logging(verbose_logging)
 
+    parsed_header = parse_key_value_pairs(custom_headers)
+
     job_args = JobArguments(
         region=region,
         face=face,
@@ -316,4 +329,5 @@ def redact_folder(
         skip_existing=skip_existing,
         auto_delete_job=auto_delete_job,
         auto_delete_input_file=auto_delete_input_file,
+        custom_headers=parsed_header,
     )
diff --git a/redact/v3/redact_instance.py b/redact/v3/redact_instance.py
@@ -1,4 +1,4 @@
-from typing import IO, BinaryIO, Optional, Union
+from typing import IO, BinaryIO, Dict, Optional, Union
 
 from redact.settings import Settings
 from redact.v3.data_models import JobArguments, JobLabels, OutputType, ServiceType
@@ -35,12 +35,16 @@ def create(
         redact_url: str = settings.redact_url_default,
         subscription_id: Optional[str] = None,
         api_key: Optional[str] = None,
+        custom_headers: Optional[Dict] = None,
     ) -> "RedactInstance":
         """
         The default way of creating RedactInstance objects.
         """
         redact_requests = RedactRequests(
-            redact_url=redact_url, subscription_id=subscription_id, api_key=api_key
+            redact_url=redact_url,
+            subscription_id=subscription_id,
+            api_key=api_key,
+            custom_headers=custom_headers,
         )
         return cls(redact_requests=redact_requests, service=service, out_type=out_type)
 

diff --git a/redact/v3/redact_requests.py b/redact/v3/redact_requests.py
@@ -57,13 +57,17 @@ def __init__(
         subscription_id: Optional[str] = None,
         api_key: Optional[str] = None,
         httpx_client: Optional[httpx.Client] = None,
+        custom_headers: Optional[Dict] = None,
     ):
         self.redact_url = normalize_url(redact_url)
         self.api_key = api_key
         self.subscription_id = subscription_id
-        self._headers = {"Accept": "*/*"}
         self.retry_total_time_limit: float = 600  # 10 minutes in seconds
 
+        self._headers = {"Accept": "*/*"}
+        if custom_headers is not None:
+            self._headers.update(custom_headers)
+
         if self.api_key:
             self._headers["api-key"] = self.api_key
         if self.subscription_id:

diff --git a/redact/v3/tools/redact_file.py b/redact/v3/tools/redact_file.py
@@ -1,6 +1,6 @@
 import logging
 from pathlib import Path
-from typing import Optional, Union
+from typing import Dict, Optional, Union
 
 from redact.commons.utils import normalize_path
 from redact.settings import Settings
@@ -39,6 +39,7 @@ def redact_file(
     auto_delete_input_file: bool = False,
     waiting_time_between_job_status_checks: Optional[float] = None,
     redact_requests_param: Optional[RedactRequests] = None,
+    custom_headers: Optional[Dict[str, str]] = None,
 ) -> Optional[JobStatus]:
     """
     If no out_path is given, <input_filename_redacted> will be used.
@@ -88,6 +89,7 @@ def redact_file(
                 out_type=output_type,
                 redact_url=redact_url,
                 api_key=api_key,
+                custom_headers=custom_headers,
             )
         with open(file_path, "rb") as file:
             job: RedactJob = redact.start_job(

diff --git a/redact/v3/tools/redact_folder.py b/redact/v3/tools/redact_folder.py
@@ -3,7 +3,7 @@
 import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import tqdm
 from tqdm.contrib.logging import logging_redirect_tqdm
@@ -45,6 +45,7 @@ def redact_folder(
     skip_existing: bool = True,
     auto_delete_job: bool = True,
     auto_delete_input_file: bool = False,
+    custom_headers: Optional[Dict[str, str]] = None,
 ) -> JobsSummary:
     # Normalize paths, e.g.: '~/..' -> '/home'
     in_dir_path = normalize_path(input_dir)
@@ -83,6 +84,7 @@ def redact_folder(
         skip_existing=skip_existing,
         auto_delete_job=auto_delete_job,
         auto_delete_input_file=auto_delete_input_file,
+        custom_headers=custom_headers,
     )
 
     log.info(f"Starting {n_parallel_jobs} parallel jobs to anonymize files ...")

diff --git a/redact/v4/redact_instance.py b/redact/v4/redact_instance.py
@@ -1,4 +1,4 @@
-from typing import BinaryIO, Optional
+from typing import BinaryIO, Dict, Optional
 
 from redact.settings import Settings
 from redact.v4.data_models import JobArguments, OutputType, ServiceType
@@ -35,12 +35,16 @@ def create(
         redact_url: str = settings.redact_url_default,
         subscription_id: Optional[str] = None,
         api_key: Optional[str] = None,
+        custom_headers: Optional[Dict] = None,
     ) -> "RedactInstance":
         """
         The default way of creating RedactInstance objects.
         """
         redact_requests = RedactRequests(
-            redact_url=redact_url, subscription_id=subscription_id, api_key=api_key
+            redact_url=redact_url,
+            subscription_id=subscription_id,
+            api_key=api_key,
+            custom_headers=custom_headers,
         )
         return cls(redact_requests=redact_requests, service=service, out_type=out_type)
 

diff --git a/redact/v4/redact_requests.py b/redact/v4/redact_requests.py
@@ -56,13 +56,17 @@ def __init__(
         subscription_id: Optional[str] = None,
         api_key: Optional[str] = None,
         httpx_client: Optional[httpx.Client] = None,
+        custom_headers: Optional[Dict] = None,
     ):
         self.redact_url = normalize_url(redact_url)
         self.api_key = api_key
         self.subscription_id = subscription_id
-        self._headers = {"Accept": "*/*"}
         self.retry_total_time_limit: float = 600  # 10 minutes in seconds
 
+        self._headers = {"Accept": "*/*"}
+        if custom_headers is not None:
+            self._headers.update(custom_headers)
+
         if self.api_key:
             self._headers["api-key"] = self.api_key
         if self.subscription_id:

diff --git a/redact/v4/tools/redact_file.py b/redact/v4/tools/redact_file.py
@@ -1,6 +1,6 @@
 import logging
 from pathlib import Path
-from typing import Optional, Union
+from typing import Dict, Optional, Union
 
 from redact.commons.utils import normalize_path
 from redact.settings import Settings
@@ -36,6 +36,7 @@ def redact_file(
     auto_delete_input_file: bool = False,
     waiting_time_between_job_status_checks: Optional[float] = None,
     redact_requests_param: Optional[RedactRequests] = None,
+    custom_headers: Optional[Dict[str, str]] = None,
 ) -> Optional[JobStatus]:
     """
     If no out_path is given, <input_filename_redacted> will be used.
@@ -79,6 +80,7 @@ def redact_file(
                 out_type=output_type,
                 redact_url=redact_url,
                 api_key=api_key,
+                custom_headers=custom_headers,
             )
         with open(file_path, "rb") as file:
             job: RedactJob = redact.start_job(

diff --git a/redact/v4/tools/redact_folder.py b/redact/v4/tools/redact_folder.py
@@ -3,7 +3,7 @@
 import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import tqdm
 from tqdm.contrib.logging import logging_redirect_tqdm
@@ -44,6 +44,7 @@ def redact_folder(
     skip_existing: bool = True,
     auto_delete_job: bool = True,
     auto_delete_input_file: bool = False,
+    custom_headers: Optional[Dict[str, str]] = None,
 ) -> JobsSummary:
     # Normalize paths, e.g.: '~/..' -> '/home'
     in_dir_path = normalize_path(input_dir)
@@ -81,6 +82,7 @@ def redact_folder(
         skip_existing=skip_existing,
         auto_delete_job=auto_delete_job,
         auto_delete_input_file=auto_delete_input_file,
+        custom_headers=custom_headers,
     )
 
     log.info(f"Starting {n_parallel_jobs} parallel jobs to anonymize files ...")

diff --git a/tests/commons/test_utils.py b/tests/commons/test_utils.py
@@ -3,7 +3,12 @@
 
 import pytest
 
-from redact.commons.utils import files_in_dir, images_in_dir, normalize_path
+from redact.commons.utils import (
+    files_in_dir,
+    images_in_dir,
+    normalize_path,
+    parse_key_value_pairs,
+)
 from redact.utils import normalize_url
 
 
@@ -56,3 +61,30 @@ def test_images_in_dir(images_path: Path):
         "sub_dir/img_1.jpeg",
         "sub_dir/img_2.jpeg",
     ]
+
+
+@pytest.mark.parametrize(
+    "kv_pairs, expected",
+    [
+        ([], {}),
+        (["hello=world"], {"hello": "world"}),
+        (["hello=wor=ld"], {"hello": "wor=ld"}),
+        (["hello=world", "foo=boo"], {"hello": "world", "foo": "boo"}),
+    ],
+)
+def test_parse_key_value_pairs(kv_pairs, expected):
+    """Well-formed ``key=value`` items are parsed into the expected dict."""
+    # Cases: empty input, a single pair, a value containing "=", several pairs.
+    # The argument is named kv_pairs so it does not shadow the builtin input().
+    assert parse_key_value_pairs(kv_pairs) == expected
+
+
+@pytest.mark.parametrize(
+    "kv_pairs",
+    [
+        ["helloworld"],  # no "=" separator
+        ["=world"],  # empty key
+        ["world="],  # empty value
+    ],
+)
+def test_parse_key_value_pairs_exception_on_illformatted(kv_pairs):
+    """Malformed items must make the parser raise ``ValueError``."""
+    # The argument is named kv_pairs so it does not shadow the builtin input().
+    with pytest.raises(ValueError):
+        parse_key_value_pairs(kv_pairs)