Skip to content

Commit

Permalink
[CT-1878] add custom headers (#100)
Browse files Browse the repository at this point in the history
* add header option

* rename to custom headers

---------

Co-authored-by: Patrick <[email protected]>
  • Loading branch information
pkern90 and Patrick authored Feb 9, 2024
1 parent 33ea482 commit 29cdd8a
Show file tree
Hide file tree
Showing 18 changed files with 224 additions and 17 deletions.
23 changes: 23 additions & 0 deletions redact/commons/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,26 @@ def setup_logging(verbose_logging: bool) -> None:
level = logging.DEBUG if verbose_logging else Settings().log_level

logging.basicConfig(format=format, level=level)


def parse_key_value_pairs(kv_pairs: List[str]) -> dict:
    """Convert ``key=value`` strings into a dictionary.

    Each entry is split at its first ``=`` only, so the value part may itself
    contain further equal signs. If the same key occurs more than once, the
    last occurrence wins.

    Args:
        kv_pairs: Strings of the form ``key=value``.

    Returns:
        A dictionary mapping every key to its value.

    Raises:
        ValueError: If an entry has no ``=``, or if its key or value is empty.
    """
    parsed = {}
    for pair in kv_pairs:
        # partition() cuts at the first "=" only; an empty separator means
        # the entry contained no "=" at all.
        name, separator, content = pair.partition("=")
        if not separator:
            raise ValueError(
                f"Invalid key-value pair: {pair}. Expected format: key=value"
            )
        if not name:
            raise ValueError(f"Empty key in pair: {pair}")
        if not content:
            raise ValueError(f"Empty value in pair: {pair}")
        parsed[name] = content
    return parsed
18 changes: 16 additions & 2 deletions redact/tools/v3.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Optional
from typing import List, Optional

import typer

from redact.commons.utils import setup_logging
from redact.commons.utils import parse_key_value_pairs, setup_logging
from redact.settings import Settings
from redact.v3 import InputType, JobArguments, OutputType, Region, ServiceType
from redact.v3.tools.redact_file import redact_file as rdct_file
Expand Down Expand Up @@ -99,9 +99,15 @@ def redact_file(
True, help="Specify whether to automatically delete the job from the backend"
),
verbose_logging: bool = typer.Option(False, help="Enable very noisy logging."),
custom_headers: List[str] = typer.Option(
[],
help="Key-value pairs in the format key=value which will be added to all request headers",
),
):
setup_logging(verbose_logging)

parsed_header = parse_key_value_pairs(custom_headers)

job_args = JobArguments(
region=region,
face=face,
Expand All @@ -127,6 +133,7 @@ def redact_file(
skip_existing=skip_existing,
save_labels=save_labels,
auto_delete_job=auto_delete_job,
custom_headers=parsed_header,
)


Expand Down Expand Up @@ -220,9 +227,15 @@ def redact_folder(
"from the input folder after processing of a file completed.",
),
verbose_logging: bool = typer.Option(False, help="Enable very noisy logging."),
custom_headers: List[str] = typer.Option(
[],
help="Key-value pairs in the format key=value which will be added to all request headers",
),
):
setup_logging(verbose_logging)

parsed_header = parse_key_value_pairs(custom_headers)

job_args = JobArguments(
region=region,
face=face,
Expand Down Expand Up @@ -250,4 +263,5 @@ def redact_folder(
skip_existing=skip_existing,
auto_delete_job=auto_delete_job,
auto_delete_input_file=auto_delete_input_file,
custom_headers=parsed_header,
)
16 changes: 15 additions & 1 deletion redact/tools/v4.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import typer

from redact.commons.utils import setup_logging
from redact.commons.utils import parse_key_value_pairs, setup_logging
from redact.settings import Settings
from redact.v4 import InputType, JobArguments, OutputType, Region, ServiceType
from redact.v4.tools.redact_file import redact_file as rdct_file
Expand Down Expand Up @@ -133,9 +133,15 @@ def redact_file(
),
show_default=False,
),
custom_headers: List[str] = typer.Option(
[],
help="Key-value pairs in the format key=value which will be added to all request headers",
),
):
setup_logging(verbose_logging)

parsed_header = parse_key_value_pairs(custom_headers)

job_args = JobArguments(
region=region,
face=face,
Expand Down Expand Up @@ -163,6 +169,7 @@ def redact_file(
ignore_warnings=ignore_warnings,
skip_existing=skip_existing,
auto_delete_job=auto_delete_job,
custom_headers=parsed_header,
)


Expand Down Expand Up @@ -283,9 +290,15 @@ def redact_folder(
f"{EXPERIMENTAL_WARNING}"
),
),
custom_headers: List[str] = typer.Option(
[],
help="Key-value pairs in the format key=value which will be added to all request headers",
),
):
setup_logging(verbose_logging)

parsed_header = parse_key_value_pairs(custom_headers)

job_args = JobArguments(
region=region,
face=face,
Expand Down Expand Up @@ -316,4 +329,5 @@ def redact_folder(
skip_existing=skip_existing,
auto_delete_job=auto_delete_job,
auto_delete_input_file=auto_delete_input_file,
custom_headers=parsed_header,
)
8 changes: 6 additions & 2 deletions redact/v3/redact_instance.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import IO, BinaryIO, Optional, Union
from typing import IO, BinaryIO, Dict, Optional, Union

from redact.settings import Settings
from redact.v3.data_models import JobArguments, JobLabels, OutputType, ServiceType
Expand Down Expand Up @@ -35,12 +35,16 @@ def create(
redact_url: str = settings.redact_url_default,
subscription_id: Optional[str] = None,
api_key: Optional[str] = None,
custom_headers: Optional[Dict] = None,
) -> "RedactInstance":
"""
The default way of creating RedactInstance objects.
"""
redact_requests = RedactRequests(
redact_url=redact_url, subscription_id=subscription_id, api_key=api_key
redact_url=redact_url,
subscription_id=subscription_id,
api_key=api_key,
custom_headers=custom_headers,
)
return cls(redact_requests=redact_requests, service=service, out_type=out_type)

Expand Down
6 changes: 5 additions & 1 deletion redact/v3/redact_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,17 @@ def __init__(
subscription_id: Optional[str] = None,
api_key: Optional[str] = None,
httpx_client: Optional[httpx.Client] = None,
custom_headers: Optional[Dict] = None,
):
self.redact_url = normalize_url(redact_url)
self.api_key = api_key
self.subscription_id = subscription_id
self._headers = {"Accept": "*/*"}
self.retry_total_time_limit: float = 600 # 10 minutes in seconds

self._headers = {"Accept": "*/*"}
if custom_headers is not None:
self._headers.update(custom_headers)

if self.api_key:
self._headers["api-key"] = self.api_key
if self.subscription_id:
Expand Down
4 changes: 3 additions & 1 deletion redact/v3/tools/redact_file.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
from pathlib import Path
from typing import Optional, Union
from typing import Dict, Optional, Union

from redact.commons.utils import normalize_path
from redact.settings import Settings
Expand Down Expand Up @@ -39,6 +39,7 @@ def redact_file(
auto_delete_input_file: bool = False,
waiting_time_between_job_status_checks: Optional[float] = None,
redact_requests_param: Optional[RedactRequests] = None,
custom_headers: Optional[Dict[str, str]] = None,
) -> Optional[JobStatus]:
"""
If no out_path is given, <input_filename_redacted> will be used.
Expand Down Expand Up @@ -88,6 +89,7 @@ def redact_file(
out_type=output_type,
redact_url=redact_url,
api_key=api_key,
custom_headers=custom_headers,
)
with open(file_path, "rb") as file:
job: RedactJob = redact.start_job(
Expand Down
4 changes: 3 additions & 1 deletion redact/v3/tools/redact_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Any, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union

import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm
Expand Down Expand Up @@ -45,6 +45,7 @@ def redact_folder(
skip_existing: bool = True,
auto_delete_job: bool = True,
auto_delete_input_file: bool = False,
custom_headers: Optional[Dict[str, str]] = None,
) -> JobsSummary:
# Normalize paths, e.g.: '~/..' -> '/home'
in_dir_path = normalize_path(input_dir)
Expand Down Expand Up @@ -83,6 +84,7 @@ def redact_folder(
skip_existing=skip_existing,
auto_delete_job=auto_delete_job,
auto_delete_input_file=auto_delete_input_file,
custom_headers=custom_headers,
)

log.info(f"Starting {n_parallel_jobs} parallel jobs to anonymize files ...")
Expand Down
8 changes: 6 additions & 2 deletions redact/v4/redact_instance.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import BinaryIO, Optional
from typing import BinaryIO, Dict, Optional

from redact.settings import Settings
from redact.v4.data_models import JobArguments, OutputType, ServiceType
Expand Down Expand Up @@ -35,12 +35,16 @@ def create(
redact_url: str = settings.redact_url_default,
subscription_id: Optional[str] = None,
api_key: Optional[str] = None,
custom_headers: Optional[Dict] = None,
) -> "RedactInstance":
"""
The default way of creating RedactInstance objects.
"""
redact_requests = RedactRequests(
redact_url=redact_url, subscription_id=subscription_id, api_key=api_key
redact_url=redact_url,
subscription_id=subscription_id,
api_key=api_key,
custom_headers=custom_headers,
)
return cls(redact_requests=redact_requests, service=service, out_type=out_type)

Expand Down
6 changes: 5 additions & 1 deletion redact/v4/redact_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,17 @@ def __init__(
subscription_id: Optional[str] = None,
api_key: Optional[str] = None,
httpx_client: Optional[httpx.Client] = None,
custom_headers: Optional[Dict] = None,
):
self.redact_url = normalize_url(redact_url)
self.api_key = api_key
self.subscription_id = subscription_id
self._headers = {"Accept": "*/*"}
self.retry_total_time_limit: float = 600 # 10 minutes in seconds

self._headers = {"Accept": "*/*"}
if custom_headers is not None:
self._headers.update(custom_headers)

if self.api_key:
self._headers["api-key"] = self.api_key
if self.subscription_id:
Expand Down
4 changes: 3 additions & 1 deletion redact/v4/tools/redact_file.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
from pathlib import Path
from typing import Optional, Union
from typing import Dict, Optional, Union

from redact.commons.utils import normalize_path
from redact.settings import Settings
Expand Down Expand Up @@ -36,6 +36,7 @@ def redact_file(
auto_delete_input_file: bool = False,
waiting_time_between_job_status_checks: Optional[float] = None,
redact_requests_param: Optional[RedactRequests] = None,
custom_headers: Optional[Dict[str, str]] = None,
) -> Optional[JobStatus]:
"""
If no out_path is given, <input_filename_redacted> will be used.
Expand Down Expand Up @@ -79,6 +80,7 @@ def redact_file(
out_type=output_type,
redact_url=redact_url,
api_key=api_key,
custom_headers=custom_headers,
)
with open(file_path, "rb") as file:
job: RedactJob = redact.start_job(
Expand Down
4 changes: 3 additions & 1 deletion redact/v4/tools/redact_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Any, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union

import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm
Expand Down Expand Up @@ -44,6 +44,7 @@ def redact_folder(
skip_existing: bool = True,
auto_delete_job: bool = True,
auto_delete_input_file: bool = False,
custom_headers: Optional[Dict[str, str]] = None,
) -> JobsSummary:
# Normalize paths, e.g.: '~/..' -> '/home'
in_dir_path = normalize_path(input_dir)
Expand Down Expand Up @@ -81,6 +82,7 @@ def redact_folder(
skip_existing=skip_existing,
auto_delete_job=auto_delete_job,
auto_delete_input_file=auto_delete_input_file,
custom_headers=custom_headers,
)

log.info(f"Starting {n_parallel_jobs} parallel jobs to anonymize files ...")
Expand Down
34 changes: 33 additions & 1 deletion tests/commons/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@

import pytest

from redact.commons.utils import files_in_dir, images_in_dir, normalize_path
from redact.commons.utils import (
files_in_dir,
images_in_dir,
normalize_path,
parse_key_value_pairs,
)
from redact.utils import normalize_url


Expand Down Expand Up @@ -56,3 +61,30 @@ def test_images_in_dir(images_path: Path):
"sub_dir/img_1.jpeg",
"sub_dir/img_2.jpeg",
]


@pytest.mark.parametrize(
    "kv_pairs, expected",
    [
        ([], {}),
        (["hello=world"], {"hello": "world"}),
        (["hello=wor=ld"], {"hello": "wor=ld"}),
        (["hello=world", "foo=boo"], {"hello": "world", "foo": "boo"}),
    ],
)
def test_parse_key_value_pairs(kv_pairs, expected):
    """Well-formed pairs are parsed into the expected dictionary."""
    # The argument is called kv_pairs (not "input") so the builtin input()
    # is not shadowed inside the test.
    assert parse_key_value_pairs(kv_pairs) == expected


@pytest.mark.parametrize(
    "kv_pairs",
    [
        ["helloworld"],  # no "=" separator
        ["=world"],  # empty key
        ["world="],  # empty value
    ],
)
def test_parse_key_value_pairs_exception_on_illformatted(kv_pairs):
    """Ill-formatted pairs are rejected with a ValueError."""
    # The argument is called kv_pairs (not "input") so the builtin input()
    # is not shadowed; the return value is irrelevant because the call must raise.
    with pytest.raises(ValueError):
        parse_key_value_pairs(kv_pairs)
Loading

0 comments on commit 29cdd8a

Please sign in to comment.