Skip to content

Commit

Permalink
requested changes
Browse files Browse the repository at this point in the history
  • Loading branch information
0dm committed Jun 13, 2023
1 parent b20a4d9 commit 76b1396
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 32 deletions.
23 changes: 22 additions & 1 deletion openadapt/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,12 @@
"SCRUB_ENABLED": True,
"SCRUB_CHAR": "*",
"SCRUB_LANGUAGE": "en",
# TODO support lists in getenv_fallback
"SCRUB_FILL_COLOR": (255, 0, 0),
"SCRUB_CONFIG_TRF": {
"nlp_engine_name": "spacy",
"models": [{"lang_code": "en", "model_name": "en_core_web_trf"}],
},
"DEFAULT_SCRUB_FILL_COLOR": (255, 0, 0),
"SCRUB_IGNORE_ENTITIES": [
# 'US_PASSPORT',
# 'US_DRIVER_LICENSE',
Expand Down Expand Up @@ -106,3 +107,23 @@ def getenv_fallback(var_name):
for key, val in locals().items():
if not key.startswith("_") and key.isupper():
logger.info(f"{key}={val}")


def filter_log_messages(data, messages_to_ignore=None):
    """Decide whether a loguru log record should be emitted.

    Intended to be passed as the ``filter`` callable when adding a loguru
    sink; it is called with the record alone, so ``messages_to_ignore``
    keeps its default in that case.

    Args:
        data: Record mapping from a loguru logger. Only its ``"message"``
            entry is read.
        messages_to_ignore: Optional iterable of substrings. A record whose
            message contains any of them is suppressed. When ``None``
            (the default), the built-in list of known noisy messages is used.

    Returns:
        bool: ``False`` if the message contains any ignored substring (the
        record is dropped), ``True`` otherwise (the record is kept).
    """
    # TODO: ultimately, we want to fix the underlying issues, but for now,
    # we can ignore these messages
    if messages_to_ignore is None:
        messages_to_ignore = [
            "Cannot pickle Objective-C objects",
        ]
    # Look the message up once instead of once per candidate substring.
    message = data["message"]
    return not any(msg in message for msg in messages_to_ignore)
26 changes: 20 additions & 6 deletions openadapt/scrub.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ def scrub_text_all(text: str) -> str:
return config.SCRUB_CHAR * len(text)


def scrub_image(image: Image, fill_color=config.DEFAULT_SCRUB_FILL_COLOR) -> Image:
def scrub_image(
image: Image, fill_color=config.SCRUB_FILL_COLOR
) -> Image:
"""
Scrub the image of all PII/PHI using Presidio Image Redactor
Expand Down Expand Up @@ -157,7 +159,9 @@ def _is_scrubbed(old_text: str, new_text: str) -> bool:
return old_text != new_text


def _scrub_text_item(value: str, key: Any, force_scrub_children: bool = False) -> str:
def _scrub_text_item(
value: str, key: Any, force_scrub_children: bool = False
) -> str:
"""
Scrubs the value of a dict item.
Expand Down Expand Up @@ -188,7 +192,11 @@ def _should_scrub_list_item(item: Any, key: Any, list_keys: List[str]) -> bool:
bool: True if the key and value should be scrubbed, False otherwise
"""

return isinstance(item, (str, dict)) and isinstance(key, str) and key in list_keys
return (
isinstance(item, (str, dict))
and isinstance(key, str)
and key in list_keys
)


def _scrub_list_item(
Expand All @@ -209,7 +217,9 @@ def _scrub_list_item(
dict/str: The scrubbed dict/value respectively
"""
if isinstance(item, dict):
return scrub_dict(item, list_keys, force_scrub_children=force_scrub_children)
return scrub_dict(
item, list_keys, force_scrub_children=force_scrub_children
)
return _scrub_text_item(item, key)


Expand All @@ -234,8 +244,12 @@ def scrub_dict(
scrubbed_dict = {}
for key, value in input_dict.items():
if _should_scrub_text(key, value, list_keys, scrub_all):
scrubbed_text = _scrub_text_item(value, key, force_scrub_children)
if key in ("text", "canonical_text") and _is_scrubbed(value, scrubbed_text):
scrubbed_text = _scrub_text_item(
value, key, force_scrub_children
)
if key in ("text", "canonical_text") and _is_scrubbed(
value, scrubbed_text
):
force_scrub_children = True
scrubbed_dict[key] = scrubbed_text
elif isinstance(value, list):
Expand Down
10 changes: 2 additions & 8 deletions openadapt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,11 @@ def configure_logging(logger, log_level):
logger.add(
sys.stderr,
level=log_level,
filter=filter_log_messages if config.IGNORE_WARNINGS else None,
filter=config.filter_log_messages if config.IGNORE_WARNINGS else None,
)
logger.debug(f"{log_level=}")


def filter_log_messages(data):
    """Tell whether a log record should be kept.

    Args:
        data: Record mapping; only its ``"message"`` entry is read.

    Returns:
        bool: ``False`` when the message contains one of the substrings
        listed below (the record is filtered out), ``True`` otherwise.
    """
    # Substrings that mark a message as noise to be suppressed.
    ignored_fragments = ("Cannot pickle Objective-C objects",)
    text = data["message"]
    for fragment in ignored_fragments:
        if fragment in text:
            return False
    return True


def row2dict(row, follow=True):
if isinstance(row, dict):
return row
Expand Down Expand Up @@ -173,6 +166,7 @@ def get_monitor_dims():
return monitor_width, monitor_height


# TODO: move parameters to config
def draw_ellipse(
x,
y,
Expand Down
22 changes: 11 additions & 11 deletions openadapt/visualize.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,9 @@ def main():
event_dicts = scrub.scrub_list_dicts(event_dicts)
logger.info(f"event_dicts=\n{pformat(event_dicts)}")

r2d_recording = row2dict(recording)
recording_dict = row2dict(recording)
if SCRUB:
r2d_recording = scrub.scrub_dict(r2d_recording)
recording_dict = scrub.scrub_dict(recording_dict)

rows = [
row(
Expand All @@ -169,7 +169,7 @@ def main():
),
row(
Div(
text=f"{dict2html(r2d_recording)}",
text=f"{dict2html(recording_dict)}",
),
),
row(
Expand Down Expand Up @@ -197,13 +197,13 @@ def main():
mask_utf8 = image2utf8(mask)
width, height = image.size

r2d_ae = row2dict(action_event)
r2d_ae_we = row2dict(action_event.window_event)
action_event_dict = row2dict(action_event)
window_event_dict = row2dict(action_event.window_event)

if SCRUB:
r2d_ae = scrub.scrub_dict(r2d_ae)
r2d_ae_we = scrub.scrub_dict(r2d_ae_we)
action_event_dict = scrub.scrub_dict(action_event_dict)
window_event_dict = scrub.scrub_dict(window_event_dict)

rows.append(
[
row(
Expand All @@ -230,14 +230,14 @@ def main():
>
</div>
<table>
{dict2html(r2d_ae_we , None)}
{dict2html(window_event_dict , None)}
</table>
""",
),
Div(
text=f"""
<table>
{dict2html(r2d_ae)}
{dict2html(action_event_dict)}
</table>
"""
),
Expand Down
9 changes: 3 additions & 6 deletions tests/openadapt/test_scrub.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,7 @@ def test_scrub_image() -> None:

# Count the number of pixels having the color of the mask
mask_pixels = sum(
1
for pixel in scrubbed_image.getdata()
if pixel == config.DEFAULT_SCRUB_FILL_COLOR
1 for pixel in scrubbed_image.getdata() if pixel == config.SCRUB_FILL_COLOR
)
total_pixels = scrubbed_image.width * scrubbed_image.height

Expand Down Expand Up @@ -104,9 +102,7 @@ def test_scrub_credit_card() -> None:
"""

assert (
scrub.scrub_text(
"My credit card number is 4234-5678-9012-3456 and "
)
scrub.scrub_text("My credit card number is 4234-5678-9012-3456 and ")
) == "My credit card number is ******************* and "


Expand Down Expand Up @@ -219,5 +215,6 @@ def test_scrub_all_together() -> None:
" He was born on 01/01/1980."
)


if __name__ == "__main__":
test_scrub_image()

0 comments on commit 76b1396

Please sign in to comment.