Skip to content

Commit

Permalink
moved the text_sep, prefix and suffix
Browse files Browse the repository at this point in the history
from models.py to config
also moved the scrub language to config
also moved the masking char from scrub to config
  • Loading branch information
Krish Patel committed Jun 1, 2023
1 parent 99bbb9a commit c69caf3
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 11 deletions.
10 changes: 10 additions & 0 deletions openadapt/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,18 @@ def getenv_fallback(var_name):
logger.info(f"{key}={val}")


# ACTION EVENT CONFIGURATIONS
TEXT_SEP = "-"
TEXT_NAME_PREFIX = "<"
TEXT_NAME_SUFFIX = ">"




# SCRUBBING CONFIGURATIONS

SCRUB_CHAR = "*"
SCRUB_LANGUAGE = "en"
SCRUB_CONFIG_TRF = {
"nlp_engine_name": "spacy",
"models": [{"lang_code": "en", "model_name": "en_core_web_trf"}],
Expand Down
9 changes: 4 additions & 5 deletions openadapt/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
import sqlalchemy as sa

from openadapt import db, utils, window
from openadapt import config, db, utils, window


# https://groups.google.com/g/sqlalchemy/c/wlr7sShU6-k
Expand Down Expand Up @@ -61,6 +61,9 @@ def processed_action_events(self):

class ActionEvent(db.Base):
__tablename__ = "action_event"
_text_sep = config.TEXT_SEP
_text_name_prefix = config.TEXT_NAME_PREFIX
_text_name_suffix = config.TEXT_NAME_SUFFIX

id = sa.Column(sa.Integer, primary_key=True)
name = sa.Column(sa.String)
Expand Down Expand Up @@ -201,10 +204,6 @@ def __str__(self):
rval = " ".join(attrs)
return rval

_text_sep = "-"
_text_name_prefix = "<"
_text_name_suffix = ">"

@classmethod
def from_children(cls, children_dicts):
children = [
Expand Down
12 changes: 6 additions & 6 deletions openadapt/scrub.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,20 @@ def scrub_text(text: str, is_hyphenated: bool = False) -> str:
return None

if is_hyphenated and not (
text.startswith("<") or text.endswith(">")
text.startswith(config.TEXT_NAME_PREFIX) or text.endswith(config.TEXT_NAME_SUFFIX)
):
text = "".join(text.split("-"))
text = "".join(text.split(config.TEXT_SEP))

analyzer_results = config.ANALYZER_TRF.analyze(
text=text, entities=config.SCRUBBING_ENTITIES, language="en"
text=text, entities=config.SCRUBBING_ENTITIES, language=config.SCRUB_LANGUAGE
)

operators = {}
for entity in analyzer_results:
operators[entity.entity_type] = OperatorConfig(
"mask",
{
"masking_char": "*",
"masking_char": config.SCRUB_CHAR,
"chars_to_mask": entity.end - entity.start,
"from_end": True,
},
Expand All @@ -57,9 +57,9 @@ def scrub_text(text: str, is_hyphenated: bool = False) -> str:
)

if is_hyphenated and not (
text.startswith("<") or text.endswith(">")
text.startswith(config.TEXT_NAME_PREFIX) or text.endswith(config.TEXT_NAME_SUFFIX)
):
anonymized_results.text = "-".join(anonymized_results.text)
anonymized_results.text = config.TEXT_SEP.join(anonymized_results.text)

return anonymized_results.text

Expand Down

0 comments on commit c69caf3

Please sign in to comment.