Skip to content

Commit

Permalink
Update validation logic following email discussion with Kelly and Megan
Browse files Browse the repository at this point in the history
  • Loading branch information
phirework committed Jun 16, 2020
1 parent 87d6092 commit 68d0b72
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions src/corporacreator/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,17 +58,26 @@ def _preprocessor_wrapper(self, client_id, sentence, up_votes, down_votes):
return pd.Series([sentence, up_votes, down_votes])

def _partition_corpus_data(self):
# If there are < 2 votes, or 2 opposing votes
# there is not enough information to make a determination
self.other = self.corpus_data.loc[
lambda df: (df.up_votes + df.down_votes) <= 1, :
lambda df: (df.up_votes + df.down_votes <= 1)
| ((df.up_votes == 1) & (df.down_votes == 1)), :
]
# If there are 2+ votes, and up_votes > down_votes, clip is valid
self.validated = self.corpus_data.loc[
lambda df: (df.up_votes + df.down_votes > 1)
& (df.up_votes > df.down_votes),
:,
]
# If there are 2+ votes, and down_votes > up_votes, clip is invalid
# If there are 3+ votes, and up_votes == down_votes (a tie, so in
# practice 4+ votes; a 1-vs-1 tie goes to "other"), opinions
# are diverging too much to be relied upon, and clip is invalid
self.invalidated = self.corpus_data.loc[
lambda df: (df.up_votes + df.down_votes > 1)
& (df.up_votes <= df.down_votes),
& (df.up_votes < df.down_votes)
| ((df.up_votes == df.down_votes)
& (df.up_votes + df.down_votes > 2)),
:,
]

Expand Down

0 comments on commit 68d0b72

Please sign in to comment.