purely summarizes now

OpenAdaptAI · Jun 1, 2023 · f197253
1 parent 6b8a564
commit f197253
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 129 deletions.
diff --git a/puterbot/strategies/summary_mixin.py b/puterbot/strategies/summary_mixin.py
@@ -1,6 +1,6 @@
 """
-Implements a ReplayStrategy mixin which identifies whether windows are different
-from summarizing their contents.
+Implements a ReplayStrategy mixin which summarizes the content of texts.
+
 
 Usage:
 
@@ -15,10 +15,6 @@
 from sumy.nlp.tokenizers import Tokenizer
 from sumy.nlp.stemmers import Stemmer
 from sumy.utils import get_stop_words
-from nltk.corpus import wordnet
-import re
-from fuzzywuzzy import fuzz
-import statistics
 
 
 class SummaryReplayStrategyMixin(BaseReplayStrategy):
@@ -32,74 +28,23 @@ def __init__(
     ):
         """
         See base class.
+
+        Additional Attributes:
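+            - stemmer: the sumy Stemmer for English passed to the summarizer
+            - summarizer: the LsaSummarizer, configured with English stop words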
         """
         super().__init__(recording)
+        self.stemmer = Stemmer("english")
+        summarizer = LsaSummarizer(self.stemmer)
+        summarizer.stop_words = get_stop_words("english")
+        self.summarizer = summarizer
 
     def get_summary(
         self,
-        ascii_curr: str,
-        ascii_prev: str,
-        ocr_curr: str,
-        ocr_prev: str,
-    ) -> float:
-        """Returns how similar the contents of 2 screenshots are.
-
-        Args:
-            self: the SummaryReplayStrategyMixin
-            ascii_curr: the ascii text of the current screenshot
-            ascii_prev: the ascii text of the previous screenshot
-            ocr_curr: the ocr text of the current screenshot
-            ocr_prev: the ocr text of the previous screenshot
-
-        Returns:
-            A float which represents the mean of the following 2 similarity values:
-                1. how similar the summaries of the current and previous ASCII text are
-                2. how similar the summaries of the current and previous OCR text are
-        """
-        cleaned_ascii_curr = clean_ascii(ascii_curr)
-        cleaned_ascii_prev = clean_ascii(ascii_prev)
-
-        ascii_similarity = compare_text(cleaned_ascii_curr, cleaned_ascii_prev)
-        ocr_similarity = compare_text(ocr_curr, ocr_prev)
-
-        similarity_list = [ascii_similarity, ocr_similarity]
-
-        # may want the required minimum similarity
-        return statistics.fmean(similarity_list)
-
-
-def clean_ascii(
-    text: str,
-) -> str:
-    """
-    Returns a string of the words in the inputted text.
-    """
-    # remove the irrelevant symbols in the ascii and returns a list of strings
-    no_symbols = re.sub(r"[^\w\s]+", " ", text)
-    ascii_words = []
-
-    for word in no_symbols.split():
-        if wordnet.synsets(word):  # check if it's a word
-            ascii_words.append(word)
-    return " ".join(ascii_words)
-
-
-def compare_text(
-    text1: str,
-    text2: str,
-) -> int:
-    """
-    Returns a int value between 0 and 100 (inclusive on both sides) representing how similar the 2 strings are.
-    """
-    stemmer = Stemmer("english")
-    summarizer = LsaSummarizer(stemmer)
-    summarizer.stop_words = get_stop_words("english")
-
-    parser1 = PlaintextParser.from_string(text1, Tokenizer("english"))
-    summarized1 = summarizer(parser1.document, 1)
-
-    parser2 = PlaintextParser.from_string(text2, Tokenizer("english"))
-    summarized2 = summarizer(parser2.document, 1)
+        text: str,
+        num_sentences: int,
+    ) -> str:
+        parser = PlaintextParser.from_string(text, Tokenizer("english"))
+        summarized = self.summarizer(parser.document, num_sentences)
+        return summarized
 
-    # may want to change to something more complex
-    return fuzz.WRatio(summarized1, summarized2)
diff --git a/tests/puterbot/test_summary_mixin.py b/tests/puterbot/test_summary_mixin.py
@@ -1,66 +1,24 @@
 """
-Tests the 2 helper functions (clean_ascii and compare_text) in summary_mixin.py
+Tests the get_summary function in summary_mixin.py
 """
-from puterbot.strategies.summary_mixin import clean_ascii, compare_text
+from fuzzywuzzy import fuzz
 
+from puterbot.strategies.demo import DemoReplayStrategy
+from puterbot.models import Recording, Screenshot
 
-################################################################
-# Clean ASCII tests
-################################################################
+RECORDING = Recording()
+REPLAY = DemoReplayStrategy(RECORDING)
 
-
-def test_clean_ascii_empty():
+def test_summary_empty():
     empty_text = ""
-    expected = ""
-    actual = clean_ascii(empty_text)
-    assert actual == expected
-
-
-def test_clean_ascii_no_symbols_or_stopwords():
-    no_symbols = "wow no symbols"
-    expected = "wow no symbols"
-    actual = clean_ascii(no_symbols)
-    assert actual == expected
-
-
-def test_clean_ascii_some_symbols_and_stopwords():
-    many_symbols = "wow this! has some... symbols"
-    expected = "wow has some symbols"
-    actual = clean_ascii(many_symbols)
-    assert actual == expected
-
-
-def test_clean_ascii_all_symbols_and_stopwords():
-    all_symbols = "&*@#($)#!| ~~  this  \\"
-    expected = ""
-    actual = clean_ascii(all_symbols)
-    assert actual == expected
-
-
-################################################################
-# Compare text tests
-################################################################
-
-
-def test_compare_text_empty():
-    text1 = ""
-    text2 = ""
-    expected = 0
-    actual = compare_text(text1, text2)
-    assert actual == expected
-
-
-def test_compare_text_similar():
-    text1 = "I love sunshine so much."
-    text2 = "I adore the sun."
-    expected = 50
-    actual = compare_text(text1, text2)
-    assert actual > expected
+    actual = REPLAY.get_summary(empty_text, 1)
+    assert len(actual) == 0
 
 
-def test_compare_text_not_similar():
-    text1 = "I love sunshine so much"
-    text2 = "Once upon a time, there was a princess."
-    expected = 50
-    actual = compare_text(text1, text2)
-    assert actual < expected
+def test_summary_sentence():
+    story = "However, this bottle was not marked “poison,” so Alice ventured to taste it, \
+        and finding it very nice, (it had, in fact, a sort of mixed flavour of cherry-tart, \
+        custard, pine-apple, roast turkey, toffee, and hot buttered toast,) \
+        she very soon finished it off."
+    actual = REPLAY.get_summary(story, 1)
+    assert fuzz.WRatio(actual, story) > 50