Skip to content
This repository has been archived by the owner on Sep 8, 2024. It is now read-only.

Commit

Permalink
Merge 0a37e81 into 13539d3
Browse files Browse the repository at this point in the history
  • Loading branch information
ken-mycroft authored Sep 6, 2021
2 parents 13539d3 + 0a37e81 commit d1e916c
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 16 deletions.
30 changes: 30 additions & 0 deletions mycroft/res/text/en-us/noise_words.list
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
where
what's
which
them
they
when
what
that
will
from
that
also
who
how
did
and
but
the
too
why
for
is
it
do
or
to
of
a


95 changes: 81 additions & 14 deletions mycroft/skills/common_query_skill.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time

from enum import IntEnum
from abc import ABC, abstractmethod
from .mycroft_skill import MycroftSkill

from mycroft.configuration import Configuration
from mycroft.util.file_utils import resolve_resource_file


class CQSMatchLevel(IntEnum):
EXACT = 1 # Skill could find a specific answer for the question
Expand All @@ -34,6 +38,20 @@ def is_CQSVisualMatchLevel(match_level):

VISUAL_DEVICES = ["mycroft_mark_2"]

# Tuning constants for the confidence calculation.

# Score awarded for every topic word
# that also appears in the answer.
TOPIC_MATCH_RELEVANCE = 5

# Factor applied to the relevance score so it outweighs the other terms.
RELEVANCE_MULTIPLIER = 2

# Longer answers are preferred, but only up to this many words.
MAX_ANSWER_LEN_FOR_CONFIDENCE = 50

# Divisor for the word-count bonus: a higher number means less bias
# towards long answers.
WORD_COUNT_DIVISOR = 100


def handles_visuals(platform):
    """Return True if ``platform`` is listed in ``VISUAL_DEVICES``."""
    supports_display = platform in VISUAL_DEVICES
    return supports_display
Expand All @@ -49,8 +67,25 @@ class CommonQuerySkill(MycroftSkill, ABC):
This class works in conjunction with skill-query which collects
answers from several skills presenting the best one available.
"""

def __init__(self, name=None, bus=None):
    """Initialise the skill and load the noise words for its language.

    The noise words are read from ``text/<lang>/noise_words.list`` and
    stored as a list of strings in ``self.translated_noise_words``. When
    the file cannot be located the list stays empty and a warning is
    logged.

    Args:
        name: skill name, passed through to the parent initialiser.
        bus: message bus client, passed through to the parent initialiser.
    """
    super().__init__(name, bus)

    self.translated_noise_words = []
    noise_words_filepath = "text/%s/noise_words.list" % (self.lang,)
    noise_words_filename = resolve_resource_file(noise_words_filepath)
    try:
        # NOTE(review): resolve_resource_file appears to return None when
        # the resource is not found - confirm. open(None) would raise
        # TypeError rather than FileNotFoundError, so a missing resource
        # is routed to the same handler explicitly.
        if noise_words_filename is None:
            raise FileNotFoundError(noise_words_filepath)
        # Resource files may contain non-ASCII words, so do not depend on
        # the locale's default encoding.
        with open(noise_words_filename, encoding="utf-8") as f:
            self.translated_noise_words = f.read().split()
    except FileNotFoundError:
        self.log.warning("Missing noise_words.list file in res/text/lang")

    # Base confidence for each match level.
    # these should probably be configurable
    self.level_confidence = {
        CQSMatchLevel.EXACT: 0.9,
        CQSMatchLevel.CATEGORY: 0.6,
        CQSMatchLevel.GENERAL: 0.5,
    }

def bind(self, bus):
"""Overrides the default bind method of MycroftSkill.
Expand Down Expand Up @@ -80,7 +115,8 @@ def __handle_question_query(self, message):
level = result[1]
answer = result[2]
callback = result[3] if len(result) > 3 else None
confidence = self.__calc_confidence(match, search_phrase, level)
confidence = self.__calc_confidence(
match, search_phrase, level, answer)
self.bus.emit(message.response({"phrase": search_phrase,
"skill_id": self.skill_id,
"answer": answer,
Expand All @@ -92,27 +128,58 @@ def __handle_question_query(self, message):
"skill_id": self.skill_id,
"searching": False}))

def __calc_confidence(self, match, phrase, level):
def remove_noise(self, phrase):
    """Remove noise words to produce the essence of the question.

    The phrase is split on whitespace and every token that appears in
    ``self.translated_noise_words`` is dropped. The comparison is
    case-sensitive and matches whole words only.

    Args:
        phrase (str): text to strip noise words from.

    Returns:
        str: the remaining words joined by single spaces, or an empty
            string when nothing remains.
    """
    noise_words = set(self.translated_noise_words)
    # Filter token by token instead of replacing ' word ' substrings:
    # substring replacement does not overlap, so a noise word directly
    # following another occurrence of itself would be left behind.
    return ' '.join(
        word for word in phrase.split() if word not in noise_words)

def __calc_confidence(self, match, phrase, level, answer):
    """Calculate a confidence score for an answer to a question.

    The score is the sum of the base confidence for the match level, the
    share of the phrase consumed by the match, a bonus for visual answers
    on platforms that can show them, a bonus per sentence in the answer,
    the relevance of the answer to the topic of the match, and a bonus
    for the (capped) word count of the answer. The result is not clamped
    and may exceed 1.0.

    Args:
        match (str): the part of the phrase the skill matched.
        phrase (str): the full search phrase.
        level: match level reported by the skill, used as a key into
            ``self.level_confidence``.
        answer (str): the answer text.

    Returns:
        float: the confidence, or 0.0 when the level is not recognised.
    """
    base_confidence = self.level_confidence.get(level)
    if base_confidence is None:
        return 0.0  # should never happen

    # Assume the more of the words that get consumed, the better the match
    phrase_words = phrase.split()
    consumed_pct = 0.0
    if phrase_words:  # an empty phrase would otherwise divide by zero
        consumed_pct = min(len(match.split()) / len(phrase_words), 1.0)
    consumed_pct /= 10

    # bonus for more sentences
    num_sentences = len(answer.split(".")) / 10

    # Add bonus if match has visuals and the device supports them.
    platform = self.config_core.get("enclosure", {}).get("platform")
    bonus = 0.0
    if is_CQSVisualMatchLevel(level) and handles_visuals(platform):
        bonus = 0.1

    # extract topic
    topic = self.remove_noise(match)

    # calculate relevance; both sides are lower-cased so that
    # capitalisation in the question cannot hide a match
    answer = answer.lower()
    matches = 0
    # split() without a separator yields no words for an empty topic;
    # an empty string would be "found" in every answer
    for word in topic.split():
        if word.lower() in answer:
            matches += TOPIC_MATCH_RELEVANCE

    answer_size = len(answer.split(" "))
    answer_size = min(MAX_ANSWER_LEN_FOR_CONFIDENCE, answer_size)

    relevance = 0.0
    if answer_size > 0:
        relevance = matches / answer_size

    relevance = relevance * RELEVANCE_MULTIPLIER

    # extra credit for more words up to a point
    wc_mod = answer_size / WORD_COUNT_DIVISOR * 2

    confidence = base_confidence + \
        consumed_pct + bonus + num_sentences + relevance + wc_mod

    return confidence

def __handle_query_action(self, message):
"""Message handler for question:action.
Expand Down
4 changes: 2 additions & 2 deletions test/unittests/skills/test_common_query_skill.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def test_successful_match_query_phrase(self):
'What\'s the meaning of life')
self.assertEqual(response.data['skill_id'], self.skill.skill_id)
self.assertEqual(response.data['answer'], '42')
self.assertEqual(response.data['conf'], 1.0)
self.assertEqual(response.data['conf'], 1.12)

def test_successful_visual_match_query_phrase(self):
self.skill.config_core['enclosure']['platform'] = 'mycroft_mark_2'
Expand All @@ -120,7 +120,7 @@ def test_successful_visual_match_query_phrase(self):
'What\'s the meaning of life')
self.assertEqual(response.data['skill_id'], self.skill.skill_id)
self.assertEqual(response.data['answer'], '42')
self.assertEqual(response.data['conf'], 1.1)
self.assertEqual(response.data['conf'], 1.2200000000000002)


class CQSTest(CommonQuerySkill):
Expand Down

0 comments on commit d1e916c

Please sign in to comment.