Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[voice] Reduce collisions on exact match and use item synonyms #3698

Merged
merged 8 commits into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.eclipse.jdt.annotation.Nullable;
import org.openhab.core.events.EventPublisher;
import org.openhab.core.items.ItemRegistry;
import org.openhab.core.items.MetadataRegistry;
import org.openhab.core.library.types.HSBType;
import org.openhab.core.library.types.IncreaseDecreaseType;
import org.openhab.core.library.types.NextPreviousType;
Expand Down Expand Up @@ -48,8 +49,8 @@ public class StandardInterpreter extends AbstractRuleBasedInterpreter {

@Activate
public StandardInterpreter(final @Reference EventPublisher eventPublisher,
final @Reference ItemRegistry itemRegistry) {
super(eventPublisher, itemRegistry);
final @Reference ItemRegistry itemRegistry, @Reference MetadataRegistry metadataRegistry) {
super(eventPublisher, itemRegistry, metadataRegistry);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
package org.openhab.core.voice.text;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
Expand All @@ -22,6 +23,7 @@
import java.util.Map.Entry;
import java.util.ResourceBundle;
import java.util.Set;
import java.util.stream.Collectors;

import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
Expand All @@ -30,6 +32,9 @@
import org.openhab.core.items.GroupItem;
import org.openhab.core.items.Item;
import org.openhab.core.items.ItemRegistry;
import org.openhab.core.items.Metadata;
import org.openhab.core.items.MetadataKey;
import org.openhab.core.items.MetadataRegistry;
import org.openhab.core.items.events.ItemEventFactory;
import org.openhab.core.library.types.DecimalType;
import org.openhab.core.library.types.StringType;
Expand All @@ -43,6 +48,7 @@
*
* @author Tilman Kamp - Initial contribution
* @author Kai Kreuzer - Improved error handling
* @author Miguel Álvarez - Reduce collisions on exact match and use item synonyms
*/
@NonNullByDefault
public abstract class AbstractRuleBasedInterpreter implements HumanLanguageInterpreter {
Expand All @@ -65,11 +71,15 @@ public abstract class AbstractRuleBasedInterpreter implements HumanLanguageInter

private static final String LANGUAGE_SUPPORT = "LanguageSupport";

private static final String SYNONYMS_NAMESPACE = "synonyms";

private final MetadataRegistry metadataRegistry;

private Logger logger = LoggerFactory.getLogger(AbstractRuleBasedInterpreter.class);

private final Map<Locale, List<Rule>> languageRules = new HashMap<>();
private final Map<Locale, Set<String>> allItemTokens = new HashMap<>();
private final Map<Locale, Map<Item, List<Set<String>>>> itemTokens = new HashMap<>();
private final Map<Locale, Map<Item, List<List<List<String>>>>> itemTokens = new HashMap<>();

private final ItemRegistry itemRegistry;
private final EventPublisher eventPublisher;
Expand All @@ -90,11 +100,36 @@ public void updated(Item oldElement, Item element) {
invalidate();
}
};
private RegistryChangeListener<Metadata> synonymsChangeListener = new RegistryChangeListener<Metadata>() {
@Override
public void added(Metadata element) {
invalidateIfSynonymsMetadata(element);
}

@Override
public void removed(Metadata element) {
invalidateIfSynonymsMetadata(element);
}

public AbstractRuleBasedInterpreter(final EventPublisher eventPublisher, final ItemRegistry itemRegistry) {
@Override
public void updated(Metadata oldElement, Metadata element) {
invalidateIfSynonymsMetadata(element);
}

private void invalidateIfSynonymsMetadata(Metadata metadata) {
if (metadata.getUID().getNamespace().equals(SYNONYMS_NAMESPACE)) {
invalidate();
}
}
};

public AbstractRuleBasedInterpreter(final EventPublisher eventPublisher, final ItemRegistry itemRegistry,
final MetadataRegistry metadataRegistry) {
this.eventPublisher = eventPublisher;
this.itemRegistry = itemRegistry;
this.metadataRegistry = metadataRegistry;
itemRegistry.addRegistryChangeListener(registryChangeListener);
metadataRegistry.addRegistryChangeListener(synonymsChangeListener);
}

protected void deactivate() {
Expand Down Expand Up @@ -155,40 +190,56 @@ Set<String> getAllItemTokens(Locale locale) {
allItemTokens.put(locale, localeTokens = new HashSet<>());
for (Item item : itemRegistry.getAll()) {
localeTokens.addAll(tokenize(locale, item.getLabel()));
for (String synonym : getItemSynonyms(item)) {
localeTokens.addAll(tokenize(locale, synonym));
}
}
}
return localeTokens;
}

/**
* Retrieves the list of identifier token sets per item currently contained in the {@link ItemRegistry}.
* Each item entry in the resulting hash map will feature a list of different token sets. Each token set
* represents one possible way "through" a chain of parent groups, where each groups tokenized name is
* part of the set.
* Each item entry in the resulting hash map will feature a list of different token lists. Each list
* represents one possible way "through" a chain of parent groups, where each groups name is tokenized
* into a list of strings and included as a member of the chain. Item synonym metadata options are
* used as alternative labels creating new alternative chains for the item.
*
* @param locale The locale that is to be used for preparing the tokens.
* @return the list of identifier token sets per item
*/
Map<Item, List<Set<String>>> getItemTokens(Locale locale) {
Map<Item, List<Set<String>>> localeTokens = itemTokens.get(locale);
Map<Item, List<List<List<String>>>> getItemTokens(Locale locale) {
Map<Item, List<List<List<String>>>> localeTokens = itemTokens.get(locale);
if (localeTokens == null) {
itemTokens.put(locale, localeTokens = new HashMap<>());
for (Item item : itemRegistry.getItems()) {
if (item.getGroupNames().isEmpty()) {
addItem(locale, localeTokens, new HashSet<>(), item);
addItem(locale, localeTokens, new ArrayList<>(), item);
}
}
}
return localeTokens;
}

private void addItem(Locale locale, Map<Item, List<Set<String>>> target, Set<String> tokens, Item item) {
Set<String> nt = new HashSet<>(tokens);
nt.addAll(tokenize(locale, item.getLabel()));
List<Set<String>> list = target.get(item);
if (list == null) {
target.put(item, list = new ArrayList<>());
private String[] getItemSynonyms(Item item) {
MetadataKey key = new MetadataKey("synonyms", item.getName());
Metadata synonymsMetadata = metadataRegistry.get(key);
return (synonymsMetadata != null) ? synonymsMetadata.getValue().split(",") : new String[] {};
}

private void addItem(Locale locale, Map<Item, List<List<List<String>>>> target, List<List<String>> tokens,
Item item) {
addItem(locale, target, tokens, item, item.getLabel());
for (String synonym : getItemSynonyms(item)) {
addItem(locale, target, tokens, item, synonym);
}
}

private void addItem(Locale locale, Map<Item, List<List<List<String>>>> target, List<List<String>> tokens,
Item item, @Nullable String itemLabel) {
List<List<String>> nt = new ArrayList<>(tokens);
nt.add(tokenize(locale, itemLabel));
List<List<List<String>>> list = target.computeIfAbsent(item, k -> new ArrayList<>());
list.add(nt);
if (item instanceof GroupItem groupItem) {
for (Item member : groupItem.getMembers()) {
Expand Down Expand Up @@ -347,7 +398,7 @@ public InterpretationResult interpretAST(ResourceBundle language, ASTNode node)
* touched.
* All others are converted to {@link match} expressions.
*
* @param obj the objects that are to be converted
* @param objects the objects that are to be converted
* @return resulting expression array
*/
protected Expression[] exps(Object... objects) {
Expand Down Expand Up @@ -527,7 +578,8 @@ protected String executeSingle(ResourceBundle language, String[] labelFragments,

/**
* Filters the item registry by matching each item's name with the provided name fragments.
* The item's label and its parent group's labels are tokenized {@link tokenize} and then altogether looked up
* The item's label and its parent group's labels are {@link #tokenize(Locale, String) tokenized} and then
* altogether looked up
* by each and every provided fragment.
* For the item to get included into the result list, every provided fragment has to be found among the label
* tokens.
Expand All @@ -538,52 +590,66 @@ protected String executeSingle(ResourceBundle language, String[] labelFragments,
* @param language Language information that is used for matching
* @param labelFragments label fragments that are used to match an item's label.
* For a positive match, the item's label has to contain every fragment - independently of their order.
* They are treated case insensitive.
* They are treated case-insensitive.
* @param commandType optional command type that all items have to support.
* Provide {null} if there is no need for a certain command to be supported.
* @return All matching items from the item registry.
*/
protected List<Item> getMatchingItems(ResourceBundle language, String[] labelFragments,
@Nullable Class<?> commandType) {
List<Item> items = new ArrayList<>();
Map<Item, List<Set<String>>> map = getItemTokens(language.getLocale());
for (Item item : map.keySet()) {
for (Set<String> parts : map.get(item)) {
boolean allMatch = true;
for (String fragment : labelFragments) {
if (!parts.contains(fragment.toLowerCase(language.getLocale()))) {
allMatch = false;
Set<Item> items = new HashSet<>();
Set<Item> exactMatchItems = new HashSet<>();
Map<Item, List<List<List<String>>>> map = getItemTokens(language.getLocale());
for (Entry<Item, List<List<List<String>>>> entry : map.entrySet()) {
Item item = entry.getKey();
for (List<List<String>> itemLabelFragmentsPath : entry.getValue()) {
boolean exactMatch = false;
logger.trace("Checking tokens {} against the item tokens {}", labelFragments, itemLabelFragmentsPath);
List<String> lowercaseLabelFragments = Arrays.stream(labelFragments)
.map(lf -> lf.toLowerCase(language.getLocale())).toList();
List<String> unmatchedFragments = new ArrayList<>(lowercaseLabelFragments);
for (List<String> itemLabelFragments : itemLabelFragmentsPath) {
if (itemLabelFragments.equals(lowercaseLabelFragments)) {
exactMatch = true;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is that important? And I believe this is not true in general:

  • labelFragments [a, b, c, d]
  • segments: [[b], [c, d], [a]]
  • first segment, there is [a, c, d] left
  • second segment, there is [a] left
  • third segment matches everything that is left, so exactMatch = true

Is that intended?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the review, and sorry for taking so long to answer, it caught me in the middle of another task that took more than expected.

Maybe I don't understanding the question but I think the code ok.
Each segment is compared against lowercaseLabelFragments which is never modified, checking if some segment in the path is an exact match agains the label extracted from the voice command. Each iteration I'm trying to remove from unmatchedFragments but that is a different list initialized with same strings.

I have renamed the segment and parts variables trying to make that part easier to read.

unmatchedFragments.clear();
break;
}
unmatchedFragments.removeAll(itemLabelFragments);
}
if (allMatch) {
boolean allMatched = unmatchedFragments.isEmpty();
logger.trace("All labels matched: {}", allMatched);
logger.trace("Exact match: {}", exactMatch);
if (allMatched) {
if (commandType == null || item.getAcceptedCommandTypes().contains(commandType)) {
String name = item.getName();
boolean insert = true;
for (Item si : items) {
if (name.startsWith(si.getName())) {
insert = false;
}
}
if (insert) {
for (int i = 0; i < items.size(); i++) {
Item si = items.get(i);
if (si.getName().startsWith(name)) {
items.remove(i);
i--;
}
}
items.add(item);
insertDiscardingMembers(items, item);
if (exactMatch) {
insertDiscardingMembers(exactMatchItems, item);
}
}
}
}
}
return items;
if (logger.isDebugEnabled()) {
String typeDetails = commandType != null ? " that accept " + commandType.getSimpleName() : "";
logger.debug("Partial matched items against {}{}: {}", labelFragments, typeDetails,
items.stream().map(Item::getName).collect(Collectors.joining(", ")));
logger.debug("Exact matched items against {}{}: {}", labelFragments, typeDetails,
exactMatchItems.stream().map(Item::getName).collect(Collectors.joining(", ")));
}
return new ArrayList<>(items.size() != 1 && exactMatchItems.size() == 1 ? exactMatchItems : items);
}

private static void insertDiscardingMembers(Set<Item> items, Item item) {
String name = item.getName();
boolean insert = items.stream().noneMatch(i -> name.startsWith(i.getName()));
if (insert) {
items.removeIf((matchedItem) -> matchedItem.getName().startsWith(name));
items.add(item);
}
}

/**
* Tokenizes text. Filters out all unsupported punctuation. Tokens will be lower case.
* Tokenizes text. Filters out all unsupported punctuation. Tokens will be lowercase.
*
* @param locale the locale that should be used for lower casing
* @param text the text that should be tokenized
Expand Down
Loading