Added language fallback and folder configuration

petewarden · Jan 3, 2022 · a9b83fb · a9b83fb
1 parent b547dbf
commit a9b83fb
Show file tree

Hide file tree

Showing 2 changed files with 68 additions and 8 deletions.
diff --git a/args.h b/args.h
@@ -19,7 +19,8 @@ const char* source = NULL;
 
 std::vector<std::string> filename_args;
 
-const std::string models_folder = "/etc/spchcat/models/";
+const char* default_languages_dir = "/etc/spchcat/models/";
+std::string languages_dir = default_languages_dir;
 
 const char* model = NULL;
 
@@ -55,7 +56,8 @@ char* hot_words = NULL;
 
 const std::string ListAvailableLanguages() {
   std::string result;
-  DIR* dp = opendir(models_folder.c_str());
+  fprintf(stderr, "Looking for '%s'\n", languages_dir.c_str());
+  DIR* dp = opendir(languages_dir.c_str());
   if (dp != NULL)
   {
     struct dirent* ep;
@@ -82,12 +84,13 @@ void PrintHelp(const char* bin)
   std::cout <<
     "Usage: " << bin << " [--source mic|system|file] [--language " + ListAvailableLanguages() + "] <WAV files>\n"
     "\n"
-    "Convert speech in audio to text transcripts.\n"
+    "Speech recognition tool to convert audio to text transcripts.\n"
     "\n"
     "\t--language\tWhich language to look for (default '" << language << "')\n"
     "\t--source NAME\tName of the audio source (default 'mic', can also be 'system', 'file')\n"
     "\t--help\t\tShow help\n"
     "\nAdvanced settings:\n\n"
+    "\t--languages_dir\t\t\tPath to folder containing models (default '" << default_languages_dir << "')\n"
     "\t--model MODEL\t\t\tPath to the model (protocol buffer binary file)\n"
     "\t--scorer SCORER\t\t\tPath to the external scorer file\n"
     "\t--source_buffer_size SIZE\tNumber of samples to fetch from source\n"
@@ -123,6 +126,10 @@ bool HasEnding(std::string const& fullString, std::string const& ending) {
   }
 }
 
+bool HasPrefix(std::string const& fullString, std::string const& prefix) {
+  return (strncmp(fullString.c_str(), prefix.c_str(), prefix.size()) == 0);
+}
+
 std::string FindFileWithExtension(const std::string& folder, const std::string& extension,
   const std::vector<std::string>& excludes = {}) {
   std::string result;
@@ -151,6 +158,25 @@ std::string FindFileWithExtension(const std::string& folder, const std::string&
   return result;
 }
 
+std::string FindFileWithPrefix(const std::string& folder, const std::string& prefix) {
+  std::string result;
+  DIR* dp = opendir(folder.c_str());
+  if (dp != NULL)
+  {
+    struct dirent* ep;
+    while (ep = readdir(dp))
+    {
+      std::string filename = ep->d_name;
+      if (HasPrefix(filename, prefix)) {
+        result = folder + filename;
+        break;
+      }
+    }
+    closedir(dp);
+  }
+  return result;
+}
+
 void SplitString(std::string const& str, const char delim,
   std::vector<std::string>& out)
 {
@@ -178,11 +204,12 @@ bool ProcessArgs(int argc, char** argv)
     language = parts[0].c_str();
   }
 
-  const char* const short_opts = "s:l:m:o:z:b:c:d:tejs:r:R:w:vh";
+  const char* const short_opts = "s:l:m:y:o:z:b:c:d:tejs:r:R:w:vh";
   const option long_opts[] = {
           {"source", required_argument, nullptr, 's'},
           {"language", required_argument, nullptr, 'l'},
           {"model", required_argument, nullptr, 'm'},
+          {"languages_dir", required_argument, nullptr, 'y'},
           {"scorer", required_argument, nullptr, 'o'},
           {"source_buffer_size", required_argument, nullptr, 'z'},
           {"audio", required_argument, nullptr, 'a'},
@@ -201,6 +228,8 @@ bool ProcessArgs(int argc, char** argv)
           {nullptr, no_argument, nullptr, 0}
   };
 
+  bool should_print_help = false;
+
   while (true)
   {
     const auto opt = getopt_long(argc, argv, short_opts, long_opts, nullptr);
@@ -218,6 +247,10 @@ bool ProcessArgs(int argc, char** argv)
       source = optarg;
       break;
 
+    case 'y':
+      languages_dir = optarg;
+      break;
+
     case 'm':
       model = optarg;
       break;
@@ -280,7 +313,7 @@ bool ProcessArgs(int argc, char** argv)
     case 'h': // -h or --help
     case '?': // Unrecognized option
     default:
-      PrintHelp(argv[0]);
+      should_print_help = true;
       break;
     }
   }
@@ -301,8 +334,30 @@ bool ProcessArgs(int argc, char** argv)
   }
 
   if (!model) {
-    const std::string language_folder = models_folder + language + "/";
+    // Look for the exact match to the language and country combination.
+    const std::string language_folder = languages_dir + language + "/";
     static std::string model_string = FindFileWithExtension(language_folder, ".tflite");
+    if (model_string.length() == 0) {
+      // If the right country wasn't found, try falling back to any folder
+      // with the right language.
+      std::vector<std::string> lang_parts;
+      SplitString(language, '_', lang_parts);
+      const std::string& lang_only = lang_parts[0] + "_";
+      const std::string lang_only_folder = FindFileWithPrefix(languages_dir, lang_only);
+      if (lang_only_folder.length() > 0) {
+        std::vector<std::string> path_parts;
+        SplitString(lang_only_folder, '/', path_parts);
+        static std::string found_language = path_parts[path_parts.size() - 1];
+        const std::string found_language_folder = languages_dir + found_language + "/";
+        model_string = FindFileWithExtension(found_language_folder, ".tflite");
+        if (model_string.length() > 0) {
+          fprintf(stderr, "Warning: Language '%s' not found, falling back to '%s'\n",
+            language, found_language.c_str());
+          language = found_language.c_str();
+        }
+      }
+    }
+
     if (model_string.length() == 0) {
       fprintf(stderr, "Warning: Model not found in %s\n", language_folder.c_str());
     }
@@ -312,7 +367,7 @@ bool ProcessArgs(int argc, char** argv)
   }
 
   if (!scorer) {
-    const std::string language_folder = models_folder + language + "/";
+    const std::string language_folder = languages_dir + language + "/";
     static std::string scorer_string = FindFileWithExtension(language_folder, ".scorer",
       { "command", "digits", "yesno" });
     if (scorer_string.length() == 0) {
@@ -345,6 +400,11 @@ bool ProcessArgs(int argc, char** argv)
     }
   }
 
+  if (should_print_help) {
+    PrintHelp(argv[0]);
+    return false;
+  }
+
   return true;
 }
 

diff --git a/create_deb_package.sh b/create_deb_package.sh
@@ -4,7 +4,7 @@ NAME=spchcat
 VERSION=0.0-1
 ARCH=amd64
 [email protected]
-DESCRIPTION="Convert speech in audio to text transcripts."
+DESCRIPTION="Speech recognition tool to convert audio to text transcripts."
 
 BUILD_DIR=build/
 MODELS_DIR=${BUILD_DIR}models/