Skip to content

Commit

Permalink
Added language fallback and folder configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
petewarden committed Jan 3, 2022
1 parent b547dbf commit a9b83fb
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 8 deletions.
74 changes: 67 additions & 7 deletions args.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ const char* source = NULL;

std::vector<std::string> filename_args;

const std::string models_folder = "/etc/spchcat/models/";
const char* default_languages_dir = "/etc/spchcat/models/";
std::string languages_dir = default_languages_dir;

const char* model = NULL;

Expand Down Expand Up @@ -55,7 +56,8 @@ char* hot_words = NULL;

const std::string ListAvailableLanguages() {
std::string result;
DIR* dp = opendir(models_folder.c_str());
fprintf(stderr, "Looking for '%s'\n", languages_dir.c_str());
DIR* dp = opendir(languages_dir.c_str());
if (dp != NULL)
{
struct dirent* ep;
Expand All @@ -82,12 +84,13 @@ void PrintHelp(const char* bin)
std::cout <<
"Usage: " << bin << " [--source mic|system|file] [--language " + ListAvailableLanguages() + "] <WAV files>\n"
"\n"
"Convert speech in audio to text transcripts.\n"
"Speech recognition tool to convert audio to text transcripts.\n"
"\n"
"\t--language\tWhich language to look for (default '" << language << "')\n"
"\t--source NAME\tName of the audio source (default 'mic', can also be 'system', 'file')\n"
"\t--help\t\tShow help\n"
"\nAdvanced settings:\n\n"
"\t--languages_dir\t\t\tPath to folder containing models (default '" << default_languages_dir << "')\n"
"\t--model MODEL\t\t\tPath to the model (protocol buffer binary file)\n"
"\t--scorer SCORER\t\t\tPath to the external scorer file\n"
"\t--source_buffer_size SIZE\tNumber of samples to fetch from source\n"
Expand Down Expand Up @@ -123,6 +126,10 @@ bool HasEnding(std::string const& fullString, std::string const& ending) {
}
}

// Reports whether `fullString` starts with `prefix`.
//
// Only the first prefix.size() characters are compared, via strncmp on the
// C-string views of both arguments, so an empty prefix always matches.
bool HasPrefix(std::string const& fullString, std::string const& prefix) {
  const auto chars_to_compare = prefix.size();
  const int difference =
    strncmp(fullString.c_str(), prefix.c_str(), chars_to_compare);
  if (difference != 0) {
    return false;
  }
  return true;
}

std::string FindFileWithExtension(const std::string& folder, const std::string& extension,
const std::vector<std::string>& excludes = {}) {
std::string result;
Expand Down Expand Up @@ -151,6 +158,25 @@ std::string FindFileWithExtension(const std::string& folder, const std::string&
return result;
}

// Finds an entry in `folder` whose name begins with `prefix`.
//
// Returns the entry name appended to `folder` (a '/' is inserted when `folder`
// does not already end with one), or an empty string when the folder cannot be
// opened or nothing matches. When several entries match, the lexicographically
// smallest name is returned: readdir() makes no ordering guarantee, so taking
// the first hit would make the result depend on the filesystem.
std::string FindFileWithPrefix(const std::string& folder, const std::string& prefix) {
  DIR* dp = opendir(folder.c_str());
  if (dp == NULL) {
    return std::string();
  }

  std::string best_match;
  struct dirent* ep;
  while ((ep = readdir(dp)) != NULL) {
    const std::string filename = ep->d_name;
    // The self and parent links are never a useful result.
    if ((filename == ".") || (filename == "..")) {
      continue;
    }
    if (!HasPrefix(filename, prefix)) {
      continue;
    }
    if (best_match.empty() || (filename < best_match)) {
      best_match = filename;
    }
  }
  closedir(dp);

  if (best_match.empty()) {
    return std::string();
  }
  // Callers may pass a folder without a trailing slash (for example a
  // user-supplied directory), so only add the separator when it is missing.
  const bool needs_separator = !folder.empty() && (folder.back() != '/');
  return folder + (needs_separator ? "/" : "") + best_match;
}

void SplitString(std::string const& str, const char delim,
std::vector<std::string>& out)
{
Expand Down Expand Up @@ -178,11 +204,12 @@ bool ProcessArgs(int argc, char** argv)
language = parts[0].c_str();
}

const char* const short_opts = "s:l:m:o:z:b:c:d:tejs:r:R:w:vh";
const char* const short_opts = "s:l:m:y:o:z:b:c:d:tejs:r:R:w:vh";
const option long_opts[] = {
{"source", required_argument, nullptr, 's'},
{"language", required_argument, nullptr, 'l'},
{"model", required_argument, nullptr, 'm'},
{"languages_dir", required_argument, nullptr, 'y'},
{"scorer", required_argument, nullptr, 'o'},
{"source_buffer_size", required_argument, nullptr, 'z'},
{"audio", required_argument, nullptr, 'a'},
Expand All @@ -201,6 +228,8 @@ bool ProcessArgs(int argc, char** argv)
{nullptr, no_argument, nullptr, 0}
};

bool should_print_help = false;

while (true)
{
const auto opt = getopt_long(argc, argv, short_opts, long_opts, nullptr);
Expand All @@ -218,6 +247,10 @@ bool ProcessArgs(int argc, char** argv)
source = optarg;
break;

case 'y':
languages_dir = optarg;
break;

case 'm':
model = optarg;
break;
Expand Down Expand Up @@ -280,7 +313,7 @@ bool ProcessArgs(int argc, char** argv)
case 'h': // -h or --help
case '?': // Unrecognized option
default:
PrintHelp(argv[0]);
should_print_help = true;
break;
}
}
Expand All @@ -301,8 +334,30 @@ bool ProcessArgs(int argc, char** argv)
}

if (!model) {
const std::string language_folder = models_folder + language + "/";
// Look for the exact match to the language and country combination.
const std::string language_folder = languages_dir + language + "/";
static std::string model_string = FindFileWithExtension(language_folder, ".tflite");
if (model_string.length() == 0) {
// If the right country wasn't found, try falling back to any folder
// with the right language.
std::vector<std::string> lang_parts;
SplitString(language, '_', lang_parts);
const std::string& lang_only = lang_parts[0] + "_";
const std::string lang_only_folder = FindFileWithPrefix(languages_dir, lang_only);
if (lang_only_folder.length() > 0) {
std::vector<std::string> path_parts;
SplitString(lang_only_folder, '/', path_parts);
static std::string found_language = path_parts[path_parts.size() - 1];
const std::string found_language_folder = languages_dir + found_language + "/";
model_string = FindFileWithExtension(found_language_folder, ".tflite");
if (model_string.length() > 0) {
fprintf(stderr, "Warning: Language '%s' not found, falling back to '%s'\n",
language, found_language.c_str());
language = found_language.c_str();
}
}
}

if (model_string.length() == 0) {
fprintf(stderr, "Warning: Model not found in %s\n", language_folder.c_str());
}
Expand All @@ -312,7 +367,7 @@ bool ProcessArgs(int argc, char** argv)
}

if (!scorer) {
const std::string language_folder = models_folder + language + "/";
const std::string language_folder = languages_dir + language + "/";
static std::string scorer_string = FindFileWithExtension(language_folder, ".scorer",
{ "command", "digits", "yesno" });
if (scorer_string.length() == 0) {
Expand Down Expand Up @@ -345,6 +400,11 @@ bool ProcessArgs(int argc, char** argv)
}
}

if (should_print_help) {
PrintHelp(argv[0]);
return false;
}

return true;
}

Expand Down
2 changes: 1 addition & 1 deletion create_deb_package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ NAME=spchcat
VERSION=0.0-1
ARCH=amd64
[email protected]
DESCRIPTION="Convert speech in audio to text transcripts."
DESCRIPTION="Speech recognition tool to convert audio to text transcripts."

BUILD_DIR=build/
MODELS_DIR=${BUILD_DIR}models/
Expand Down

0 comments on commit a9b83fb

Please sign in to comment.