From 46b230b0100f65584c168c1e3be746a42290e9ec Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 28 Feb 2022 11:03:59 +0000 Subject: [PATCH] Re-run linter over files after update --- bin/import_atthack.py | 8 ++++---- bin/import_ccpmf.py | 8 ++++---- bin/import_cv2.py | 2 +- bin/import_cv_personal.py | 2 +- bin/import_fisher.py | 8 ++++---- bin/import_lingua_libre.py | 2 +- bin/import_swb.py | 8 ++++---- bin/import_swc.py | 8 ++++---- data/lm/generate_lm.py | 2 +- doc/conf.py | 8 ++++---- training/coqui_stt_training/util/auto_input.py | 8 ++++---- training/coqui_stt_training/util/config.py | 2 +- training/coqui_stt_training/util/evaluate_tools.py | 2 +- training/coqui_stt_training/util/lm_optimize.py | 2 +- 14 files changed, 35 insertions(+), 35 deletions(-) diff --git a/bin/import_atthack.py b/bin/import_atthack.py index 5fa720ed9..e2b24b2da 100755 --- a/bin/import_atthack.py +++ b/bin/import_atthack.py @@ -280,13 +280,13 @@ def get_sample_size(population_size): margin_of_error = 0.01 fraction_picking = 0.50 z_score = 2.58 # Corresponds to confidence level 99% - numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( - margin_of_error ** 2 + numerator = (z_score**2 * fraction_picking * (1 - fraction_picking)) / ( + margin_of_error**2 ) sample_size = 0 for train_size in range(population_size, 0, -1): - denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( - margin_of_error ** 2 * train_size + denominator = 1 + (z_score**2 * fraction_picking * (1 - fraction_picking)) / ( + margin_of_error**2 * train_size ) sample_size = int(numerator / denominator) if 2 * sample_size + train_size <= population_size: diff --git a/bin/import_ccpmf.py b/bin/import_ccpmf.py index 2134eb5f7..82a11c901 100755 --- a/bin/import_ccpmf.py +++ b/bin/import_ccpmf.py @@ -754,13 +754,13 @@ def get_sample_size(population_size): margin_of_error = 0.01 fraction_picking = 0.50 z_score = 2.58 # Corresponds to confidence level 99% - numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( - margin_of_error ** 2 + numerator = (z_score**2 * fraction_picking * (1 - fraction_picking)) / ( + margin_of_error**2 ) sample_size = 0 for train_size in range(population_size, 0, -1): - denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( - margin_of_error ** 2 * train_size + denominator = 1 + (z_score**2 * fraction_picking * (1 - fraction_picking)) / ( + margin_of_error**2 * train_size ) sample_size = int(numerator / denominator) if 2 * sample_size + train_size <= population_size: diff --git a/bin/import_cv2.py b/bin/import_cv2.py index 48e5f908a..95eedad56 100755 --- a/bin/import_cv2.py +++ b/bin/import_cv2.py @@ -59,7 +59,7 @@ def init_worker(params): def one_sample(sample): - """ Take an audio file, and optionally convert it to 16kHz WAV """ + """Take an audio file, and optionally convert it to 16kHz WAV""" mp3_filename = sample[0] if not os.path.splitext(mp3_filename.lower())[1] == ".mp3": mp3_filename += ".mp3" diff --git a/bin/import_cv_personal.py b/bin/import_cv_personal.py index ee80d85d4..1691ecc63 100755 --- a/bin/import_cv_personal.py +++ b/bin/import_cv_personal.py @@ -59,7 +59,7 @@ def init_worker(params): def one_sample(sample): - """ Take an audio file, and optionally convert it to 16kHz WAV """ + """Take an audio file, and optionally convert it to 16kHz WAV""" mp3_filename = sample[0] # Storing wav files next to the mp3 ones - just with a different suffix wav_filename = os.path.splitext(mp3_filename)[0] + ".wav" diff --git a/bin/import_fisher.py b/bin/import_fisher.py index acbee0762..3d794a278 100755 --- a/bin/import_fisher.py +++ b/bin/import_fisher.py @@ -264,13 +264,13 @@ def get_sample_size(population_size): margin_of_error = 0.01 fraction_picking = 0.50 z_score = 2.58 # Corresponds to confidence level 99% - numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( - margin_of_error ** 2 + numerator = (z_score**2 * fraction_picking * (1 - fraction_picking)) / ( + margin_of_error**2 ) sample_size = 0 for train_size in range(population_size, 0, -1): - denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( - margin_of_error ** 2 * train_size + denominator = 1 + (z_score**2 * fraction_picking * (1 - fraction_picking)) / ( + margin_of_error**2 * train_size ) sample_size = int(numerator / denominator) if 2 * sample_size + train_size <= population_size: diff --git a/bin/import_lingua_libre.py b/bin/import_lingua_libre.py index dc1fa5bda..b37e09f7c 100755 --- a/bin/import_lingua_libre.py +++ b/bin/import_lingua_libre.py @@ -57,7 +57,7 @@ def _maybe_extract(target_dir, extracted_data, archive_path): def one_sample(sample): - """ Take a audio file, and optionally convert it to 16kHz WAV """ + """Take a audio file, and optionally convert it to 16kHz WAV""" ogg_filename = sample[0] # Storing wav files next to the ogg ones - just with a different suffix wav_filename = os.path.splitext(ogg_filename)[0] + ".wav" diff --git a/bin/import_swb.py b/bin/import_swb.py index 068a2f739..e4342e95c 100755 --- a/bin/import_swb.py +++ b/bin/import_swb.py @@ -318,13 +318,13 @@ def get_sample_size(population_size): margin_of_error = 0.01 fraction_picking = 0.50 z_score = 2.58 # Corresponds to confidence level 99% - numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( - margin_of_error ** 2 + numerator = (z_score**2 * fraction_picking * (1 - fraction_picking)) / ( + margin_of_error**2 ) sample_size = 0 for train_size in range(population_size, 0, -1): - denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( - margin_of_error ** 2 * train_size + denominator = 1 + (z_score**2 * fraction_picking * (1 - fraction_picking)) / ( + margin_of_error**2 * train_size ) sample_size = int(numerator / denominator) if 2 * sample_size + train_size <= population_size: diff --git a/bin/import_swc.py b/bin/import_swc.py index 4b984131c..75b1410dc 100755 --- a/bin/import_swc.py +++ b/bin/import_swc.py @@ -103,13 +103,13 @@ def get_sample_size(population_size): margin_of_error = 0.01 fraction_picking = 0.50 z_score = 2.58 # Corresponds to confidence level 99% - numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( - margin_of_error ** 2 + numerator = (z_score**2 * fraction_picking * (1 - fraction_picking)) / ( + margin_of_error**2 ) sample_size = 0 for train_size in range(population_size, 0, -1): - denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( - margin_of_error ** 2 * train_size + denominator = 1 + (z_score**2 * fraction_picking * (1 - fraction_picking)) / ( + margin_of_error**2 * train_size ) sample_size = int(numerator / denominator) if 2 * sample_size + train_size <= population_size: diff --git a/data/lm/generate_lm.py b/data/lm/generate_lm.py index f26f257a6..24bfe595f 100644 --- a/data/lm/generate_lm.py +++ b/data/lm/generate_lm.py @@ -9,7 +9,7 @@ def convert_and_filter_topk(args): - """ Convert to lowercase, count word occurrences and save top-k words to a file """ + """Convert to lowercase, count word occurrences and save top-k words to a file""" counter = Counter() data_lower = os.path.join(args.output_dir, "lower.txt.gz") diff --git a/doc/conf.py b/doc/conf.py index ce7e99bf4..2497d4191 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -51,7 +51,7 @@ # -- Project information ----------------------------------------------------- -project = u"Coqui STT" +project = "Coqui STT" copyright = "2021 Coqui GmbH, 2020 DeepSpeech authors, 2019-2020 Mozilla Corporation" author = "Coqui GmbH" @@ -179,7 +179,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, "STT.tex", u"Coqui STT Documentation", u"Coqui GmbH", "manual"), + (master_doc, "STT.tex", "Coqui STT Documentation", "Coqui GmbH", "manual"), ] @@ -187,7 +187,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [(master_doc, "stt", u"Coqui STT Documentation", [author], 1)] +man_pages = [(master_doc, "stt", "Coqui STT Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- @@ -199,7 +199,7 @@ ( master_doc, "STT", - u"Coqui STT Documentation", + "Coqui STT Documentation", author, "STT", "One line description of project.", diff --git a/training/coqui_stt_training/util/auto_input.py b/training/coqui_stt_training/util/auto_input.py index 40e51e845..5e0f60c77 100644 --- a/training/coqui_stt_training/util/auto_input.py +++ b/training/coqui_stt_training/util/auto_input.py @@ -32,13 +32,13 @@ def _get_sample_size(population_size): margin_of_error = 0.01 fraction_picking = 0.50 z_score = 2.58 # Corresponds to confidence level 99% - numerator = (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( - margin_of_error ** 2 + numerator = (z_score**2 * fraction_picking * (1 - fraction_picking)) / ( + margin_of_error**2 ) sample_size = 0 for train_size in range(population_size, 0, -1): - denominator = 1 + (z_score ** 2 * fraction_picking * (1 - fraction_picking)) / ( - margin_of_error ** 2 * train_size + denominator = 1 + (z_score**2 * fraction_picking * (1 - fraction_picking)) / ( + margin_of_error**2 * train_size ) sample_size = int(numerator / denominator) if 2 * sample_size + train_size <= population_size: diff --git a/training/coqui_stt_training/util/config.py b/training/coqui_stt_training/util/config.py index 106c30c10..fe7839957 100644 --- a/training/coqui_stt_training/util/config.py +++ b/training/coqui_stt_training/util/config.py @@ -872,7 +872,7 @@ def initialize_globals_from_args(**override_args): def initialize_globals_from_instance(config): - """ Initialize Config singleton from an existing instance """ + """Initialize Config singleton from an existing instance""" _ConfigSingleton._config = config # pylint: disable=protected-access diff --git a/training/coqui_stt_training/util/evaluate_tools.py b/training/coqui_stt_training/util/evaluate_tools.py index 26f1e2835..9ab5669da 100644 --- a/training/coqui_stt_training/util/evaluate_tools.py +++ b/training/coqui_stt_training/util/evaluate_tools.py @@ -95,7 +95,7 @@ def calculate_and_print_report( def print_report(samples, losses, wer, cer, dataset_name, report_count=5): - """ Print a report summary and samples of best, median and worst results """ + """Print a report summary and samples of best, median and worst results""" # Print summary mean_loss = np.mean(losses) diff --git a/training/coqui_stt_training/util/lm_optimize.py b/training/coqui_stt_training/util/lm_optimize.py index c17af5d50..70a7f3d66 100644 --- a/training/coqui_stt_training/util/lm_optimize.py +++ b/training/coqui_stt_training/util/lm_optimize.py @@ -57,7 +57,7 @@ def compute_lm_optimization() -> dict: study = optuna.create_study() study.set_user_attr("is_character_based", is_character_based) study.optimize(objective, n_jobs=1, n_trials=Config.n_trials) - + return { "lm_alpha": study.best_params.get("lm_alpha"), "lm_beta": study.best_params.get("lm_beta"),