Skip to content

Commit

Permalink
Add CCPMF dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexandre committed Nov 30, 2020
1 parent 7937f7c commit eb400d3
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 1 deletion.
4 changes: 4 additions & 0 deletions DeepSpeech/Dockerfile.train
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ RUN apt-get -qq update && apt-get -qq install -y --no-install-recommends \
curl \
wget \
git \
ffmpeg \
python3 \
python3-pip \
ca-certificates \
Expand Down Expand Up @@ -175,6 +176,9 @@ RUN pip install parso==0.7.0

RUN python setup.py install

# For CC PMF importer
RUN pip install num2words

WORKDIR $HOMEDIR

ENV PATH="$HOMEDIR/kenlm/build/bin/:$PATH"
Expand Down
2 changes: 2 additions & 0 deletions DeepSpeech/checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ done;

# Create the data directory; "|| true" discards mkdir's failure status
# (for example when the directory is already present).
mkdir /mnt/extracted/data/ || true

# Ask TensorFlow whether a GPU is available. The returned boolean is neither
# printed nor tested here, so a "no GPU" answer does not change the exit status.
# NOTE(review): presumably run for TensorFlow's device-detection log output --
# confirm whether a missing GPU should abort the checks instead.
python -c "import tensorflow as tf; tf.test.is_gpu_available()"

# Checking with basic LDC93S1 before running into heavy-load
pushd $HOME/ds/
./bin/run-tc-ldc93s1_new.sh 2 16000
Expand Down
14 changes: 14 additions & 0 deletions DeepSpeech/fr/import_ccpmf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
#
# Run the CCPMF importer unless its training CSV is already present.
#
# Environment:
#   HOME                       - must contain ds/ with bin/import_ccpmf.py
#   IMPORTERS_VALIDATE_LOCALE  - optional extra argument(s) forwarded verbatim
#                                to the importer; may be empty or unset
#
# Output directory: /mnt/extracted/data/ccpmf/
# The presence of ccpmf_train.csv there is used as the "already imported"
# marker, so re-running this script after a successful import is a no-op.

set -xe

pushd "$HOME/ds/"
if [[ ! -f "/mnt/extracted/data/ccpmf/ccpmf_train.csv" ]]; then
  # Hot patching like that: rewrite the importer's MAX_SECS assignment in
  # place so that it reads "MAX_SECS = 4.5" before the importer is run.
  sed -ri 's/MAX_SECS = .*/MAX_SECS = 4.5/g' bin/import_ccpmf.py

  # IMPORTERS_VALIDATE_LOCALE is deliberately left unquoted: it must vanish
  # entirely when empty and may split into several words (flag plus value).
  # shellcheck disable=SC2086
  python bin/import_ccpmf.py \
    ${IMPORTERS_VALIDATE_LOCALE} \
    /mnt/extracted/data/ccpmf/
fi
popd
2 changes: 2 additions & 0 deletions DeepSpeech/fr/importers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ import_trainingspeech.sh
import_slr57.sh

../import_m-ailabs.sh

import_ccpmf.sh
2 changes: 1 addition & 1 deletion DeepSpeech/fr/metadata.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -xe

export METADATA_AUTHOR="DeepSpeech-FR-Team"
export METADATA_MODEL_NAME="deepspeech-fr"
export METADATA_MODEL_VERSION="0.5"
export METADATA_MODEL_VERSION="0.6"
export METADATA_CONTACT_INFO="https://discourse.mozilla.org/c/voice/fr"
export METADATA_LICENSE="MIT-0"
export METADATA_LANGUAGE="fr-FR"
Expand Down

0 comments on commit eb400d3

Please sign in to comment.