Skip to content

Commit

Permalink
deepspeech 0.7.3
Browse files Browse the repository at this point in the history
  • Loading branch information
DewiBrynJones committed Jun 18, 2020
2 parents 5a1d79d + bd8e6b8 commit dff4947
Show file tree
Hide file tree
Showing 45 changed files with 1,165 additions and 1,556 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
*.pyc
DeepSpeech
CorporaCreator
*.csv
.vscode/*
data/*
export/*
homedir/*
Expand All @@ -9,4 +11,5 @@ checkpoints/*
tmp/*
local/bin/commonvoice_url.py
keep
local/bin/__pycache__
local/__pycache__
local/utils/__pycache__
14 changes: 8 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,21 @@ ARG BRANCH
# Base image: the mozilla/deepspeech image tagged with the BRANCH build argument.
FROM mozilla/deepspeech:$BRANCH

# Register the git-lfs package repository, install system packages with apt,
# initialise git-lfs, install the Python packages with pip, and finally remove
# the apt package lists.
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash \
&& apt-get update && apt-get install -y git-lfs lame sox vim zip file \
unzip python3 python3-pip python3-dev \
libffi-dev libssl-dev libxml2-dev \
&& apt-get update && apt-get install -y git-lfs lame sox vim zip file locales-all \
unzip valgrind libffi-dev libssl-dev libxml2-dev \
libxslt1-dev libjpeg8-dev zlib1g-dev dos2unix\
&& apt-get clean \
&& git lfs install \
&& pip3 install sox wget sklearn pandas python_speech_features virtualenv requests jiwer tqdm \
&& pip install sox wget sklearn pandas python_speech_features virtualenv requests tqdm columnize \
&& rm -rf /var/lib/apt/lists/*

# Set the container locale to Welsh (cy_GB) with UTF-8 encoding.
ENV LC_ALL cy_GB.UTF-8
ENV LANG cy_GB.UTF-8
ENV LANGUAGE cy_GB.UTF-8

# Subsequent instructions run from /DeepSpeech.
WORKDIR /DeepSpeech

#RUN python3 util/taskcluster.py --source tensorflow --artifact convert_graphdef_memmapped_format --target native_client \
# && chmod +x native_client/convert_graphdef_memmapped_format
# Fetch the convert_graphdef_memmapped_format artifact (tensorflow source, branch r1.15)
# into the working directory using util/taskcluster.py.
RUN python util/taskcluster.py --source tensorflow --artifact convert_graphdef_memmapped_format --branch r1.15 --target .

# Prepend the native_client and KenLM build/bin directories to PATH.
ENV PATH /DeepSpeech/native_client:/DeepSpeech/native_client/kenlm/build/bin:$PATH

27 changes: 18 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,40 +1,49 @@
# Default goal: a plain `make` runs the `build` target.
default: build
# DeepSpeech release number; used in the release download URLs of the `build` target.
DEEPSPEECH_RELEASE := 0.7.3
# Git branch/tag cloned from mozilla/DeepSpeech; also used as the Docker image tag
# and as part of the container name.
DEEPSPEECH_BRANCH := v$(DEEPSPEECH_RELEASE)
#DEEPSPEECH_RELEASE := 0.5.1
#DEEPSPEECH_BRANCH := transfer-learning2


# Start an interactive bash session in a container created from the
# techiaith/deepspeech:${DEEPSPEECH_BRANCH} image with all GPUs exposed.
# Host directories under ${PWD} (data, checkpoints, models, export, homedir and
# the local scripts) are bind-mounted into the container. The container name
# includes ${DEEPSPEECH_BRANCH} and ${USER}.
run:
docker run --gpus all --name techiaith-deepspeech-${DEEPSPEECH_BRANCH}-${USER} -it \
-v ${PWD}/data/:/data \
-v ${PWD}/checkpoints/:/checkpoints \
-v ${PWD}/checkpoints/:/checkpoints \
-v ${PWD}/models/:/models \
-v ${PWD}/export/:/export \
-v ${PWD}/homedir/:/root \
-v ${PWD}/local/bin:/DeepSpeech/bin/bangor_welsh \
-v ${PWD}/local/:/DeepSpeech/bin/bangor_welsh \
techiaith/deepspeech:${DEEPSPEECH_BRANCH} bash



# Build the Docker images and fetch the upstream release assets:
#   1. if no DeepSpeech directory exists, clone mozilla/DeepSpeech at
#      $(DEEPSPEECH_BRANCH) and build the mozilla/deepspeech image from it;
#   2. if checkpoints/mozilla does not exist, download and unpack the release
#      checkpoint archive there;
#   3. if models/mozilla does not exist, download the release .pbmm model and
#      .scorer files there;
#   4. build the techiaith/deepspeech image from the Dockerfile in this directory,
#      passing the branch as the BRANCH build argument.
build:
if [ ! -d "DeepSpeech" ]; then \
git clone --branch $(DEEPSPEECH_BRANCH) https://github.com/mozilla/DeepSpeech.git; \
cd DeepSpeech && docker build --rm -t mozilla/deepspeech:${DEEPSPEECH_BRANCH} .; \
fi
fi
if [ ! -d "checkpoints/mozilla" ]; then \
mkdir -p checkpoints/mozilla; \
cd checkpoints/mozilla && \
wget https://github.com/mozilla/DeepSpeech/releases/download/v$(DEEPSPEECH_RELEASE)/deepspeech-$(DEEPSPEECH_RELEASE)-checkpoint.tar.gz && \
tar xvfz deepspeech-$(DEEPSPEECH_RELEASE)-checkpoint.tar.gz;\
tar xvfz deepspeech-$(DEEPSPEECH_RELEASE)-checkpoint.tar.gz && \
mv deepspeech-$(DEEPSPEECH_RELEASE)-checkpoint deepspeech-en-checkpoint;\
fi
if [ ! -d "models/mozilla" ]; then \
mkdir -p models/mozilla; \
cd models/mozilla && \
wget https://github.com/mozilla/DeepSpeech/releases/download/v$(DEEPSPEECH_RELEASE)/deepspeech-$(DEEPSPEECH_RELEASE)-models.pbmm && \
wget https://github.com/mozilla/DeepSpeech/releases/download/v$(DEEPSPEECH_RELEASE)/deepspeech-$(DEEPSPEECH_RELEASE)-models.scorer;\
fi
docker build --build-arg BRANCH=${DEEPSPEECH_BRANCH} --rm -t techiaith/deepspeech:${DEEPSPEECH_BRANCH} .


# Remove the Docker images and delete the DeepSpeech checkout, homedir and
# checkpoints directories. The leading '-' on the `docker rmi` lines tells make
# to carry on when an image cannot be removed. The directories are deleted
# with sudo.
clean:
-docker rmi techiaith/deepspeech:${DEEPSPEECH_BRANCH}
-docker rmi mozilla/deepspeech:${DEEPSPEECH_BRANCH}
-docker rmi nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
sudo rm -rf DeepSpeech
sudo rm -rf homedir
sudo rm -rf checkpoints



# Stop and then remove the container named by the `run` target. The leading '-'
# makes make ignore a failure of either command.
stop:
-docker stop techiaith-deepspeech-${DEEPSPEECH_BRANCH}-${USER}
-docker rm techiaith-deepspeech-${DEEPSPEECH_BRANCH}-${USER}

35 changes: 5 additions & 30 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Gweler/*See also* : https://github.com/NVIDIA/nvidia-docker#quickstart

<br/>

## Cychwyn arni / *Quickstart*
## Gosod / *Installation*

```
$ git clone https://github.com/techiaith/docker-deepspeech-cy
Expand All @@ -27,38 +27,13 @@ $ make run

### Data Cymraeg Mozilla CommonVoice / *Mozilla CommonVoice Welsh Data*

Llwythwch y data diweddaraf i lawr o https://voice.mozilla.org/cy/datasets ac yna echdynnwch popeth i ffolder newydd o dan `data`. Er enghraifft.....
Llwythwch y data diweddaraf i lawr o https://voice.mozilla.org/cy/datasets i'r ffolder `docker-deepspeech-cy/data`.

*Download the latest data from https://voice.mozilla.org/cy/datasets and extract all into a new folder underneath `data`. For example.....*
*Download the latest data from https://voice.mozilla.org/cy/datasets to the `docker-deepspeech-cy/data` folder.*


```bash
techiaith@gweinydd:/home/techiaith/docker/docker-deepspeech-cy/data/commonvoice-cy-v4-20191210⟫ ls -l
total 2124544
drwxr-xr-x 2 techiaith techiaith 6459392 Feb 3 18:35 clips
-rw-r--r-- 1 techiaith techiaith 148342 Dec 10 13:42 dev.tsv
-rw-r--r-- 1 techiaith techiaith 580477 Dec 10 13:42 invalidated.tsv
-rw-r--r-- 1 techiaith techiaith 2568371 Dec 10 13:42 other.tsv
-rw-r--r-- 1 techiaith techiaith 147797 Dec 10 13:42 test.tsv
-rw-r--r-- 1 techiaith techiaith 164667 Dec 10 13:42 train.tsv
-rw-r--r-- 1 techiaith techiaith 10434562 Dec 10 13:42 validated.tsv
```

## Hyfforddi / *Training*

Y prif sgriptiau a ddefnyddir ar gyfer hyfforddi yw:

*The main scripts used for training are:*

```
root@3deb765f2438:/DeepSpeech# ./bin/bangor_welsh/run-tl-cv-macsen.sh
root@3deb765f2438:/DeepSpeech# ./bin/bangor_welsh/run-tl-cv-arddweud.sh
```

Gweler y nodyn rhyddhau am wybodaeth am unrhyw ddata pellach y gallai fod eu hangen arnoch

*Please see the release note for information on any further data you might require*



Gweler [README.md](local/README.md)

*See [README_EN.md](local/README_EN.md)*
71 changes: 71 additions & 0 deletions local/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Sgriptiau Hyfforddi DeepSpeech Mozilla

*Click [here](README_EN.md) to read this page in English*

Mae dogfennaeth gan Mozilla ar DeepSpeech ar gael fan hyn: https://deepspeech.readthedocs.io .

Mae'r sgriptiau canlynol yn enghreifftio ac yn hwyluso defnyddio'r camau cyffredinol a ddisgrifir yn nogfennaeth DeepSpeech Mozilla er mwyn creu modelau adnabod lleferydd Cymraeg ar gyfer rhaglenni cynorthwyydd digidol (e.e. Macsen) a trawsgrifiwr.


## Rhagofynion

Llwythwch i lawr data lleferydd Cymraeg o wefan CommonVoice: https://voice.mozilla.org/cy/datasets sy'n cael ei ddarparu fel un ffeil mawr wedi'i gwasgu (e.e. `cy.tar.gz`) . Cadwch y ffeil o fewn y ffolder `data`.


## Paratoi Data

### `import_audio_archive.py`

```shell
root@c67722092f2e:/DeepSpeech# bin/bangor_welsh/import_audio_archive.py --archive /data/cy-v4.tar.gz --target_dir /data/commonvoice-cy-v4-20191210/
```

### `analyze_audio.py`

```shell
root@c67722092f2e:/DeepSpeech# /DeepSpeech/bin/bangor_welsh/analyze_audio.py --csv_dir /data/commonvoice-cy-v4-20191210/clips/
/data/commonvoice-cy-v4-20191210/clips/dev.csv 0.91 hours (3269.93 seconds)
/data/commonvoice-cy-v4-20191210/clips/test.csv 0.98 hours (3514.49 seconds)
/data/commonvoice-cy-v4-20191210/clips/train.csv 1.09 hours (3941.04 seconds)
/data/commonvoice-cy-v4-20191210/clips/train-all.csv 7.48 hours (26928.55 seconds)
/data/commonvoice-cy-v4-20191210/clips/other.csv 14.75 hours (53092.44 seconds)
/data/commonvoice-cy-v4-20191210/clips/validated.csv 58.16 hours (209380.97 seconds)
```

## Model Acwstig


### `run_tl_cv_cy.sh`

```shell
root@c67722092f2e:/DeepSpeech# /DeepSpeech/bin/bangor_welsh/run_tl_cv_cy.sh -c /data/commonvoice-cy-v4-20191210/clips
```


## Modelau Iaith / Parth Penodol

### `import_bangor_resources.py`

Mae angen rhagor o adnoddau gan Brifysgol Bangor er mwyn hyfforddi DeepSpeech ar gyfer adnabod lleferydd Cymraeg mewn gwahanol gyd-destunau defnyddiol. Mae'r sgript isod yn llwytho i lawr rhagor o recordiadau ac/neu chorpora testun sydd yn galluogi adnabod lleferydd Cymraeg o fewn cynorthwyydd digidol ('macsen') neu drawsgrifiwr ('transcribe').

```shell
root@6a88b0d59848:/DeepSpeech# bin/bangor_welsh/import_bangor_resources.py -t /data/macsen -d macsen
```

### `clean_lm_corpus.py`

```shell
root@6a88b0d59848:/DeepSpeech# bin/bangor_welsh/clean_lm_corpus.sh -s /data/macsen/corpus.txt -o /data/macsen/corpus.clean.txt
```

### `build_lm_scorer.sh`

```shell
root@6a88b0d59848:/DeepSpeech# bin/bangor_welsh/build_lm_scorer.sh -s /data/macsen/corpus.clean.txt -o /data/macsen/ -t /data/macsen/deepspeech.csv
```

### `evaluate_lm_scorer.sh`

```shell
root@6a88b0d59848:/DeepSpeech# bin/bangor_welsh/evaluate_lm_scorer.sh -l /data/macsen -t /data/macsen/deepspeech.csv
```
70 changes: 70 additions & 0 deletions local/README_EN.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Scripts for Training Mozilla DeepSpeech

*Cliciwch [yma](README.md) i ddarllen y dudalen hon yn Gymraeg*

Documentation by Mozilla on DeepSpeech can be found here: https://deepspeech.readthedocs.io

The following scripts demonstrate how the general steps described in Mozilla's documentation can be used to create Welsh language speech recognition models for both a voice assistant (e.g. Macsen) and a transcription application.


## Prerequisites

Download the Welsh speech data from the Mozilla CommonVoice website: https://voice.mozilla.org/cy/datasets which is provided as a single large compressed file (`.tar.gz`). Save the file into the `data` folder.


## Prepare Data

### `import_audio_archive.py`

```shell
root@c67722092f2e:/DeepSpeech# bin/bangor_welsh/import_audio_archive.py --archive /data/cy-v4.tar.gz --target_dir /data/commonvoice-cy-v4-20191210/
```

### `analyze_audio.py`

```shell
root@c67722092f2e:/DeepSpeech# /DeepSpeech/bin/bangor_welsh/analyze_audio.py --csv_dir /data/commonvoice-cy-v4-20191210/clips/
/data/commonvoice-cy-v4-20191210/clips/dev.csv 0.91 hours (3269.93 seconds)
/data/commonvoice-cy-v4-20191210/clips/test.csv 0.98 hours (3514.49 seconds)
/data/commonvoice-cy-v4-20191210/clips/train.csv 1.09 hours (3941.04 seconds)
/data/commonvoice-cy-v4-20191210/clips/train-all.csv 7.48 hours (26928.55 seconds)
/data/commonvoice-cy-v4-20191210/clips/other.csv 14.75 hours (53092.44 seconds)
/data/commonvoice-cy-v4-20191210/clips/validated.csv 58.16 hours (209380.97 seconds)
```


## Acoustic Model

### `run_tl_cv_cy.sh`

```shell
root@c67722092f2e:/DeepSpeech# /DeepSpeech/bin/bangor_welsh/run_tl_cv_cy.sh -c /data/commonvoice-cy-v4-20191210/clips
```

## Language Models / Domain Specific

### `import_bangor_resources.py`

You will need further resources from Bangor University in order to train DeepSpeech for various Welsh language applications. The below script will download further recordings and/or text corpora that facilitate Welsh speech recognition for a simple voice assistant ('macsen') or a transcriber ('transcribe').

```shell
root@6a88b0d59848:/DeepSpeech# bin/bangor_welsh/import_bangor_resources.py -t /data/macsen -d macsen
```

### `clean_lm_corpus.py`

```shell
root@6a88b0d59848:/DeepSpeech# bin/bangor_welsh/clean_lm_corpus.sh -s /data/texts/macsen/corpus.txt -o /data/texts/macsen/corpus.clean.txt
```

### `build_lm_scorer.sh`

```shell
root@6a88b0d59848:/DeepSpeech# bin/bangor_welsh/build_lm_scorer.sh -s /data/texts/macsen/corpus.clean.txt -o /data/texts/macsen/ -t /data/macsen/deepspeech.csv
```

### `evaluate_lm_scorer.sh`

```shell
root@6a88b0d59848:/DeepSpeech# bin/bangor_welsh/evaluate_lm_scorer.sh -l /data/texts/macsen -t /data/macsen/deepspeech.csv
```
Empty file added local/__init__.py
Empty file.
File renamed without changes.
36 changes: 36 additions & 0 deletions local/analyze_audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import pathlib
import librosa
import pandas

from argparse import ArgumentParser, RawTextHelpFormatter

# Text shown by argparse as the description in this script's --help output.
DESCRIPTION = """
Report the total audio duration of every *.csv file found directly inside --csv_dir.

Each CSV file must contain a `wav_filename` column; every value in it is joined
onto --csv_dir to locate the audio file. One line is printed per CSV file with
the total duration in hours and in seconds.
"""

def main(csv_root_dir, **args):
    """Print the total audio duration of each CSV file in ``csv_root_dir``.

    Every ``*.csv`` file directly inside ``csv_root_dir`` is read with pandas.
    For each row, the ``wav_filename`` value is joined onto ``csv_root_dir``
    and the duration reported by ``librosa.get_duration`` is accumulated.
    One line per CSV file is printed with the total in hours and seconds.

    Args:
        csv_root_dir: directory holding the CSV files; also the base path the
            ``wav_filename`` values are joined onto.
        **args: extra keyword arguments, accepted and ignored (the command
            line entry point passes every parsed option through).
    """
    for listing_path in pathlib.Path(csv_root_dir).glob("*.csv"):
        listing = pandas.read_csv(listing_path, encoding='utf-8')

        seconds = 0.0
        for _, record in listing.iterrows():
            clip_path = os.path.join(csv_root_dir, record["wav_filename"])
            seconds = seconds + librosa.get_duration(filename=clip_path)

        # Two successive divisions by 60 convert seconds to hours.
        hours = seconds/60.0/60.0
        print("%s\t\t%.2f hours\t(%.2f seconds)" % (listing_path, hours, seconds))


if __name__ == "__main__":
    # Command line entry point: --csv_dir is required and is stored as csv_root_dir.
    cli = ArgumentParser(description=DESCRIPTION, formatter_class=RawTextHelpFormatter)
    cli.add_argument("--csv_dir", dest="csv_root_dir", required=True, help="path to audio corpus CSV files")
    cli.set_defaults(func=main)

    # Every parsed option (including `func` itself) is passed to the handler as
    # a keyword argument.
    options = cli.parse_args()
    options.func(**vars(options))
Loading

0 comments on commit dff4947

Please sign in to comment.