"""Toolkit for importing, exporting and manipulating audio-gui annotations.
Usage:
toolkit.py import-movie <file-path> <movie-name>
toolkit.py import-annotation <file-path> <movie-name> <annotator-name>
toolkit.py process-movie [--no-audio] [--no-spectrograms] [--movie-start=<movie-start-seconds>] [--movie-end=<movie-end-seconds>] <movie-name> <segment-length-seconds>
toolkit.py process-annotation <movie-name> <annotator-name>
toolkit.py export-all-annotations
toolkit.py (-h | --help)
toolkit.py --version
Options:
-h --help Show this screen.
--version Show version.
--no-audio Don't generate the audio files
--no-spectrograms Dont't generate spectrogram images
--movie-start=<movie-start> Offset into the movie
--segment-length=<segment-length> The length of each segment
--movie-end=<movie-end> If you want to process only part of the movie
"""
import numpy
import math
import pylab
import wave
import array
import os
import progressbar
from docopt import docopt

if __name__ == '__main__':
    arguments = docopt(__doc__, version='Toolkit')
    print(arguments)

    def segmentName(start, size):
        return '%s:%05d:%05d' % (arguments['<movie-name>'], start, start+size)
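    # With <movie-name> 'lecture', for example, segmentName(120, 15) returns
    # 'lecture:00120:00135'; these names become the file names of the
    # spectrogram images and audio clips written by process-movie below.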

    # toolkit.py import-movie <file-path> <movie-name>
    if arguments['import-movie']:
        os.system('mkdir -p movies/%s' % arguments['<movie-name>'])
        os.system('ffmpeg -y -i %s -ac 2 movies/%s/%s.wav' % (arguments['<file-path>'], arguments['<movie-name>'], arguments['<movie-name>']))

    # toolkit.py import-annotation <file-path> <movie-name> <annotator-name>
    if arguments['import-annotation']:
        os.system('mkdir -p movies/%s' % arguments['<movie-name>'])
        os.system('cp %s movies/%s/word-times-%s.csv' % (arguments['<file-path>'],
                                                         arguments['<movie-name>'],
                                                         arguments['<annotator-name>']))
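    # After these two commands the working layout is
    # movies/<movie-name>/<movie-name>.wav (read back by process-movie) plus
    # one word-times-<annotator-name>.csv per imported annotation.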

    # toolkit.py process-movie [--no-audio] [--no-spectrograms] [--movie-start=<movie-start-seconds>] [--movie-end=<movie-end-seconds>] <movie-name> <segment-length-seconds>
    if arguments['process-movie']:
        wavein = wave.open('movies/%s/%s.wav' % (arguments['<movie-name>'], arguments['<movie-name>']), 'rb')
        os.system('mkdir -p public/spectrograms/%s' % arguments['<movie-name>'])
        os.system('mkdir -p public/audio-clips/%s' % arguments['<movie-name>'])
        movieStart = 0 if arguments['--movie-start'] is None else int(arguments['--movie-start'])
        end = math.floor(wavein.getnframes()/wavein.getframerate()) if arguments['--movie-end'] is None else int(arguments['--movie-end'])
        step = int(arguments['<segment-length-seconds>'])
        finalStart = list(range(movieStart, end, step))[-1]
        wavein.setpos(movieStart*wavein.getframerate())
        for start in progressbar.progressbar(range(movieStart, end, step), redirect_stdout=True):
            audioData = numpy.array(array.array('h', wavein.readframes(step*wavein.getframerate())))
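            # readframes() yields step seconds of interleaved samples; the 'h'
            # typecode assumes 16-bit signed PCM (ffmpeg's default wav output),
            # and the final segment may come back shorter than step seconds.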
            if not arguments['--no-spectrograms']:
                fig = pylab.figure(figsize=(20,5))
                spectrum, freqs, t, im = pylab.specgram(audioData/numpy.abs(audioData).mean(),
                                                        NFFT=2*1024, Fs=wavein.getframerate(),
                                                        noverlap=2*768, window=pylab.window_hanning, scale_by_freq=True,
                                                        vmin=-60, vmax=-5,
                                                        cmap=pylab.get_cmap('gist_gray')) # cividis gist_gray cubehelix Greys
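                # 2048-point FFT with a 1536-sample overlap (75%); the fixed
                # vmin/vmax keep the grey scale comparable across segments, and
                # the signal is first normalised by its mean absolute amplitude.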
                pylab.ylim(100,3500)
                pylab.axis('off')
                pylab.gca().xaxis.set_major_locator(pylab.NullLocator())
                pylab.gca().yaxis.set_major_locator(pylab.NullLocator())
                pylab.savefig('public/spectrograms/%s/%s.jpg' % (arguments['<movie-name>'], segmentName(start, step)),
                              bbox_inches='tight', pad_inches=0, dpi=100)
                pylab.close()
                os.system('jpegoptim -ts -S50 public/spectrograms/%s/%s.jpg > /dev/null' % (arguments['<movie-name>'], segmentName(start, step)))
            if not arguments['--no-audio']:
                waveout = wave.open('public/audio-clips/%s/%s.wav' % (arguments['<movie-name>'], segmentName(start, step)), 'wb')
                waveout.setparams(wavein.getparams())
                waveout.writeframes(audioData)
                waveout.close()
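                # Encode the clip to mp3, then write a half-speed copy;
                # atempo=0.5 halves the tempo without changing the pitch.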
                os.system('ffmpeg -hide_banner -loglevel panic -y -i file:public/audio-clips/%s/%s.wav file:public/audio-clips/%s/%s.mp3'
                          % (arguments['<movie-name>'], segmentName(start, step), arguments['<movie-name>'], segmentName(start, step)))
                os.system('ffmpeg -hide_banner -loglevel panic -y -i file:public/audio-clips/{0}/{1}.mp3 -filter:a "atempo=0.5" -vn file:public/audio-clips/{0}/{1}-0.5.mp3 > /dev/null'
                          # Could do: asetrate=44000*0.5,aresample=44000,
                          .format(arguments['<movie-name>'], segmentName(start, step)))

    # toolkit.py process-annotation <movie-name> <annotator-name>
    if arguments['process-annotation']:
        os.system('node populate.js %s %s' % (arguments['<movie-name>'], arguments['<annotator-name>']))

    # toolkit.py export-all-annotations
    if arguments['export-all-annotations']:
        os.system('pip install rdbtools python-lzf')
        os.system('rdb --command json dump.rdb > dump.json')
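        # rdbtools' rdb command converts the Redis snapshot dump.rdb into
        # dump.json; the annotations are presumably stored in that Redis
        # database by the web app.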