Skip to content
This repository has been archived by the owner on Feb 22, 2020. It is now read-only.

Commit

Permalink
refactor(ffmpeg): refactor ffmpeg again
Browse files Browse the repository at this point in the history
  • Loading branch information
felix committed Aug 26, 2019
1 parent 71cb36f commit 10cef54
Show file tree
Hide file tree
Showing 5 changed files with 362 additions and 135 deletions.
100 changes: 68 additions & 32 deletions gnes/preprocessor/io_utils/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,33 +19,51 @@
import numpy as np
import soundfile as sf

from .ffmpeg import compile_args
from .helper import run_command, run_command_async

from typing import List

DEFAULT_SILENCE_DURATION = 0.3
DEFAULT_SILENCE_THRESHOLD = -60


def capture_audio(filename: str = 'pipe:',
video_data: bytes = None,
def capture_audio(input_fn: str = 'pipe:',
input_data: bytes = None,
bits_per_raw_sample: int = 16,
sample_rate: int = 16000,
start_time: float = None,
end_time: float = None,
**kwargs) -> List['np.ndarray']:

capture_stdin = (filename == 'pipe:')
if capture_stdin and video_data is None:
capture_stdin = (input_fn == 'pipe:')
if capture_stdin and input_data is None:
raise ValueError(
"the buffered video data for stdin should not be empty")

stream = ffmpeg.input(filename)
stream = stream.output(
'pipe:',
format='wav',
bits_per_raw_sample=bits_per_raw_sample,
ac=1,
ar=16000)
input_kwargs = {}
if start_time is not None:
input_kwargs['ss'] = str(start_time)
else:
start_time = 0.
if end_time is not None:
input_kwargs['t'] = str(end_time - start_time)

output_kwargs = {
'format': 'wav',
'bits_per_raw_sample': bits_per_raw_sample,
'ac': 1,
'ar': sample_rate
}

cmd_args = compile_args(
input_fn=input_fn,
input_options=input_kwargs,
output_options=output_kwargs,
overwrite_output=True)

stdout, _ = stream.run(
input=video_data, capture_stdout=True, capture_stderr=True)
stdout, _ = run_command(
cmd_args, input=input_data, pipe_stdout=True, pipe_stderr=True)

audio_stream = io.BytesIO(stdout)
audio_data, sample_rate = sf.read(audio_stream)
Expand All @@ -56,8 +74,8 @@ def capture_audio(filename: str = 'pipe:',
return audio_data


def get_chunk_times(filename: str = 'pipe:',
video_data: bytes = None,
def get_chunk_times(input_fn: str = 'pipe:',
input_data: bytes = None,
silence_threshold: float = DEFAULT_SILENCE_THRESHOLD,
silence_duration: float = DEFAULT_SILENCE_DURATION,
start_time: float = None,
Expand All @@ -78,15 +96,20 @@ def get_chunk_times(filename: str = 'pipe:',
if end_time is not None:
input_kwargs['t'] = end_time - start_time

stream = ffmpeg.input(filename, **input_kwargs)
stream = stream.filter(
'silencedetect',
n='{}dB'.format(silence_threshold),
d=silence_duration)
stream = stream.output('pipe:', format='null')
au_filters = [
'silencedetect=noise={}dB:d={}'.format(silence_threshold,
silence_duration)
]

stdout, stderr = stream.run(
input=video_data, capture_stdout=True, capture_stderr=True)
output_kwargs = {'format': 'null'}
cmd_args = compile_args(
input_fn=input_fn,
input_options=input_kwargs,
audio_filters=au_filters,
output_options=output_kwargs)

stdout, stderr = run_command(
cmd_args, input=input_data, pipe_stdout=True, pipe_stderr=True)

lines = stderr.decode().splitlines()

Expand Down Expand Up @@ -121,28 +144,41 @@ def get_chunk_times(filename: str = 'pipe:',
return list(zip(chunk_starts, chunk_ends))


def split_audio(filename: str = 'pipe:',
video_data: bytes = None,
def split_audio(input_fn: str = 'pipe:',
input_data: bytes = None,
silence_threshold=DEFAULT_SILENCE_THRESHOLD,
silence_duration=DEFAULT_SILENCE_DURATION,
start_time: float = None,
end_time: float = None,
verbose=False):
chunk_times = get_chunk_times(
filename,
video_data=video_data,
input_fn,
input_data=input_data,
silence_threshold=silence_threshold,
silence_duration=silence_duration,
start_time=start_time,
end_time=end_time)

audio_chunks = list()
for i, (start_time, end_time) in enumerate(chunk_times):
time = end_time - start_time
stream = ffmpeg.input(filename, ss=start_time, t=time)
stream = stream.output('pipe:', format='wav')
stdout, _ = stream.run(
input=video_data, capture_stdout=True, capture_stderr=True)
if time < 0:
continue
input_kwargs = {
'ss': start_time,
't': time
}

output_kwargs = {
'format': 'wav'
}

cmd_args = compile_args(
input_fn=input_fn,
input_options=input_kwargs,
output_options=output_kwargs)

stdout, stderr = run_command(
cmd_args, input=input_data, pipe_stdout=True, pipe_stderr=True)

audio_stream = io.BytesIO(stdout)
audio_data, sample_rate = sf.read(audio_stream)
Expand Down
140 changes: 140 additions & 0 deletions gnes/preprocessor/io_utils/ffmpeg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Tencent is pleased to support the open source community by making GNES available.
#
# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from .helper import kwargs_to_cmd_args, run_command, run_command_async

# "Duration: HH:MM:SS" as printed in ffmpeg's input banner. Only whole
# hours/minutes/seconds are captured; any fractional part is not.
VIDEO_DUR_PATTERN = re.compile(r".*Duration: (\d+):(\d+):(\d+)", re.DOTALL)
# A "Stream #0:N: Video: ..." description. Groups: 1 = stream index,
# 2 = codec, 3 = pixel format (only names starting with "yuv" match),
# 4/5 = width/height, 6 = integer part of the fps, 7 = optional fraction.
VIDEO_INFO_PATTERN = re.compile(
    r'.*Stream #0:(\d+)(?:\(\w+\))?: Video: (\w+).*, (yuv\w+)[(,].* (\d+)x(\d+).* (\d+)(\.\d.)? fps',
    re.DOTALL)
# One "Stream #0:N(lang): Audio: <format> ..." line; `lang` and the trailing
# "(default)" marker are optional.
AUDIO_INFO_PATTERN = re.compile(
    r'^\s+Stream #0:(?P<stream>\d+)(\((?P<lang>\w+)\))?: Audio: (?P<format>\w+).*?(?P<default>\(default\))?$',
    re.MULTILINE)
# One "Stream #0:N(lang): Subtitle:" line.
STREAM_SUBTITLE_PATTERN = re.compile(
    r'^\s+Stream #0:(?P<stream>\d+)(\((?P<lang>\w+)\))?: Subtitle:',
    re.MULTILINE)


def parse_media_details(infos):
    """Extract basic video and audio properties from ffmpeg's textual report.

    :param infos: decoded text containing a ``Duration:`` entry and the
        ``Stream #0:N`` descriptions (callers in this module pass ffmpeg's
        stderr).
    :return: dict with the keys

        * ``vcodec`` / ``pix_fmt`` -- strings captured from the video stream
        * ``frame_width`` / ``frame_height`` -- ints
        * ``duration`` -- whole seconds as int (fractional seconds dropped)
        * ``fps`` -- int; only the integer part is kept (29.97 -> 29)
        * ``audio`` -- list of dicts with the keys ``stream``, ``lang``
          (``'und'`` when the stream carries no language tag), ``format``
          and ``default`` (``'(default)'`` or ``None``)
    :raises ValueError: if no duration or no matching video stream description
        can be found in ``infos``.
    """
    video_dur_match = VIDEO_DUR_PATTERN.match(infos)
    if video_dur_match is None:
        # Previously surfaced as AttributeError on NoneType, hiding the cause.
        raise ValueError(
            'no "Duration: HH:MM:SS" entry found in the ffmpeg output')
    dur_hrs, dur_mins, dur_secs = video_dur_match.group(1, 2, 3)

    video_info_match = VIDEO_INFO_PATTERN.match(infos)
    if video_info_match is None:
        raise ValueError(
            'no supported video stream description found in the ffmpeg output')
    codec, pix_fmt, res_width, res_height, fps = video_info_match.group(
        2, 3, 4, 5, 6)

    audio_tracks = []
    for audio_match in AUDIO_INFO_PATTERN.finditer(infos):
        ainfo = audio_match.groupdict()
        if ainfo['lang'] is None:
            # Same placeholder ffmpeg itself prints for an undetermined language.
            ainfo['lang'] = 'und'
        audio_tracks.append(ainfo)

    media_info = {
        'vcodec': codec,
        'frame_width': int(res_width),
        'frame_height': int(res_height),
        'duration': int(dur_hrs) * 3600 + int(dur_mins) * 60 + int(dur_secs),
        'fps': int(fps),
        'pix_fmt': pix_fmt,
        'audio': audio_tracks,
    }
    return media_info


def compile_args(input_fn: str = 'pipe:',
                 output_fn: str = 'pipe:',
                 video_filters: list = None,
                 audio_filters: list = None,
                 input_options: dict = None,
                 output_options: dict = None,
                 overwrite_output: bool = True):
    """Build the argument list for an ``ffmpeg`` invocation.

    The resulting list has the layout::

        ffmpeg [-f IN_FMT] <input options> -i INPUT
               [-vf f1,f2,...] [-af f1,f2,...]
               [-f OUT_FMT] [-b:v RATE] [-b:a RATE] <output options> OUTPUT [-y]

    :param input_fn: input file name or ``'pipe:'``.
    :param output_fn: output file name or ``'pipe:'``.
    :param video_filters: filter expressions joined with ``','`` for ``-vf``.
    :param audio_filters: filter expressions joined with ``','`` for ``-af``.
    :param input_options: options placed before ``-i``. The ``format`` key is
        emitted as ``-f``; the remaining items are converted by
        ``kwargs_to_cmd_args``.
    :param output_options: options placed before the output name. The keys
        ``format``, ``video_bitrate`` and ``audio_bitrate`` are emitted as
        ``-f``, ``-b:v`` and ``-b:a``; the remaining items are converted by
        ``kwargs_to_cmd_args``.
    :param overwrite_output: append ``-y`` to the argument list.
    :return: list of command line arguments, starting with ``'ffmpeg'``.

    Neither option dict is modified; the special keys are removed from
    private copies only.
    """
    # Shallow copies: the pops below must not leak into the caller's dicts
    # (or into a default shared between calls).
    input_options = dict(input_options or {})
    output_options = dict(output_options or {})

    args = ['ffmpeg']

    # --- input side ---
    fmt = input_options.pop('format', None)
    if fmt:
        args += ['-f', fmt]
    args += kwargs_to_cmd_args(input_options)
    args += ['-i', input_fn]

    # --- filters ---
    if video_filters:
        args += ['-vf', ','.join(video_filters)]
    if audio_filters:
        args += ['-af', ','.join(audio_filters)]

    # --- output side ---
    fmt = output_options.pop('format', None)
    if fmt:
        args += ['-f', fmt]
    video_bitrate = output_options.pop('video_bitrate', None)
    if video_bitrate:
        args += ['-b:v', str(video_bitrate)]
    audio_bitrate = output_options.pop('audio_bitrate', None)
    if audio_bitrate:
        args += ['-b:a', str(audio_bitrate)]
    args += kwargs_to_cmd_args(output_options)
    args += [output_fn]

    if overwrite_output:
        args += ['-y']

    return args


def probe(input_fn: str):
    """Ask ``ffprobe`` for the frame size, frame rate and duration of a file.

    ffprobe is run with ``nokey=1``, so its stdout carries bare values, one per
    line; they are read positionally as width, height, r_frame_rate, duration.

    :param input_fn: path of the media file to inspect.
    :return: dict with ``file`` (the given path), ``width`` and ``height``
        (int), ``fps`` (float, numerator / denominator of ``r_frame_rate``)
        and ``duration`` (the raw text line printed by ffprobe, not parsed).
    """
    ffprobe_cmd = ['ffprobe', '-v', 'fatal']
    ffprobe_cmd += ['-show_entries', 'stream=width,height,r_frame_rate,duration']
    ffprobe_cmd += ['-of', 'default=noprint_wrappers=1:nokey=1']
    ffprobe_cmd += [input_fn, '-sexagesimal']

    raw_stdout, _ = run_command(ffprobe_cmd, pipe_stdout=True, pipe_stderr=True)
    fields = raw_stdout.decode().split('\n')

    info = {'file': input_fn}
    info['width'] = int(fields[0])
    info['height'] = int(fields[1])
    # r_frame_rate is printed as a "num/den" fraction.
    rate = fields[2].split('/')
    info['fps'] = float(rate[0]) / float(rate[1])
    info['duration'] = fields[3]
    return info


def get_media_meta(input_fn: str = 'pipe:',
                   input_data: bytes = None,
                   input_options: dict = None):
    """Run ``ffmpeg`` on an input and parse the stream report it prints.

    The command asks for an ``ffmetadata`` dump on stdout; the result of
    interest is ffmpeg's stderr text, which is handed to
    ``parse_media_details``.

    :param input_fn: input file name, or ``'pipe:'`` to feed ``input_data``
        through stdin.
    :param input_data: bytes passed to the process as its input.
    :param input_options: only the ``format`` key is consulted (emitted as
        ``-f`` before ``-i``); the dict is not modified.
    :return: whatever ``parse_media_details`` returns for ffmpeg's stderr.
    """
    cmd_args = ['ffmpeg']

    # Read-only lookup: popping here would silently strip 'format' from the
    # caller's dict (or from a default shared between calls).
    fmt = (input_options or {}).get('format')
    if fmt:
        cmd_args += ['-f', fmt]
    cmd_args += ['-i', input_fn]
    cmd_args += ['-f', 'ffmetadata', 'pipe:']

    _, err = run_command(
        cmd_args, input=input_data, pipe_stdout=True, pipe_stderr=True)
    return parse_media_details(err.decode())
18 changes: 8 additions & 10 deletions gnes/preprocessor/io_utils/gif.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
import numpy as np
import subprocess as sp
import tempfile
import ffmpeg
from .helper import extract_frame_size

from .ffmpeg import parse_media_details



def decode_gif(data: bytes, fps: int = -1,
Expand All @@ -35,26 +36,23 @@ def decode_gif(data: bytes, fps: int = -1,

out, err = stream.run(capture_stdout=True, capture_stderr=True)

width, height = extract_frame_size(err.decode())
meta_info = parse_media_details(err.decode())
width = meta_info['frame_width']
height = meta_info['frame_height']

depth = 3
if pix_fmt == 'rgba':
depth = 4

frames = np.frombuffer(out,
np.uint8).reshape([-1, height, width, depth])
return list(frames)
return frames


def encode_gif(
images: np.ndarray,
scale: str,
images: List[np.ndarray],
fps: int,
pix_fmt: str = 'rgb24'):
"""
https://superuser.com/questions/556029/how-do-i-convert-a-video-to-gif-using-ffmpeg-with-reasonable-quality
https://gist.github.com/alexlee-gk/38916bf524dc75ca1b988d113aa30710
"""

cmd = [
'ffmpeg', '-y', '-f', 'rawvideo', '-vcodec', 'rawvideo', '-r',
Expand Down
Loading

0 comments on commit 10cef54

Please sign in to comment.