1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# =============================================================================
"""Encoding and decoding audio, and decoding video, using FFmpeg."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py
22from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
23from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py
24from tensorflow.contrib.util import loader
25from tensorflow.python.framework import ops
26from tensorflow.python.platform import resource_loader
27from tensorflow.python.util.deprecation import deprecated
28
# Load the 'ffmpeg.so' op library at import time. Its path is resolved through
# the resource loader, and the value returned by the loader is kept in a
# private module-level name.
_ffmpeg_so = loader.load_op_library(
    resource_loader.get_path_to_datafile('ffmpeg.so'))
31
32
@deprecated('2018-09-04', 'This will be deleted and should not be used.')
def decode_audio(contents, file_format=None, samples_per_second=None,
                 channel_count=None, stream=None):
  """Builds an op that decodes the binary contents of an audio file.

  For an mp4, ffmpeg is free to pick the "best" audio track itself; see
  https://trac.ffmpeg.org/wiki/Map

  Args:
    contents: Scalar holding the binary contents of the audio file to
        decode.
    file_format: The format the contents conform to, given as a string or
        a scalar string tensor. One of mp3, mp4, ogg, or wav.
    samples_per_second: The assumed sample rate, given as an `int` or a
        scalar `int32` tensor. Resampling may occur in some cases to
        produce this rate.
    channel_count: How many channels to produce from the audio contents,
        given as an `int` or a scalar `int32` tensor. When `contents` has
        more channels than this, some are merged or dropped; when it has
        fewer, extra channels are created from the existing ones.
    stream: A string naming the stream of the content file to decode,
        e.g., '0' for the 0-th stream. This wrapper forwards its own
        default of `None` unchanged; the op's default is documented as '',
        which leaves the decision to ffmpeg.

  Returns:
    A rank-2 tensor with time along dimension 0 and channels along
    dimension 1. Dimension 0 is `samples_per_second * length_in_seconds`
    wide and dimension 1 is `channel_count` wide. An empty tensor is
    returned if ffmpeg fails to decode the audio.
  """
  # Every option is passed through by keyword, unmodified, to the generated
  # V2 op wrapper.
  decode_options = {
      'file_format': file_format,
      'samples_per_second': samples_per_second,
      'channel_count': channel_count,
      'stream': stream,
  }
  return gen_decode_audio_op_py.decode_audio_v2(contents, **decode_options)
69
70
# Register the 'DecodeAudio' op type as not differentiable.
# NOTE(review): decode_audio() calls decode_audio_v2; if that op is registered
# under a different type name (presumably 'DecodeAudioV2'), this registration
# does not cover it -- confirm whether a separate registration is needed.
ops.NotDifferentiable('DecodeAudio')
72
73
@deprecated('2018-09-04', 'This will be deleted and should not be used.')
def encode_audio(audio, file_format=None, samples_per_second=None):
  """Builds an op that encodes sampled audio from a tensor as an audio file.

  Args:
    audio: A rank-2 `Tensor` with time along dimension 0 and channels
        along dimension 1. Dimension 0 is `samples_per_second *
        length_in_seconds` long.
    file_format: The type of file to produce, given as a string or a
        rank-0 string tensor. Only "wav" is supported.
    samples_per_second: How many samples of the audio tensor make up one
        second of audio, given as an `int` or a rank-0 `int32` tensor.

  Returns:
    A scalar tensor holding the audio encoded in the requested file format.
  """
  # The bitrate is not used by WAV (the only supported format), so a fixed
  # value is always passed.
  fixed_bits_per_second = 192000
  encoded = gen_encode_audio_op_py.encode_audio_v2(
      audio,
      file_format=file_format,
      samples_per_second=samples_per_second,
      bits_per_second=fixed_bits_per_second)
  return encoded
96
97
# Register the 'EncodeAudio' op type as not differentiable.
# NOTE(review): encode_audio() calls encode_audio_v2; if that op is registered
# under a different type name (presumably 'EncodeAudioV2'), this registration
# does not cover it -- confirm whether a separate registration is needed.
ops.NotDifferentiable('EncodeAudio')
99
100
@deprecated('2018-09-04', 'This will be deleted and should not be used.')
def decode_video(contents):
  """Builds an op that decodes the binary contents of a video file.

  Args:
    contents: Scalar holding the binary contents of the video file to
      decode.

  Returns:
    A rank-4 `Tensor` of RGB output laid out as `[frames, height, width, 3]`.
  """
  # Straight pass-through to the generated op wrapper.
  decoded_frames = gen_decode_video_op_py.decode_video(contents)
  return decoded_frames
113
114
# Register the 'DecodeVideo' op type as not differentiable.
ops.NotDifferentiable('DecodeVideo')
116