# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""Encoding and decoding audio using FFmpeg."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib.ffmpeg.ops import gen_decode_audio_op_py
from tensorflow.contrib.ffmpeg.ops import gen_decode_video_op_py
from tensorflow.contrib.ffmpeg.ops import gen_encode_audio_op_py
from tensorflow.contrib.util import loader
from tensorflow.python.framework import ops
from tensorflow.python.platform import resource_loader
from tensorflow.python.util.deprecation import deprecated

# Load the shared library that ships next to this module; the generated op
# wrappers imported above are used to build the actual graph ops.
_ffmpeg_so = loader.load_op_library(
    resource_loader.get_path_to_datafile('ffmpeg.so'))


@deprecated('2018-09-04', 'This will be deleted and should not be used.')
def decode_audio(contents, file_format=None, samples_per_second=None,
                 channel_count=None, stream=None):
  """Builds an op that decodes an encoded audio file into samples.

  ffmpeg may pick whichever audio track it considers "best" when decoding an
  mp4; see https://trac.ffmpeg.org/wiki/Map.

  Args:
    contents: Scalar holding the binary contents of the audio file to decode.
    file_format: The format `contents` conforms to, given as a string or a
        scalar string tensor. One of mp3, mp4, ogg, or wav.
    samples_per_second: The assumed sample rate, given as an `int` or a
        scalar `int32` tensor. Resampling may be performed to produce this
        rate.
    channel_count: How many channels the output should have, given as an
        `int` or a scalar `int32` tensor. Extra channels in `contents` are
        merged or dropped; missing channels are created from the existing
        ones.
    stream: A string naming the stream of the content file to decode, e.g.
        '0' for the 0-th stream. If not given, the decision is left to
        ffmpeg.

  Returns:
    A rank-2 tensor with time along dimension 0 and channels along
    dimension 1. Dimension 0 is `samples_per_second * length_in_seconds`
    wide and dimension 1 is `channel_count` wide. An empty tensor is
    returned if ffmpeg fails to decode the audio.
  """
  decoded_samples = gen_decode_audio_op_py.decode_audio_v2(
      contents,
      file_format=file_format,
      samples_per_second=samples_per_second,
      channel_count=channel_count,
      stream=stream)
  return decoded_samples


# NOTE(review): decode_audio() above builds its op via `decode_audio_v2`, while
# this registration names 'DecodeAudio' -- confirm whether the V2 op type needs
# its own registration.
ops.NotDifferentiable('DecodeAudio')


@deprecated('2018-09-04', 'This will be deleted and should not be used.')
def encode_audio(audio, file_format=None, samples_per_second=None):
  """Builds an op that encodes sampled audio from a tensor into a file.

  Args:
    audio: A rank-2 `Tensor` with time along dimension 0 and channels along
        dimension 1. Dimension 0 is `samples_per_second * length_in_seconds`
        long.
    file_format: The kind of file to produce, given as a string or a rank-0
        string tensor. Only "wav" is supported.
    samples_per_second: How many samples of the audio tensor make up one
        second of audio, given as an `int` or a rank-0 `int32` tensor.

  Returns:
    A scalar tensor holding the audio encoded in the requested file format.
  """
  # The bit rate is passed through to the op but is not used by WAV.
  fixed_bits_per_second = 192000
  encoded_contents = gen_encode_audio_op_py.encode_audio_v2(
      audio,
      file_format=file_format,
      samples_per_second=samples_per_second,
      bits_per_second=fixed_bits_per_second)
  return encoded_contents


# NOTE(review): encode_audio() above builds its op via `encode_audio_v2`, while
# this registration names 'EncodeAudio' -- confirm whether the V2 op type needs
# its own registration.
ops.NotDifferentiable('EncodeAudio')


@deprecated('2018-09-04', 'This will be deleted and should not be used.')
def decode_video(contents):
  """Builds an op that decodes an encoded video file into frames.

  Args:
    contents: Scalar holding the binary contents of the video file to decode.

  Returns:
    A rank-4 `Tensor` of RGB data shaped `[frames, height, width, 3]`.
  """
  decoded_frames = gen_decode_video_op_py.decode_video(contents)
  return decoded_frames


ops.NotDifferentiable('DecodeVideo')