1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/examples/wav_to_spectrogram/wav_to_spectrogram.h"
17 
18 #include <vector>
19 
20 #include "tensorflow/cc/ops/audio_ops.h"
21 #include "tensorflow/cc/ops/const_op.h"
22 #include "tensorflow/cc/ops/image_ops.h"
23 #include "tensorflow/cc/ops/standard_ops.h"
24 #include "tensorflow/core/framework/graph.pb.h"
25 #include "tensorflow/core/framework/tensor.h"
26 #include "tensorflow/core/graph/default_device.h"
27 #include "tensorflow/core/graph/graph_def_builder.h"
28 #include "tensorflow/core/lib/core/errors.h"
29 #include "tensorflow/core/lib/core/stringpiece.h"
30 #include "tensorflow/core/lib/core/threadpool.h"
31 #include "tensorflow/core/lib/io/path.h"
32 #include "tensorflow/core/lib/strings/stringprintf.h"
33 #include "tensorflow/core/platform/logging.h"
34 #include "tensorflow/core/platform/types.h"
35 #include "tensorflow/core/public/session.h"
36 
37 using tensorflow::DT_FLOAT;
38 using tensorflow::DT_UINT8;
39 using tensorflow::Output;
40 using tensorflow::TensorShape;
41 
42 // Runs a TensorFlow graph to convert an audio file into a visualization.
WavToSpectrogram(const tensorflow::string & input_wav,tensorflow::int32 window_size,tensorflow::int32 stride,float brightness,const tensorflow::string & output_image)43 tensorflow::Status WavToSpectrogram(const tensorflow::string& input_wav,
44                                     tensorflow::int32 window_size,
45                                     tensorflow::int32 stride, float brightness,
46                                     const tensorflow::string& output_image) {
47   auto root = tensorflow::Scope::NewRootScope();
48   using namespace tensorflow::ops;  // NOLINT(build/namespaces)
49   // The following block creates a TensorFlow graph that:
50   //  - Reads and decodes the audio file into a tensor of float samples.
51   //  - Creates a float spectrogram from those samples.
52   //  - Scales, clamps, and converts that spectrogram to 0 to 255 uint8's.
53   //  - Reshapes the tensor so that it's [height, width, 1] for imaging.
54   //  - Encodes it as a PNG stream and saves it out to a file.
55   Output file_reader =
56       tensorflow::ops::ReadFile(root.WithOpName("input_wav"), input_wav);
57   DecodeWav wav_decoder =
58       DecodeWav(root.WithOpName("wav_decoder"), file_reader);
59   Output spectrogram = AudioSpectrogram(root.WithOpName("spectrogram"),
60                                         wav_decoder.audio, window_size, stride);
61   Output brightness_placeholder =
62       Placeholder(root.WithOpName("brightness_placeholder"), DT_FLOAT,
63                   Placeholder::Attrs().Shape(TensorShape({})));
64   Output mul = Mul(root.WithOpName("mul"), spectrogram, brightness_placeholder);
65   Output min_const = Const(root.WithOpName("min_const"), 255.0f);
66   Output min = Minimum(root.WithOpName("min"), mul, min_const);
67   Output cast = Cast(root.WithOpName("cast"), min, DT_UINT8);
68   Output expand_dims_const = Const(root.WithOpName("expand_dims_const"), -1);
69   Output expand_dims =
70       ExpandDims(root.WithOpName("expand_dims"), cast, expand_dims_const);
71   Output squeeze = Squeeze(root.WithOpName("squeeze"), expand_dims,
72                            Squeeze::Attrs().Axis({0}));
73   Output png_encoder = EncodePng(root.WithOpName("png_encoder"), squeeze);
74   tensorflow::ops::WriteFile file_writer = tensorflow::ops::WriteFile(
75       root.WithOpName("output_image"), output_image, png_encoder);
76   tensorflow::GraphDef graph;
77   TF_RETURN_IF_ERROR(root.ToGraphDef(&graph));
78 
79   // Build a session object from this graph definition. The power of TensorFlow
80   // is that you can reuse complex computations like this, so usually we'd run a
81   // lot of different inputs through it. In this example, we're just doing a
82   // one-off run, so we'll create it and then use it immediately.
83   std::unique_ptr<tensorflow::Session> session(
84       tensorflow::NewSession(tensorflow::SessionOptions()));
85   TF_RETURN_IF_ERROR(session->Create(graph));
86 
87   // We're passing in the brightness as an input, so create a tensor to hold the
88   // value.
89   tensorflow::Tensor brightness_tensor(DT_FLOAT, TensorShape({}));
90   brightness_tensor.scalar<float>()() = brightness;
91 
92   // Run the session to analyze the audio and write out the file.
93   TF_RETURN_IF_ERROR(
94       session->Run({{"brightness_placeholder", brightness_tensor}}, {},
95                    {"output_image"}, nullptr));
96   return tensorflow::Status::OK();
97 }
98