1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #include "tensorflow/core/summary/summary_converter.h"
16
17 #include "tensorflow/core/framework/register_types.h"
18 #include "tensorflow/core/framework/summary.pb.h"
19 #include "tensorflow/core/framework/types.h"
20 #include "tensorflow/core/framework/types.pb.h"
21 #include "tensorflow/core/lib/histogram/histogram.h"
22 #include "tensorflow/core/lib/io/path.h"
23 #include "tensorflow/core/lib/png/png_io.h"
24 #include "tensorflow/core/lib/wav/wav_io.h"
25
26 namespace tensorflow {
27 namespace {
28
29 template <typename T>
TensorValueAt(Tensor t,int64 i,T * out)30 Status TensorValueAt(Tensor t, int64 i, T* out) {
31 #define CASE(I) \
32 case DataTypeToEnum<I>::value: \
33 *out = static_cast<T>(t.flat<I>()(i)); \
34 break;
35 #define COMPLEX_CASE(I) \
36 case DataTypeToEnum<I>::value: \
37 *out = static_cast<T>(t.flat<I>()(i).real()); \
38 break;
39 // clang-format off
40 switch (t.dtype()) {
41 TF_CALL_half(CASE)
42 TF_CALL_float(CASE)
43 TF_CALL_double(CASE)
44 TF_CALL_int8(CASE)
45 TF_CALL_int16(CASE)
46 TF_CALL_int32(CASE)
47 TF_CALL_int64(CASE)
48 TF_CALL_uint8(CASE)
49 TF_CALL_uint16(CASE)
50 TF_CALL_uint32(CASE)
51 TF_CALL_uint64(CASE)
52 TF_CALL_complex64(COMPLEX_CASE)
53 TF_CALL_complex128(COMPLEX_CASE)
54 default:
55 return errors::Unimplemented("SummaryFileWriter ",
56 DataTypeString(t.dtype()),
57 " not supported.");
58 }
59 // clang-format on
60 return Status::OK();
61 #undef CASE
62 #undef COMPLEX_CASE
63 }
64
65 typedef Eigen::Tensor<uint8, 2, Eigen::RowMajor> Uint8Image;
66
67 // Add the sequence of images specified by ith_image to the summary.
68 //
69 // Factoring this loop out into a helper function lets ith_image behave
70 // differently in the float and uint8 cases: the float case needs a temporary
71 // buffer which can be shared across calls to ith_image, but the uint8 case
72 // does not.
AddImages(const string & tag,int max_images,int batch_size,int w,int h,int depth,const std::function<Uint8Image (int)> & ith_image,Summary * s)73 Status AddImages(const string& tag, int max_images, int batch_size, int w,
74 int h, int depth,
75 const std::function<Uint8Image(int)>& ith_image, Summary* s) {
76 const int N = std::min<int>(max_images, batch_size);
77 for (int i = 0; i < N; ++i) {
78 Summary::Value* v = s->add_value();
79 // The tag depends on the number of requested images (not the number
80 // produced.)
81 //
82 // Note that later on avisu uses "/" to figure out a consistent naming
83 // convention for display, so we append "/image" to guarantee that the
84 // image(s) won't be displayed in the global scope with no name.
85 if (max_images > 1) {
86 v->set_tag(strings::StrCat(tag, "/image/", i));
87 } else {
88 v->set_tag(strings::StrCat(tag, "/image"));
89 }
90
91 const auto image = ith_image(i);
92 Summary::Image* si = v->mutable_image();
93 si->set_height(h);
94 si->set_width(w);
95 si->set_colorspace(depth);
96 const int channel_bits = 8;
97 const int compression = -1; // Use zlib default
98 if (!png::WriteImageToBuffer(image.data(), w, h, w * depth, depth,
99 channel_bits, compression,
100 si->mutable_encoded_image_string(), nullptr)) {
101 return errors::Internal("PNG encoding failed");
102 }
103 }
104 return Status::OK();
105 }
106
107 template <class T>
NormalizeFloatImage(int hw,int depth,typename TTypes<T>::ConstMatrix values,typename TTypes<uint8>::ConstVec bad_color,Uint8Image * image)108 void NormalizeFloatImage(int hw, int depth,
109 typename TTypes<T>::ConstMatrix values,
110 typename TTypes<uint8>::ConstVec bad_color,
111 Uint8Image* image) {
112 if (!image->size()) return; // Nothing to do for empty images
113
114 // Rescale the image to uint8 range.
115 //
116 // We are trying to generate an RGB image from a float/half tensor. We do
117 // not have any info about the expected range of values in the tensor
118 // but the generated image needs to have all RGB values within [0, 255].
119 //
120 // We use two different algorithms to generate these values. If the
121 // tensor has only positive values we scale them all by 255/max(values).
122 // If the tensor has both negative and positive values we scale them by
123 // the max of their absolute values and center them around 127.
124 //
125 // This works for most cases, but does not respect the relative dynamic
126 // range across different instances of the tensor.
127
128 // Compute min and max ignoring nonfinite pixels
129 float image_min = std::numeric_limits<float>::infinity();
130 float image_max = -image_min;
131 for (int i = 0; i < hw; i++) {
132 bool finite = true;
133 for (int j = 0; j < depth; j++) {
134 if (!Eigen::numext::isfinite(values(i, j))) {
135 finite = false;
136 break;
137 }
138 }
139 if (finite) {
140 for (int j = 0; j < depth; j++) {
141 float value(values(i, j));
142 image_min = std::min(image_min, value);
143 image_max = std::max(image_max, value);
144 }
145 }
146 }
147
148 // Pick an affine transform into uint8
149 const float kZeroThreshold = 1e-6;
150 T scale, offset;
151 if (image_min < 0) {
152 const float max_val = std::max(std::abs(image_min), std::abs(image_max));
153 scale = T(max_val < kZeroThreshold ? 0.0f : 127.0f / max_val);
154 offset = T(128.0f);
155 } else {
156 scale = T(image_max < kZeroThreshold ? 0.0f : 255.0f / image_max);
157 offset = T(0.0f);
158 }
159
160 // Transform image, turning nonfinite values to bad_color
161 for (int i = 0; i < hw; i++) {
162 bool finite = true;
163 for (int j = 0; j < depth; j++) {
164 if (!Eigen::numext::isfinite(values(i, j))) {
165 finite = false;
166 break;
167 }
168 }
169 if (finite) {
170 image->chip<0>(i) =
171 (values.template chip<0>(i) * scale + offset).template cast<uint8>();
172 } else {
173 image->chip<0>(i) = bad_color;
174 }
175 }
176 }
177
178 template <class T>
NormalizeAndAddImages(const Tensor & tensor,int max_images,int h,int w,int hw,int depth,int batch_size,const string & base_tag,Tensor bad_color_tensor,Summary * s)179 Status NormalizeAndAddImages(const Tensor& tensor, int max_images, int h, int w,
180 int hw, int depth, int batch_size,
181 const string& base_tag, Tensor bad_color_tensor,
182 Summary* s) {
183 // For float and half images, nans and infs are replaced with bad_color.
184 if (bad_color_tensor.dim_size(0) < depth) {
185 return errors::InvalidArgument(
186 "expected depth <= bad_color.size, got depth = ", depth,
187 ", bad_color.size = ", bad_color_tensor.dim_size(0));
188 }
189 auto bad_color_full = bad_color_tensor.vec<uint8>();
190 typename TTypes<uint8>::ConstVec bad_color(bad_color_full.data(), depth);
191
192 // Float images must be scaled and translated.
193 Uint8Image image(hw, depth);
194 auto ith_image = [&tensor, &image, bad_color, batch_size, hw, depth](int i) {
195 auto tensor_eigen = tensor.template shaped<T, 3>({batch_size, hw, depth});
196 typename TTypes<T>::ConstMatrix values(
197 &tensor_eigen(i, 0, 0), Eigen::DSizes<Eigen::DenseIndex, 2>(hw, depth));
198 NormalizeFloatImage<T>(hw, depth, values, bad_color, &image);
199 return image;
200 };
201 return AddImages(base_tag, max_images, batch_size, w, h, depth, ith_image, s);
202 }
203
204 } // namespace
205
AddTensorAsScalarToSummary(const Tensor & t,const string & tag,Summary * s)206 Status AddTensorAsScalarToSummary(const Tensor& t, const string& tag,
207 Summary* s) {
208 Summary::Value* v = s->add_value();
209 v->set_tag(tag);
210 float value;
211 TF_RETURN_IF_ERROR(TensorValueAt<float>(t, 0, &value));
212 v->set_simple_value(value);
213 return Status::OK();
214 }
215
AddTensorAsHistogramToSummary(const Tensor & t,const string & tag,Summary * s)216 Status AddTensorAsHistogramToSummary(const Tensor& t, const string& tag,
217 Summary* s) {
218 Summary::Value* v = s->add_value();
219 v->set_tag(tag);
220 histogram::Histogram histo;
221 for (int64 i = 0; i < t.NumElements(); i++) {
222 double double_val;
223 TF_RETURN_IF_ERROR(TensorValueAt<double>(t, i, &double_val));
224 if (Eigen::numext::isnan(double_val)) {
225 return errors::InvalidArgument("Nan in summary histogram for: ", tag);
226 } else if (Eigen::numext::isinf(double_val)) {
227 return errors::InvalidArgument("Infinity in summary histogram for: ",
228 tag);
229 }
230 histo.Add(double_val);
231 }
232 histo.EncodeToProto(v->mutable_histo(), false /* Drop zero buckets */);
233 return Status::OK();
234 }
235
AddTensorAsImageToSummary(const Tensor & tensor,const string & tag,int max_images,const Tensor & bad_color,Summary * s)236 Status AddTensorAsImageToSummary(const Tensor& tensor, const string& tag,
237 int max_images, const Tensor& bad_color,
238 Summary* s) {
239 if (!(tensor.dims() == 4 &&
240 (tensor.dim_size(3) == 1 || tensor.dim_size(3) == 3 ||
241 tensor.dim_size(3) == 4))) {
242 return errors::InvalidArgument(
243 "Tensor must be 4-D with last dim 1, 3, or 4, not ",
244 tensor.shape().DebugString());
245 }
246 if (!(tensor.dim_size(0) < (1LL << 31) && tensor.dim_size(1) < (1LL << 31) &&
247 tensor.dim_size(2) < (1LL << 31) &&
248 (tensor.dim_size(1) * tensor.dim_size(2)) < (1LL << 29))) {
249 return errors::InvalidArgument("Tensor too large for summary ",
250 tensor.shape().DebugString());
251 }
252 // The casts and h * w cannot overflow because of the limits above.
253 const int batch_size = static_cast<int>(tensor.dim_size(0));
254 const int h = static_cast<int>(tensor.dim_size(1));
255 const int w = static_cast<int>(tensor.dim_size(2));
256 const int hw = h * w; // Compact these two dims for simplicity
257 const int depth = static_cast<int>(tensor.dim_size(3));
258 if (tensor.dtype() == DT_UINT8) {
259 // For uint8 input, no normalization is necessary
260 auto ith_image = [&tensor, batch_size, hw, depth](int i) {
261 auto values = tensor.shaped<uint8, 3>({batch_size, hw, depth});
262 return typename TTypes<uint8>::ConstMatrix(
263 &values(i, 0, 0), Eigen::DSizes<Eigen::DenseIndex, 2>(hw, depth));
264 };
265 TF_RETURN_IF_ERROR(
266 AddImages(tag, max_images, batch_size, w, h, depth, ith_image, s));
267 } else if (tensor.dtype() == DT_HALF) {
268 TF_RETURN_IF_ERROR(NormalizeAndAddImages<Eigen::half>(
269 tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, s));
270 } else if (tensor.dtype() == DT_FLOAT) {
271 TF_RETURN_IF_ERROR(NormalizeAndAddImages<float>(
272 tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, s));
273 } else {
274 return errors::InvalidArgument(
275 "Only DT_INT8, DT_HALF, and DT_FLOAT images are supported. Got ",
276 DataTypeString(tensor.dtype()));
277 }
278 return Status::OK();
279 }
280
AddTensorAsAudioToSummary(const Tensor & tensor,const string & tag,int max_outputs,float sample_rate,Summary * s)281 Status AddTensorAsAudioToSummary(const Tensor& tensor, const string& tag,
282 int max_outputs, float sample_rate,
283 Summary* s) {
284 if (sample_rate <= 0.0f) {
285 return errors::InvalidArgument("sample_rate must be > 0");
286 }
287 const int batch_size = tensor.dim_size(0);
288 const int64 length_frames = tensor.dim_size(1);
289 const int64 num_channels =
290 tensor.dims() == 2 ? 1 : tensor.dim_size(tensor.dims() - 1);
291 const int N = std::min<int>(max_outputs, batch_size);
292 for (int i = 0; i < N; ++i) {
293 Summary::Value* v = s->add_value();
294 if (max_outputs > 1) {
295 v->set_tag(strings::StrCat(tag, "/audio/", i));
296 } else {
297 v->set_tag(strings::StrCat(tag, "/audio"));
298 }
299
300 Summary::Audio* sa = v->mutable_audio();
301 sa->set_sample_rate(sample_rate);
302 sa->set_num_channels(num_channels);
303 sa->set_length_frames(length_frames);
304 sa->set_content_type("audio/wav");
305
306 auto values =
307 tensor.shaped<float, 3>({batch_size, length_frames, num_channels});
308 auto channels_by_frames = typename TTypes<float>::ConstMatrix(
309 &values(i, 0, 0),
310 Eigen::DSizes<Eigen::DenseIndex, 2>(length_frames, num_channels));
311 size_t sample_rate_truncated = lrintf(sample_rate);
312 if (sample_rate_truncated == 0) {
313 sample_rate_truncated = 1;
314 }
315 TF_RETURN_IF_ERROR(wav::EncodeAudioAsS16LEWav(
316 channels_by_frames.data(), sample_rate_truncated, num_channels,
317 length_frames, sa->mutable_encoded_audio_string()));
318 }
319 return Status::OK();
320 }
321
322 } // namespace tensorflow
323