1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include <unordered_map>
17
18 #include "absl/base/call_once.h"
19 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
20 #include "tensorflow/core/example/example.pb.h"
21 #include "tensorflow/core/example/feature.pb.h"
22 #include "tensorflow/core/framework/op.h"
23 #include "tensorflow/core/framework/tensor.h"
24 #include "tensorflow/core/framework/tensor_shape.h"
25 #include "tensorflow/core/framework/tensor_types.h"
26 #include "tensorflow/core/framework/types.pb.h"
27 #include "tensorflow/core/graph/algorithm.h"
28 #include "tensorflow/core/graph/graph.h"
29 #include "tensorflow/core/graph/node_builder.h"
30 #include "tensorflow/core/lib/core/status_test_util.h"
31 #include "tensorflow/core/lib/strings/stringprintf.h"
32 #include "tensorflow/core/platform/test.h"
33 #include "tensorflow/core/platform/test_benchmark.h"
34 #include "tensorflow/core/platform/types.h"
35
36 namespace tensorflow {
37
38 typedef std::map<std::tuple<int, int, int>, Tensor> ExampleTensorMap;
39
40 // Fillers to fill the underlying repeated array in protobuf.
41 class BytesFiller {
42 public:
BytesFiller()43 BytesFiller() {}
operator ()(Feature * f,int feature_size) const44 void operator()(Feature* f, int feature_size) const {
45 for (int i = 0; i < feature_size; ++i) {
46 f->mutable_bytes_list()->add_value("abcd1234abcd1234abcd1234abcd1234!");
47 }
48 }
make_dense_default(int feature_size)49 Tensor make_dense_default(int feature_size) {
50 return Tensor(dtype, TensorShape({feature_size}));
51 }
52 DataType dtype = DT_STRING;
53 };
54
55 class Int64Filler {
56 public:
Int64Filler()57 Int64Filler() {}
operator ()(Feature * f,int feature_size) const58 void operator()(Feature* f, int feature_size) const {
59 for (int i = 0; i < feature_size; ++i) {
60 f->mutable_int64_list()->add_value(1729);
61 }
62 }
make_dense_default(int feature_size)63 Tensor make_dense_default(int feature_size) {
64 return Tensor(dtype, TensorShape({feature_size}));
65 }
66 DataType dtype = DT_INT64;
67 };
68
69 class FloatFiller {
70 public:
FloatFiller()71 FloatFiller() {}
operator ()(Feature * f,int feature_size) const72 void operator()(Feature* f, int feature_size) const {
73 for (int i = 0; i < feature_size; ++i) {
74 f->mutable_float_list()->add_value(1.729);
75 }
76 }
make_dense_default(int feature_size)77 Tensor make_dense_default(int feature_size) {
78 return Tensor(dtype, TensorShape({feature_size}));
79 }
80 DataType dtype = DT_FLOAT;
81 };
82
83 template <typename T>
84 struct ExampleStore {
85 private:
86 static ExampleTensorMap serialized_example;
87 static absl::once_flag flags_init;
88
89 public:
GetSerializedExampletensorflow::ExampleStore90 static ExampleTensorMap& GetSerializedExample() {
91 absl::call_once(flags_init, [] {
92 AddExample(&serialized_example, 10, 1, 1);
93 AddExample(&serialized_example, 100, 1, 1);
94 AddExample(&serialized_example, 1000, 1, 1);
95 AddExample(&serialized_example, 10, 128, 1);
96 AddExample(&serialized_example, 100, 128, 1);
97 AddExample(&serialized_example, 1000, 128, 1);
98 AddExample(&serialized_example, 10, 512, 1);
99 AddExample(&serialized_example, 100, 512, 1);
100 AddExample(&serialized_example, 1000, 512, 1);
101 AddExample(&serialized_example, 1, 1, 10);
102 AddExample(&serialized_example, 1, 1, 100);
103 AddExample(&serialized_example, 1, 1, 1000);
104 AddExample(&serialized_example, 1, 1, 10000);
105 AddExample(&serialized_example, 1, 1, 100000);
106 AddExample(&serialized_example, 1, 1, 1000000);
107 AddExample(&serialized_example, 10, 1, 100000);
108 AddExample(&serialized_example, 100, 1, 10000);
109 AddExample(&serialized_example, 1000, 1, 1000);
110 });
111 return serialized_example;
112 }
113 typedef T Filler;
AddExampletensorflow::ExampleStore114 static void AddExample(ExampleTensorMap* examples, int num_keys,
115 int batch_size, int feature_size) {
116 Example example;
117 Filler fill;
118 Tensor record_string(DT_STRING, TensorShape({batch_size}));
119 auto string_t = record_string.vec<tstring>();
120 example.Clear();
121 for (int b = 0; b < batch_size; ++b) {
122 for (int k = 0; k < num_keys; ++k) {
123 string k_str = strings::Printf("feature_%d", k);
124 Feature f;
125 fill(&f, feature_size);
126 Features* features = example.mutable_features();
127 (*features->mutable_feature())[k_str] = f;
128 }
129 CHECK(SerializeToTString(example, &string_t(b)));
130 }
131 (*examples)[std::make_tuple(batch_size, num_keys, feature_size)] =
132 record_string;
133 }
134 };
// Out-of-class definitions of the static data members of ExampleStore<T>.
template <typename T>
ExampleTensorMap ExampleStore<T>::serialized_example;
template <typename T>
absl::once_flag ExampleStore<T>::flags_init;

// Explicit instantiations, one per filler type defined in this file.
template struct ExampleStore<BytesFiller>;
template struct ExampleStore<Int64Filler>;
template struct ExampleStore<FloatFiller>;
143
// Selects how the graph builders in this file configure each feature key.
enum BenchmarkType {
  // Dense key with a default tensor and shape {feature_size}.
  kDense,
  // Sparse key; only the value dtype is recorded.
  kSparse,
  // Dense key with a one-element default tensor and shape {-1}.
  kVarLenDense,
  // Ragged key; only handled by ParseExampleV2, with DT_INT32 row splits.
  kRagged
};
145
146 template <typename S, BenchmarkType b_type>
147 struct BenchmarkOptions {
148 int benchmark_type = b_type;
149 typedef S Store;
150 typename S::Filler filler;
151 };
152
153 template <typename Options>
ParseExample(int batch_size,int num_keys,int feature_size)154 static Graph* ParseExample(int batch_size, int num_keys, int feature_size) {
155 Graph* g = new Graph(OpRegistry::Global());
156 Tensor& serialized = Options::Store::GetSerializedExample()[std::make_tuple(
157 batch_size, num_keys, feature_size)];
158 Tensor names(DT_STRING, TensorShape({batch_size}));
159
160 std::vector<NodeBuilder::NodeOut> sparse_keys;
161 std::vector<NodeBuilder::NodeOut> dense_keys;
162 std::vector<NodeBuilder::NodeOut> dense_defaults;
163 std::vector<DataType> sparse_types;
164 std::vector<PartialTensorShape> dense_shapes;
165 Options opt;
166 for (int i = 0; i < num_keys; ++i) {
167 Tensor key(DT_STRING, TensorShape());
168 key.scalar<tstring>()() = strings::Printf("feature_%d", i);
169 switch (opt.benchmark_type) {
170 case kDense:
171 dense_keys.emplace_back(test::graph::Constant(g, key));
172 dense_defaults.emplace_back(test::graph::Constant(
173 g, opt.filler.make_dense_default(feature_size)));
174 dense_shapes.push_back(PartialTensorShape({feature_size}));
175 break;
176 case kVarLenDense:
177 dense_keys.emplace_back(test::graph::Constant(g, key));
178 dense_defaults.emplace_back(
179 test::graph::Constant(g, opt.filler.make_dense_default(1)));
180 dense_shapes.push_back(PartialTensorShape({-1}));
181 break;
182 case kSparse:
183 sparse_keys.emplace_back(test::graph::Constant(g, key));
184 sparse_types.push_back(opt.filler.dtype);
185 break;
186 }
187 }
188
189 Node* ret;
190 TF_EXPECT_OK(NodeBuilder(g->NewName("n"), "ParseExample")
191 .Input(test::graph::Constant(g, serialized))
192 .Input(test::graph::Constant(g, names))
193 .Input(sparse_keys)
194 .Input(dense_keys)
195 .Input(dense_defaults)
196 .Attr("sparse_types", sparse_types)
197 .Attr("dense_shapes", dense_shapes)
198 .Finalize(g, &ret));
199
200 FixupSourceAndSinkEdges(g);
201 return g;
202 }
203
204 template <typename Options>
ParseExampleV2(int batch_size,int num_keys,int feature_size)205 static Graph* ParseExampleV2(int batch_size, int num_keys, int feature_size) {
206 bool scalar_input = (batch_size == 0);
207 Graph* g = new Graph(OpRegistry::Global());
208 Tensor& serialized_batch =
209 Options::Store::GetSerializedExample()[std::make_tuple(
210 scalar_input ? 1 : batch_size, num_keys, feature_size)];
211 Tensor serialized_example(DT_STRING, TensorShape());
212 Tensor names(DT_STRING,
213 scalar_input ? TensorShape({}) : TensorShape({batch_size}));
214 Tensor* serialized;
215
216 if (scalar_input) {
217 serialized_example.scalar<tstring>()() = serialized_batch.vec<tstring>()(0);
218 serialized = &serialized_example;
219 } else {
220 serialized = &serialized_batch;
221 }
222
223 std::vector<NodeBuilder::NodeOut> dense_defaults;
224 std::vector<DataType> sparse_types;
225 std::vector<DataType> ragged_value_types;
226 std::vector<DataType> ragged_split_types;
227 std::vector<PartialTensorShape> dense_shapes;
228 Tensor keys_t(DT_STRING, {static_cast<int32>(num_keys)});
229 auto keys_flat = keys_t.flat<tstring>();
230 Options opt;
231 for (int i = 0; i < num_keys; ++i) {
232 keys_flat(i) = strings::Printf("feature_%d", i);
233 switch (opt.benchmark_type) {
234 case kDense:
235 dense_defaults.emplace_back(test::graph::Constant(
236 g, opt.filler.make_dense_default(feature_size)));
237 dense_shapes.push_back(PartialTensorShape({feature_size}));
238 break;
239 case kVarLenDense:
240 dense_defaults.emplace_back(
241 test::graph::Constant(g, opt.filler.make_dense_default(1)));
242 dense_shapes.push_back(PartialTensorShape({-1}));
243 break;
244 case kSparse:
245 sparse_types.push_back(opt.filler.dtype);
246 break;
247 case kRagged:
248 ragged_value_types.push_back(opt.filler.dtype);
249 ragged_split_types.push_back(DT_INT32);
250 break;
251 }
252 }
253
254 Tensor empty_keys(DT_STRING, {0});
255 auto bm_type = opt.benchmark_type;
256 auto& sparse_keys = (bm_type == kSparse) ? keys_t : empty_keys;
257 auto& dense_keys =
258 (bm_type == kDense || bm_type == kVarLenDense) ? keys_t : empty_keys;
259 auto& ragged_keys = (bm_type == kRagged) ? keys_t : empty_keys;
260 int num_sparse = opt.benchmark_type == kSparse ? num_keys : 0;
261
262 Node* ret;
263 TF_EXPECT_OK(NodeBuilder(g->NewName("n"), "ParseExampleV2")
264 .Input(test::graph::Constant(g, *serialized))
265 .Input(test::graph::Constant(g, names))
266 .Input(test::graph::Constant(g, sparse_keys))
267 .Input(test::graph::Constant(g, dense_keys))
268 .Input(test::graph::Constant(g, ragged_keys))
269 .Input(dense_defaults)
270 .Attr("num_sparse", num_sparse)
271 .Attr("sparse_types", sparse_types)
272 .Attr("ragged_value_types", ragged_value_types)
273 .Attr("ragged_split_types", ragged_split_types)
274 .Attr("dense_shapes", dense_shapes)
275 .Finalize(g, &ret));
276
277 FixupSourceAndSinkEdges(g);
278 return g;
279 }
280
281 template <typename Options>
ParseSingleExample(int num_keys,int feature_size)282 static Graph* ParseSingleExample(int num_keys, int feature_size) {
283 Graph* g = new Graph(OpRegistry::Global());
284 Tensor& serialized_batch_1 =
285 Options::Store::GetSerializedExample()[std::make_tuple(1, num_keys,
286 feature_size)];
287 Tensor serialized(DT_STRING, TensorShape());
288 serialized.scalar<tstring>()() = serialized_batch_1.vec<tstring>()(0);
289
290 std::vector<string> sparse_keys;
291 std::vector<string> dense_keys;
292 std::vector<NodeBuilder::NodeOut> dense_defaults;
293 std::vector<DataType> sparse_types;
294 std::vector<PartialTensorShape> dense_shapes;
295 Options opt;
296 for (int i = 0; i < num_keys; ++i) {
297 string key = strings::Printf("feature_%d", i);
298 switch (opt.benchmark_type) {
299 case kDense:
300 dense_keys.push_back(key),
301 dense_defaults.emplace_back(test::graph::Constant(
302 g, opt.filler.make_dense_default(feature_size)));
303 dense_shapes.push_back(PartialTensorShape({feature_size}));
304 break;
305 case kVarLenDense:
306 dense_keys.push_back(key),
307 dense_defaults.emplace_back(
308 test::graph::Constant(g, opt.filler.make_dense_default(1)));
309 dense_shapes.push_back(PartialTensorShape({-1}));
310 break;
311 case kSparse:
312 sparse_keys.push_back(key), sparse_types.push_back(opt.filler.dtype);
313 break;
314 }
315 }
316
317 Node* ret;
318 TF_EXPECT_OK(NodeBuilder(g->NewName("n"), "ParseSingleExample")
319 .Input(test::graph::Constant(g, serialized))
320 .Input(dense_defaults)
321 .Attr<int64>("num_sparse", sparse_keys.size())
322 .Attr("sparse_keys", sparse_keys)
323 .Attr("sparse_types", sparse_types)
324 .Attr("dense_keys", dense_keys)
325 .Attr("dense_shapes", dense_shapes)
326 .Finalize(g, &ret));
327
328 FixupSourceAndSinkEdges(g);
329 return g;
330 }
331
332 // Benchmark settings (Sparse, Dense) X (Bytes, Int64, Float)
333 typedef BenchmarkOptions<ExampleStore<BytesFiller>, kSparse> SparseString;
334 typedef BenchmarkOptions<ExampleStore<BytesFiller>, kDense> DenseString;
335 typedef BenchmarkOptions<ExampleStore<BytesFiller>, kVarLenDense>
336 VarLenDenseString;
337 typedef BenchmarkOptions<ExampleStore<BytesFiller>, kRagged> RaggedString;
338 typedef BenchmarkOptions<ExampleStore<Int64Filler>, kSparse> SparseInt64;
339 typedef BenchmarkOptions<ExampleStore<Int64Filler>, kDense> DenseInt64;
340 typedef BenchmarkOptions<ExampleStore<Int64Filler>, kVarLenDense>
341 VarLenDenseInt64;
342 typedef BenchmarkOptions<ExampleStore<Int64Filler>, kRagged> RaggedInt64;
343 typedef BenchmarkOptions<ExampleStore<FloatFiller>, kSparse> SparseFloat;
344 typedef BenchmarkOptions<ExampleStore<FloatFiller>, kDense> DenseFloat;
345 typedef BenchmarkOptions<ExampleStore<FloatFiller>, kVarLenDense>
346 VarLenDenseFloat;
347 typedef BenchmarkOptions<ExampleStore<FloatFiller>, kRagged> RaggedFloat;
348
// Defines and registers a benchmark that runs the graph returned by
// ParseExample<TYPE>(B, K, F) and reports B * K * F items per iteration.
// B == batch_size, K == num_keys. F == feature_size.
// (B, K, F) must be one of the configurations stored by
// ExampleStore::GetSerializedExample().
#define BM_ParseExample(TYPE, B, K, F)                                      \
  static void BM_ParseExample##_##TYPE##_##B##_##K##_##F(                   \
      ::testing::benchmark::State& state) {                                 \
    int64 items_per_iter = static_cast<int64>(B) * K * F;                   \
    test::Benchmark("cpu", ParseExample<TYPE>(B, K, F), nullptr, nullptr,   \
                    nullptr, "SINGLE_THREADED_EXECUTOR",                    \
                    /*old_benchmark_api=*/false)                            \
        .Run(state);                                                        \
    state.SetItemsProcessed(static_cast<int64>(state.iterations()) *        \
                            items_per_iter);                                \
  }                                                                         \
  BENCHMARK(BM_ParseExample##_##TYPE##_##B##_##K##_##F)->UseRealTime();
362
// Instantiates BM_ParseExample for `Type` over every (B, K, F) combination
// listed below: batch sizes 1/128/512 with 10/100/1000 keys of one value
// each, plus a single key holding 1000000 values.
#define BM_AllParseExample(Type)       \
  BM_ParseExample(Type, 1, 10, 1);     \
  BM_ParseExample(Type, 128, 10, 1);   \
  BM_ParseExample(Type, 512, 10, 1);   \
  BM_ParseExample(Type, 1, 100, 1);    \
  BM_ParseExample(Type, 128, 100, 1);  \
  BM_ParseExample(Type, 512, 100, 1);  \
  BM_ParseExample(Type, 1, 1000, 1);   \
  BM_ParseExample(Type, 128, 1000, 1); \
  BM_ParseExample(Type, 512, 1000, 1); \
  BM_ParseExample(Type, 1, 1, 1000000);

// ParseExample benchmarks for every sparse / dense / variable-length dense
// option set; the Ragged option sets are not used here.
BM_AllParseExample(SparseString);
BM_AllParseExample(DenseString);
BM_AllParseExample(VarLenDenseString);
BM_AllParseExample(SparseInt64);
BM_AllParseExample(DenseInt64);
BM_AllParseExample(VarLenDenseInt64);
BM_AllParseExample(SparseFloat);
BM_AllParseExample(DenseFloat);
BM_AllParseExample(VarLenDenseFloat);
384
// Defines and registers a benchmark that runs the graph returned by
// ParseExampleV2<TYPE>(B, K, F) and reports max(B, 1) * K * F items per
// iteration.
// B == batch_size, K == num_keys. F == feature_size.
// B=0 indicates that a scalar input should be used (instead of a vector); the
// batch-size-1 example is used in that case.
// (max(B, 1), K, F) must be one of the configurations stored by
// ExampleStore::GetSerializedExample().
#define BM_ParseExampleV2(TYPE, B, K, F)                                    \
  static void BM_ParseExampleV2##_##TYPE##_##B##_##K##_##F(                 \
      ::testing::benchmark::State& state) {                                 \
    int64 items_per_iter = static_cast<int64>(std::max(B, 1)) * K * F;      \
    test::Benchmark("cpu", ParseExampleV2<TYPE>(B, K, F), nullptr, nullptr, \
                    nullptr, "SINGLE_THREADED_EXECUTOR",                    \
                    /*old_benchmark_api=*/false)                            \
        .Run(state);                                                        \
    state.SetItemsProcessed(static_cast<int64>(state.iterations()) *        \
                            items_per_iter);                                \
  }                                                                         \
  BENCHMARK(BM_ParseExampleV2##_##TYPE##_##B##_##K##_##F)->UseRealTime();
400
// Instantiates BM_ParseExampleV2 for `Type` over every (B, K, F) combination
// listed below, covering both vector inputs (B >= 1) and scalar inputs
// (B == 0).
#define BM_AllParseExampleV2(Type)        \
  /* Vector Inputs */                     \
  BM_ParseExampleV2(Type, 1, 10, 1);      \
  BM_ParseExampleV2(Type, 128, 10, 1);    \
  BM_ParseExampleV2(Type, 512, 10, 1);    \
  BM_ParseExampleV2(Type, 1, 100, 1);     \
  BM_ParseExampleV2(Type, 128, 100, 1);   \
  BM_ParseExampleV2(Type, 512, 100, 1);   \
  BM_ParseExampleV2(Type, 1, 1000, 1);    \
  BM_ParseExampleV2(Type, 128, 1000, 1);  \
  BM_ParseExampleV2(Type, 512, 1000, 1);  \
  BM_ParseExampleV2(Type, 1, 1, 1000000); \
  /* Scalar Inputs */                     \
  BM_ParseExampleV2(Type, 0, 10, 1);      \
  BM_ParseExampleV2(Type, 0, 100, 1);     \
  BM_ParseExampleV2(Type, 0, 1000, 1);    \
  BM_ParseExampleV2(Type, 0, 1, 10);      \
  BM_ParseExampleV2(Type, 0, 1, 100);     \
  BM_ParseExampleV2(Type, 0, 1, 1000);    \
  BM_ParseExampleV2(Type, 0, 1, 10000);   \
  BM_ParseExampleV2(Type, 0, 1, 100000);  \
  BM_ParseExampleV2(Type, 0, 1, 1000000); \
  BM_ParseExampleV2(Type, 0, 10, 100000); \
  BM_ParseExampleV2(Type, 0, 100, 10000); \
  BM_ParseExampleV2(Type, 0, 1000, 1000);

// ParseExampleV2 benchmarks for every option set, including the Ragged ones.
BM_AllParseExampleV2(SparseString);
BM_AllParseExampleV2(DenseString);
BM_AllParseExampleV2(VarLenDenseString);
BM_AllParseExampleV2(RaggedString);
BM_AllParseExampleV2(SparseInt64);
BM_AllParseExampleV2(DenseInt64);
BM_AllParseExampleV2(VarLenDenseInt64);
BM_AllParseExampleV2(RaggedInt64);
BM_AllParseExampleV2(SparseFloat);
BM_AllParseExampleV2(DenseFloat);
BM_AllParseExampleV2(VarLenDenseFloat);
BM_AllParseExampleV2(RaggedFloat);
439
// Defines and registers a benchmark that runs the graph returned by
// ParseSingleExample<TYPE>(K, F) and reports K * F items per iteration.
// K == num_keys. F == feature_size.
// (1, K, F) must be one of the configurations stored by
// ExampleStore::GetSerializedExample().
// The product is widened to int64 before multiplying, and the function is
// static, matching BM_ParseExample and BM_ParseExampleV2.
#define BM_ParseSingleExample(TYPE, K, F)                                    \
  static void BM_ParseSingleExample##_##TYPE##_1_##K##_##F(                  \
      ::testing::benchmark::State& state) {                                  \
    int64 items_per_iter = static_cast<int64>(K) * F;                        \
    test::Benchmark("cpu", ParseSingleExample<TYPE>(K, F), nullptr, nullptr, \
                    nullptr, "SINGLE_THREADED_EXECUTOR",                     \
                    /*old_benchmark_api=*/false)                             \
        .Run(state);                                                         \
    state.SetItemsProcessed(static_cast<int64>(state.iterations()) *         \
                            items_per_iter);                                 \
  }                                                                          \
  BENCHMARK(BM_ParseSingleExample##_##TYPE##_1_##K##_##F)->UseRealTime();
454
// Instantiates BM_ParseSingleExample for `Type` over every (K, F) combination
// listed below.
#define BM_AllParseSingleExample(Type)     \
  BM_ParseSingleExample(Type, 10, 1);      \
  BM_ParseSingleExample(Type, 100, 1);     \
  BM_ParseSingleExample(Type, 1000, 1);    \
  BM_ParseSingleExample(Type, 1, 10);      \
  BM_ParseSingleExample(Type, 1, 100);     \
  BM_ParseSingleExample(Type, 1, 1000);    \
  BM_ParseSingleExample(Type, 1, 10000);   \
  BM_ParseSingleExample(Type, 1, 100000);  \
  BM_ParseSingleExample(Type, 1, 1000000); \
  BM_ParseSingleExample(Type, 10, 100000); \
  BM_ParseSingleExample(Type, 100, 10000); \
  BM_ParseSingleExample(Type, 1000, 1000);

// ParseSingleExample benchmarks for every sparse / dense / variable-length
// dense option set; the Ragged option sets are not used here.
BM_AllParseSingleExample(SparseString);
BM_AllParseSingleExample(DenseString);
BM_AllParseSingleExample(VarLenDenseString);
BM_AllParseSingleExample(SparseInt64);
BM_AllParseSingleExample(DenseInt64);
BM_AllParseSingleExample(VarLenDenseInt64);
BM_AllParseSingleExample(SparseFloat);
BM_AllParseSingleExample(DenseFloat);
BM_AllParseSingleExample(VarLenDenseFloat);
478
479 } // end namespace tensorflow
480