1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
17 #include "tensorflow/core/framework/tensor.h"
18 #include "tensorflow/core/framework/tensor_testutil.h"
19 #include "tensorflow/core/graph/node_builder.h"
20 #include "tensorflow/core/lib/random/random.h"
21 #include "tensorflow/core/platform/test.h"
22 #include "tensorflow/core/platform/test_benchmark.h"
23 #include "tensorflow/core/public/session_options.h"
24 
25 namespace tensorflow {
26 
27 namespace {
28 
GetSingleThreadedOptions()29 const SessionOptions* GetSingleThreadedOptions() {
30   static const SessionOptions* const kSessionOptions = []() {
31     SessionOptions* const result = new SessionOptions();
32     result->config.set_intra_op_parallelism_threads(1);
33     result->config.set_inter_op_parallelism_threads(1);
34     result->config.add_session_inter_op_thread_pool()->set_num_threads(1);
35     return result;
36   }();
37   return kSessionOptions;
38 }
39 
GetMultiThreadedOptions()40 const SessionOptions* GetMultiThreadedOptions() {
41   static const SessionOptions* const kSessionOptions = []() {
42     SessionOptions* const result = new SessionOptions();
43     result->config.set_intra_op_parallelism_threads(0);  // Auto-configured.
44     result->config.set_inter_op_parallelism_threads(0);  // Auto-configured.
45     result->config.add_session_inter_op_thread_pool()->set_num_threads(
46         0);  // Auto-configured.
47     return result;
48   }();
49   return kSessionOptions;
50 }
51 
Var(Graph * const g,const int n)52 Node* Var(Graph* const g, const int n) {
53   return test::graph::Var(g, DT_FLOAT, TensorShape({n}));
54 }
55 
56 // Returns a vector of size 'nodes' with each node being of size 'node_size'.
VarVector(Graph * const g,const int nodes,const int node_size)57 std::vector<Node*> VarVector(Graph* const g, const int nodes,
58                              const int node_size) {
59   std::vector<Node*> result;
60   result.reserve(nodes);
61   for (int i = 0; i < nodes; ++i) {
62     result.push_back(Var(g, node_size));
63   }
64   return result;
65 }
66 
Zeros(Graph * const g,const TensorShape & shape)67 Node* Zeros(Graph* const g, const TensorShape& shape) {
68   Tensor data(DT_FLOAT, shape);
69   data.flat<float>().setZero();
70   return test::graph::Constant(g, data);
71 }
72 
Zeros(Graph * const g,const int n)73 Node* Zeros(Graph* const g, const int n) { return Zeros(g, TensorShape({n})); }
74 
Ones(Graph * const g,const int n)75 Node* Ones(Graph* const g, const int n) {
76   Tensor data(DT_FLOAT, TensorShape({n}));
77   test::FillFn<float>(&data, [](const int i) { return 1.0f; });
78   return test::graph::Constant(g, data);
79 }
80 
SparseIndices(Graph * const g,const int sparse_features_per_group)81 Node* SparseIndices(Graph* const g, const int sparse_features_per_group) {
82   Tensor data(DT_INT64, TensorShape({sparse_features_per_group}));
83   test::FillFn<int64>(&data, [&](const int i) { return i; });
84   return test::graph::Constant(g, data);
85 }
86 
SparseExampleIndices(Graph * const g,const int sparse_features_per_group,const int num_examples)87 Node* SparseExampleIndices(Graph* const g, const int sparse_features_per_group,
88                            const int num_examples) {
89   const int x_size = num_examples * 4;
90   Tensor data(DT_INT64, TensorShape({x_size}));
91   test::FillFn<int64>(&data, [&](const int i) { return i / 4; });
92   return test::graph::Constant(g, data);
93 }
94 
SparseFeatureIndices(Graph * const g,const int sparse_features_per_group,const int num_examples)95 Node* SparseFeatureIndices(Graph* const g, const int sparse_features_per_group,
96                            const int num_examples) {
97   const int x_size = num_examples * 4;
98   Tensor data(DT_INT64, TensorShape({x_size}));
99   test::FillFn<int64>(
100       &data, [&](const int i) { return i % sparse_features_per_group; });
101   return test::graph::Constant(g, data);
102 }
103 
RandomZeroOrOne(Graph * const g,const int n)104 Node* RandomZeroOrOne(Graph* const g, const int n) {
105   Tensor data(DT_FLOAT, TensorShape({n}));
106   test::FillFn<float>(&data, [](const int i) {
107     // Fill with 0.0 or 1.0 at random.
108     return (random::New64() % 2) == 0 ? 0.0f : 1.0f;
109   });
110   return test::graph::Constant(g, data);
111 }
112 
RandomZeroOrOneMatrix(Graph * const g,const int n,int d)113 Node* RandomZeroOrOneMatrix(Graph* const g, const int n, int d) {
114   Tensor data(DT_FLOAT, TensorShape({n, d}));
115   test::FillFn<float>(&data, [](const int i) {
116     // Fill with 0.0 or 1.0 at random.
117     return (random::New64() % 2) == 0 ? 0.0f : 1.0f;
118   });
119   return test::graph::Constant(g, data);
120 }
121 
GetGraphs(const int32 num_examples,const int32 num_sparse_feature_groups,const int32 sparse_features_per_group,const int32 num_dense_feature_groups,const int32 dense_features_per_group,Graph ** const init_g,Graph ** train_g)122 void GetGraphs(const int32 num_examples, const int32 num_sparse_feature_groups,
123                const int32 sparse_features_per_group,
124                const int32 num_dense_feature_groups,
125                const int32 dense_features_per_group, Graph** const init_g,
126                Graph** train_g) {
127   {
128     // Build initialization graph
129     Graph* g = new Graph(OpRegistry::Global());
130 
131     // These nodes have to be created first, and in the same way as the
132     // nodes in the graph below.
133     std::vector<Node*> sparse_weight_nodes =
134         VarVector(g, num_sparse_feature_groups, sparse_features_per_group);
135     std::vector<Node*> dense_weight_nodes =
136         VarVector(g, num_dense_feature_groups, dense_features_per_group);
137     Node* const multi_zero = Zeros(g, sparse_features_per_group);
138     for (Node* n : sparse_weight_nodes) {
139       test::graph::Assign(g, n, multi_zero);
140     }
141     Node* const zero = Zeros(g, dense_features_per_group);
142     for (Node* n : dense_weight_nodes) {
143       test::graph::Assign(g, n, zero);
144     }
145 
146     *init_g = g;
147   }
148 
149   {
150     // Build execution graph
151     Graph* g = new Graph(OpRegistry::Global());
152 
153     // These nodes have to be created first, and in the same way as the
154     // nodes in the graph above.
155     std::vector<Node*> sparse_weight_nodes =
156         VarVector(g, num_sparse_feature_groups, sparse_features_per_group);
157     std::vector<Node*> dense_weight_nodes =
158         VarVector(g, num_dense_feature_groups, dense_features_per_group);
159 
160     std::vector<NodeBuilder::NodeOut> sparse_indices;
161     std::vector<NodeBuilder::NodeOut> sparse_weights;
162     for (Node* n : sparse_weight_nodes) {
163       sparse_indices.push_back(
164           NodeBuilder::NodeOut(SparseIndices(g, sparse_features_per_group)));
165       sparse_weights.push_back(NodeBuilder::NodeOut(n));
166     }
167     std::vector<NodeBuilder::NodeOut> dense_weights;
168     dense_weights.reserve(dense_weight_nodes.size());
169     for (Node* n : dense_weight_nodes) {
170       dense_weights.push_back(NodeBuilder::NodeOut(n));
171     }
172 
173     std::vector<NodeBuilder::NodeOut> sparse_example_indices;
174     std::vector<NodeBuilder::NodeOut> sparse_feature_indices;
175     std::vector<NodeBuilder::NodeOut> sparse_values;
176     sparse_example_indices.reserve(num_sparse_feature_groups);
177     for (int i = 0; i < num_sparse_feature_groups; ++i) {
178       sparse_example_indices.push_back(NodeBuilder::NodeOut(
179           SparseExampleIndices(g, sparse_features_per_group, num_examples)));
180     }
181     sparse_feature_indices.reserve(num_sparse_feature_groups);
182     for (int i = 0; i < num_sparse_feature_groups; ++i) {
183       sparse_feature_indices.push_back(NodeBuilder::NodeOut(
184           SparseFeatureIndices(g, sparse_features_per_group, num_examples)));
185     }
186     sparse_values.reserve(num_sparse_feature_groups);
187     for (int i = 0; i < num_sparse_feature_groups; ++i) {
188       sparse_values.push_back(
189           NodeBuilder::NodeOut(RandomZeroOrOne(g, num_examples * 4)));
190     }
191 
192     std::vector<NodeBuilder::NodeOut> dense_features;
193     dense_features.reserve(num_dense_feature_groups);
194     for (int i = 0; i < num_dense_feature_groups; ++i) {
195       dense_features.push_back(NodeBuilder::NodeOut(
196           RandomZeroOrOneMatrix(g, num_examples, dense_features_per_group)));
197     }
198 
199     Node* const weights = Ones(g, num_examples);
200     Node* const labels = RandomZeroOrOne(g, num_examples);
201     Node* const example_state_data = Zeros(g, TensorShape({num_examples, 4}));
202 
203     Node* sdca = nullptr;
204     TF_CHECK_OK(
205         NodeBuilder(g->NewName("sdca"), "SdcaOptimizer")
206             .Attr("loss_type", "logistic_loss")
207             .Attr("num_sparse_features", num_sparse_feature_groups)
208             .Attr("num_sparse_features_with_values", num_sparse_feature_groups)
209             .Attr("num_dense_features", num_dense_feature_groups)
210             .Attr("l1", 0.0)
211             .Attr("l2", 1.0)
212             .Attr("num_loss_partitions", 1)
213             .Attr("num_inner_iterations", 2)
214             .Input(sparse_example_indices)
215             .Input(sparse_feature_indices)
216             .Input(sparse_values)
217             .Input(dense_features)
218             .Input(weights)
219             .Input(labels)
220             .Input(sparse_indices)
221             .Input(sparse_weights)
222             .Input(dense_weights)
223             .Input(example_state_data)
224             .Finalize(g, &sdca));
225 
226     *train_g = g;
227   }
228 }
229 
BM_SDCA(const int iters,const int num_examples)230 void BM_SDCA(const int iters, const int num_examples) {
231   testing::StopTiming();
232   Graph* init = nullptr;
233   Graph* train = nullptr;
234   GetGraphs(num_examples, 20 /* sparse feature groups */,
235             5 /* sparse features per group */, 1 /* dense feature groups*/,
236             20 /* dense features per group */, &init, &train);
237   testing::StartTiming();
238   test::Benchmark("cpu", train, GetSingleThreadedOptions(), init).Run(iters);
239 }
240 
BM_SDCA_LARGE_DENSE(const int iters,const int num_examples)241 void BM_SDCA_LARGE_DENSE(const int iters, const int num_examples) {
242   testing::StopTiming();
243   Graph* init = nullptr;
244   Graph* train = nullptr;
245   GetGraphs(num_examples, 0 /* sparse feature groups */,
246             0 /* sparse features per group */, 5 /* dense feature groups*/,
247             200000 /* dense features per group */, &init, &train);
248   testing::StartTiming();
249   test::Benchmark("cpu", train, GetSingleThreadedOptions(), init).Run(iters);
250 }
251 
BM_SDCA_LARGE_SPARSE(const int iters,const int num_examples)252 void BM_SDCA_LARGE_SPARSE(const int iters, const int num_examples) {
253   testing::StopTiming();
254   Graph* init = nullptr;
255   Graph* train = nullptr;
256   GetGraphs(num_examples, 65 /* sparse feature groups */,
257             1e6 /* sparse features per group */, 0 /* dense feature groups*/,
258             0 /* dense features per group */, &init, &train);
259   testing::StartTiming();
260   test::Benchmark("cpu", train, GetMultiThreadedOptions(), init).Run(iters);
261 }
262 }  // namespace
263 
264 BENCHMARK(BM_SDCA)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);
265 BENCHMARK(BM_SDCA_LARGE_DENSE)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);
266 BENCHMARK(BM_SDCA_LARGE_SPARSE)->Arg(128)->Arg(256)->Arg(512)->Arg(1024);
267 
268 }  // namespace tensorflow
269