1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cmath>
8 #include <functional>
9 #include <random>
10 #include <vector>
11 
12 #include <xnnpack.h>
13 
14 #include <benchmark/benchmark.h>
15 
16 #include "bench/end2end.h"
17 #include "bench/utils.h"
18 #include "models/models.h"
19 #include <xnnpack/dwconv.h>
20 #include <xnnpack/params.h>
21 
22 
DWConvEnd2EndBenchmark(benchmark::State & state,models::ExecutionPlanFactory model_factory,xnn_f32_dwconv_minmax_unipass_ukernel_function dwconv,uint8_t channel_tile,uint8_t primary_tile,benchmark::utils::IsaCheckFunction isa_check=nullptr)23 static void DWConvEnd2EndBenchmark(
24   benchmark::State& state,
25   models::ExecutionPlanFactory model_factory,
26   xnn_f32_dwconv_minmax_unipass_ukernel_function dwconv,
27   uint8_t channel_tile, uint8_t primary_tile,
28   benchmark::utils::IsaCheckFunction isa_check = nullptr)
29 {
30   if (isa_check && !isa_check(state)) {
31     return;
32   }
33   if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
34     state.SkipWithError("failed to initialize XNNPACK");
35     return;
36   }
37 
38   // Override microkernels chosen in xnn_initialize
39   for (size_t i = 0; i < XNN_MAX_F32_DWCONV_UKERNELS; i++) {
40     // Replace only the microkernel the matching kernel size.
41     if (xnn_params.f32.dwconv[i].primary_tile == primary_tile) {
42       // Note: do not directly assign to xnn_params.f32.dwconv[i] because it breaks older gcc.
43       xnn_params.f32.dwconv[i].minmax.unipass = xnn_dwconv_unipass_ukernel_function(dwconv);
44       xnn_params.f32.dwconv[i].channel_tile = channel_tile;
45       xnn_params.f32.dwconv[i].primary_tile = primary_tile;
46       xnn_params.f32.dwconv[i].incremental_tile = 0;
47       break;
48     }
49   }
50 
51   auto execution_plan = model_factory(nullptr);
52   if (execution_plan.empty()) {
53     state.SkipWithError("failed to create a model");
54     return;
55   }
56 
57   for (auto _ : state) {
58     for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
59       xnn_status status = xnn_run_operator(op.get(), nullptr);
60       if (status != xnn_status_success) {
61         state.SkipWithError("failed to run a model");
62         return;
63       }
64     }
65   }
66 
67   const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
68   if (cpu_frequency != 0) {
69     state.counters["cpufreq"] = cpu_frequency;
70   }
71 }
72 
73 #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
f32_dwconv_up4x9__aarch64_neonfma(benchmark::State & state,models::ExecutionPlanFactory model)74   static void f32_dwconv_up4x9__aarch64_neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
75     DWConvEnd2EndBenchmark(state, model,
76       xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma,
77       4 /* cr */, 9 /* mr */);
78   }
79 
f32_dwconv_up4x9__aarch64_neonfma_cortex_a55(benchmark::State & state,models::ExecutionPlanFactory model)80   static void f32_dwconv_up4x9__aarch64_neonfma_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
81     DWConvEnd2EndBenchmark(state, model,
82       xnn_f32_dwconv_minmax_ukernel_up4x9__aarch64_neonfma_cortex_a55,
83       4 /* cr */, 9 /* mr */);
84   }
85 
86   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__aarch64_neonfma);
87   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__aarch64_neonfma_cortex_a55);
88 #endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
89 
90 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
f32_dwconv_up4x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)91   static void f32_dwconv_up4x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
92     DWConvEnd2EndBenchmark(state, model,
93       xnn_f32_dwconv_minmax_ukernel_up4x9__neon,
94       4 /* cr */, 9 /* mr */, benchmark::utils::CheckNEON);
95   }
96 
f32_dwconv_up4x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)97   static void f32_dwconv_up4x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
98     DWConvEnd2EndBenchmark(state, model,
99       xnn_f32_dwconv_minmax_ukernel_up4x9__neon_acc2,
100       4 /* cr */, 9 /* mr */, benchmark::utils::CheckNEON);
101   }
102 
f32_dwconv_up8x9__neon(benchmark::State & state,models::ExecutionPlanFactory model)103   static void f32_dwconv_up8x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
104     DWConvEnd2EndBenchmark(state, model,
105       xnn_f32_dwconv_minmax_ukernel_up8x9__neon,
106       8 /* cr */, 9 /* mr */, benchmark::utils::CheckNEON);
107   }
108 
f32_dwconv_up8x9__neon_acc2(benchmark::State & state,models::ExecutionPlanFactory model)109   static void f32_dwconv_up8x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
110     DWConvEnd2EndBenchmark(state, model,
111       xnn_f32_dwconv_minmax_ukernel_up8x9__neon_acc2,
112       8 /* cr */, 9 /* mr */, benchmark::utils::CheckNEON);
113   }
114 
f32_dwconv_up4x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)115   static void f32_dwconv_up4x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
116     DWConvEnd2EndBenchmark(state, model,
117       xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma,
118       4 /* cr */, 9 /* mr */, benchmark::utils::CheckNEONFMA);
119   }
120 
f32_dwconv_up4x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)121   static void f32_dwconv_up4x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
122     DWConvEnd2EndBenchmark(state, model,
123       xnn_f32_dwconv_minmax_ukernel_up4x9__neonfma_acc2,
124       4 /* cr */, 9 /* mr */, benchmark::utils::CheckNEONFMA);
125   }
126 
f32_dwconv_up8x9__neonfma(benchmark::State & state,models::ExecutionPlanFactory model)127   static void f32_dwconv_up8x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
128     DWConvEnd2EndBenchmark(state, model,
129       xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma,
130       8 /* cr */, 9 /* mr */, benchmark::utils::CheckNEONFMA);
131   }
132 
f32_dwconv_up8x9__neonfma_acc2(benchmark::State & state,models::ExecutionPlanFactory model)133   static void f32_dwconv_up8x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
134     DWConvEnd2EndBenchmark(state, model,
135       xnn_f32_dwconv_minmax_ukernel_up8x9__neonfma_acc2,
136       8 /* cr */, 9 /* mr */, benchmark::utils::CheckNEONFMA);
137   }
138 
139   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neon);
140   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neon_acc2);
141   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neon);
142   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neon_acc2);
143 
144   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neonfma);
145   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__neonfma_acc2);
146   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neonfma);
147   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__neonfma_acc2);
148 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
149 
150 
151 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
f32_dwconv_up4x9__sse(benchmark::State & state,models::ExecutionPlanFactory model)152   static void f32_dwconv_up4x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
153     DWConvEnd2EndBenchmark(state, model,
154       xnn_f32_dwconv_minmax_ukernel_up4x9__sse,
155       4 /* cr */, 9 /* mr */);
156   }
f32_dwconv_up4x9__sse_acc2(benchmark::State & state,models::ExecutionPlanFactory model)157   static void f32_dwconv_up4x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
158     DWConvEnd2EndBenchmark(state, model,
159       xnn_f32_dwconv_minmax_ukernel_up4x9__sse_acc2,
160       4 /* cr */, 9 /* mr */);
161   }
f32_dwconv_up8x9__sse(benchmark::State & state,models::ExecutionPlanFactory model)162   static void f32_dwconv_up8x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
163     DWConvEnd2EndBenchmark(state, model,
164       xnn_f32_dwconv_minmax_ukernel_up8x9__sse,
165       8 /* cr */, 9 /* mr */);
166   }
f32_dwconv_up8x9__sse_acc2(benchmark::State & state,models::ExecutionPlanFactory model)167   static void f32_dwconv_up8x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
168     DWConvEnd2EndBenchmark(state, model,
169       xnn_f32_dwconv_minmax_ukernel_up8x9__sse_acc2,
170       8 /* cr */, 9 /* mr */);
171   }
172 
f32_dwconv_up8x9__avx(benchmark::State & state,models::ExecutionPlanFactory model)173   static void f32_dwconv_up8x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) {
174     DWConvEnd2EndBenchmark(state, model,
175       xnn_f32_dwconv_minmax_ukernel_up8x9__avx,
176       8 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX);
177   }
f32_dwconv_up8x9__avx_acc2(benchmark::State & state,models::ExecutionPlanFactory model)178   static void f32_dwconv_up8x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
179     DWConvEnd2EndBenchmark(state, model,
180       xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2,
181       8 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX);
182   }
f32_dwconv_up16x9__avx(benchmark::State & state,models::ExecutionPlanFactory model)183   static void f32_dwconv_up16x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) {
184     DWConvEnd2EndBenchmark(state, model,
185       xnn_f32_dwconv_minmax_ukernel_up16x9__avx,
186       16 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX);
187   }
f32_dwconv_up16x9__avx_acc2(benchmark::State & state,models::ExecutionPlanFactory model)188   static void f32_dwconv_up16x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
189     DWConvEnd2EndBenchmark(state, model,
190       xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2,
191       16 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX);
192   }
193 
f32_dwconv_up8x9__fma3(benchmark::State & state,models::ExecutionPlanFactory model)194   static void f32_dwconv_up8x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) {
195     DWConvEnd2EndBenchmark(state, model,
196       xnn_f32_dwconv_minmax_ukernel_up8x9__fma3,
197       8 /* cr */, 9 /* mr */, benchmark::utils::CheckFMA3);
198   }
f32_dwconv_up8x9__fma3_acc2(benchmark::State & state,models::ExecutionPlanFactory model)199   static void f32_dwconv_up8x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
200     DWConvEnd2EndBenchmark(state, model,
201       xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2,
202       8 /* cr */, 9 /* mr */, benchmark::utils::CheckFMA3);
203   }
f32_dwconv_up16x9__fma3(benchmark::State & state,models::ExecutionPlanFactory model)204   static void f32_dwconv_up16x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) {
205     DWConvEnd2EndBenchmark(state, model,
206       xnn_f32_dwconv_minmax_ukernel_up16x9__fma3,
207       16 /* cr */, 9 /* mr */, benchmark::utils::CheckFMA3);
208   }
f32_dwconv_up16x9__fma3_acc2(benchmark::State & state,models::ExecutionPlanFactory model)209   static void f32_dwconv_up16x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
210     DWConvEnd2EndBenchmark(state, model,
211       xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2,
212       16 /* cr */, 9 /* mr */, benchmark::utils::CheckFMA3);
213   }
214 
f32_dwconv_up16x9__avx512f(benchmark::State & state,models::ExecutionPlanFactory model)215   static void f32_dwconv_up16x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) {
216     DWConvEnd2EndBenchmark(state, model,
217       xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f,
218       16 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX512F);
219   }
f32_dwconv_up16x9__avx512f_acc2(benchmark::State & state,models::ExecutionPlanFactory model)220   static void f32_dwconv_up16x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
221     DWConvEnd2EndBenchmark(state, model,
222       xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2,
223       16 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX512F);
224   }
f32_dwconv_up32x9__avx512f(benchmark::State & state,models::ExecutionPlanFactory model)225   static void f32_dwconv_up32x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) {
226     DWConvEnd2EndBenchmark(state, model,
227       xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f,
228       32 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX512F);
229   }
f32_dwconv_up32x9__avx512f_acc2(benchmark::State & state,models::ExecutionPlanFactory model)230   static void f32_dwconv_up32x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
231     DWConvEnd2EndBenchmark(state, model,
232       xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2,
233       32 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX512F);
234   }
235 
236   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx512f);
237   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx512f_acc2);
238   BENCHMARK_FP32_END2END(f32_dwconv_up32x9__avx512f);
239   BENCHMARK_FP32_END2END(f32_dwconv_up32x9__avx512f_acc2);
240 
241   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__fma3);
242   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__fma3_acc2);
243   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__fma3);
244   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__fma3_acc2);
245 
246   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__avx);
247   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__avx_acc2);
248   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx);
249   BENCHMARK_FP32_END2END(f32_dwconv_up16x9__avx_acc2);
250 
251   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__sse);
252   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__sse_acc2);
253   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__sse);
254   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__sse_acc2);
255 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
256 
257 #if XNN_ARCH_WASMSIMD
f32_dwconv_up4x9__wasmsimd_arm(benchmark::State & state,models::ExecutionPlanFactory model)258   static void f32_dwconv_up4x9__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
259     DWConvEnd2EndBenchmark(state, model,
260       xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm,
261       4 /* cr */, 9 /* mr */);
262   }
263 
f32_dwconv_up4x9__wasmsimd_arm_acc2(benchmark::State & state,models::ExecutionPlanFactory model)264   static void f32_dwconv_up4x9__wasmsimd_arm_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
265     DWConvEnd2EndBenchmark(state, model,
266       xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_arm_acc2,
267       4 /* cr */, 9 /* mr */);
268   }
269 
f32_dwconv_up8x9__wasmsimd_arm(benchmark::State & state,models::ExecutionPlanFactory model)270   static void f32_dwconv_up8x9__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
271     DWConvEnd2EndBenchmark(state, model,
272       xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm,
273       8 /* cr */, 9 /* mr */);
274   }
275 
f32_dwconv_up8x9__wasmsimd_arm_acc2(benchmark::State & state,models::ExecutionPlanFactory model)276   static void f32_dwconv_up8x9__wasmsimd_arm_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
277     DWConvEnd2EndBenchmark(state, model,
278       xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_arm_acc2,
279       8 /* cr */, 9 /* mr */);
280   }
281 
f32_dwconv_up4x9__wasmsimd_x86(benchmark::State & state,models::ExecutionPlanFactory model)282   static void f32_dwconv_up4x9__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
283     DWConvEnd2EndBenchmark(state, model,
284       xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86,
285       4 /* cr */, 9 /* mr */);
286   }
287 
f32_dwconv_up4x9__wasmsimd_x86_acc2(benchmark::State & state,models::ExecutionPlanFactory model)288   static void f32_dwconv_up4x9__wasmsimd_x86_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
289     DWConvEnd2EndBenchmark(state, model,
290       xnn_f32_dwconv_minmax_ukernel_up4x9__wasmsimd_x86_acc2,
291       4 /* cr */, 9 /* mr */);
292   }
293 
f32_dwconv_up8x9__wasmsimd_x86(benchmark::State & state,models::ExecutionPlanFactory model)294   static void f32_dwconv_up8x9__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
295     DWConvEnd2EndBenchmark(state, model,
296       xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86,
297       8 /* cr */, 9 /* mr */);
298   }
299 
f32_dwconv_up8x9__wasmsimd_x86_acc2(benchmark::State & state,models::ExecutionPlanFactory model)300   static void f32_dwconv_up8x9__wasmsimd_x86_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
301     DWConvEnd2EndBenchmark(state, model,
302       xnn_f32_dwconv_minmax_ukernel_up8x9__wasmsimd_x86_acc2,
303       8 /* cr */, 9 /* mr */);
304   }
305 
306   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_arm);
307   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_arm_acc2);
308   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_arm);
309   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_arm_acc2);
310 
311   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_x86);
312   BENCHMARK_FP32_END2END(f32_dwconv_up4x9__wasmsimd_x86_acc2);
313   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_x86);
314   BENCHMARK_FP32_END2END(f32_dwconv_up8x9__wasmsimd_x86_acc2);
315 #endif  // XNN_ARCH_WASMSIMD
316 
f32_dwconv_up1x9__scalar(benchmark::State & state,models::ExecutionPlanFactory model)317 static void f32_dwconv_up1x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
318   DWConvEnd2EndBenchmark(state, model,
319     xnn_f32_dwconv_minmax_ukernel_up1x9__scalar,
320       1 /* cr */, 9 /* mr */);
321 }
322 
f32_dwconv_up1x9__scalar_acc2(benchmark::State & state,models::ExecutionPlanFactory model)323 static void f32_dwconv_up1x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
324   DWConvEnd2EndBenchmark(state, model,
325     xnn_f32_dwconv_minmax_ukernel_up1x9__scalar_acc2,
326       1 /* cr */, 9 /* mr */);
327 }
328 
f32_dwconv_up2x9__scalar(benchmark::State & state,models::ExecutionPlanFactory model)329 static void f32_dwconv_up2x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
330   DWConvEnd2EndBenchmark(state, model,
331     xnn_f32_dwconv_minmax_ukernel_up2x9__scalar,
332       2 /* cr */, 9 /* mr */);
333 }
334 
f32_dwconv_up2x9__scalar_acc2(benchmark::State & state,models::ExecutionPlanFactory model)335 static void f32_dwconv_up2x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
336   DWConvEnd2EndBenchmark(state, model,
337     xnn_f32_dwconv_minmax_ukernel_up2x9__scalar_acc2,
338       2 /* cr */, 9 /* mr */);
339 }
340 
341 BENCHMARK_FP32_END2END(f32_dwconv_up1x9__scalar);
342 BENCHMARK_FP32_END2END(f32_dwconv_up1x9__scalar_acc2);
343 BENCHMARK_FP32_END2END(f32_dwconv_up2x9__scalar);
344 BENCHMARK_FP32_END2END(f32_dwconv_up2x9__scalar_acc2);
345 
346 #ifndef XNNPACK_BENCHMARK_NO_MAIN
347 BENCHMARK_MAIN();
348 #endif
349