1 /*
2  * Microbenchmark for math functions.
3  *
4  * Copyright (c) 2018, Arm Limited.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #undef _GNU_SOURCE
9 #define _GNU_SOURCE 1
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <string.h>
14 #include <time.h>
15 #include <math.h>
16 #include "mathlib.h"
17 
#ifndef WANT_VMATH
/* Enable the build of vector math code (the __s_*, __v_* and __vn_*
   benchmarks below) unless the build has already chosen a value.  */
# define WANT_VMATH 1
#endif
22 
/* Default number of measurements per benchmark; the best (smallest) time
   is reported.  Can be changed with -m.  */
#define MEASURE 60
/* Number of elements in the input arrays A and Af.  */
#define N 8000
/* Default number of passes over the array in one measurement.  Can be
   changed with -c.  */
#define ITER 125
29 
/* Input values loaded by readtrace; NULL until a trace has been read.  */
static double *Trace;
/* Number of entries in Trace; readtrace leaves it a multiple of N.  */
static size_t trace_size;
/* Inputs for the double precision benchmarks.  */
static double A[N];
/* Inputs for the single precision benchmarks.  */
static float Af[N];
/* Measurements taken per benchmark (-m).  */
static long measurecount = MEASURE;
/* Passes over the input array per measurement (-c).  */
static long itercount = ITER;
36 
/* Minimal vector abstraction used by the vector benchmark runners.

   Both configurations provide the same interface:
     v_double, v_float             vector types
     v_double_len (), v_float_len ()   number of lanes
     v_double_load (p), v_float_load (p)   load that many lanes from P
     v_double_dup (x), v_float_dup (x)     copy X into every lane  */
#if __aarch64__ && WANT_VMATH
typedef __f64x2_t v_double;
typedef __f32x4_t v_float;

#define v_double_len() 2
#define v_float_len() 4

static inline v_double
v_double_load (const double *p)
{
  return (v_double){p[0], p[1]};
}

static inline v_double
v_double_dup (double x)
{
  return (v_double){x, x};
}

static inline v_float
v_float_load (const float *p)
{
  return (v_float){p[0], p[1], p[2], p[3]};
}

static inline v_float
v_float_dup (float x)
{
  return (v_float){x, x, x, x};
}
#else
/* Scalar stand-ins so that the vector runners still compile: a "vector"
   is one element.  They are typed functions with the same signatures as
   the real helpers above, so both configurations get the same argument
   checking.  */
typedef double v_double;
typedef float v_float;

#define v_double_len() 1
#define v_float_len() 1

static inline v_double
v_double_load (const double *p)
{
  return p[0];
}

static inline v_double
v_double_dup (double x)
{
  return x;
}

static inline v_float
v_float_load (const float *p)
{
  return p[0];
}

static inline v_float
v_float_dup (float x)
{
  return x;
}
#endif
80 
/* Identity function, listed in funtab as "dummy".  It does no work, so
   timing it shows the cost of the benchmark loop and the call alone.  */
static double
dummy (double x)
{
  return x;
}
86 
/* Single precision identity function, listed in funtab as "dummyf".  It
   does no work, so timing it shows the cost of the benchmark loop and the
   call alone.  */
static float
dummyf (float x)
{
  return x;
}
92 
/* Helpers for the vector math benchmarks.  funtab only stores functions
   of one argument, so the xy* wrappers below adapt the two-argument pow
   variants by passing x as both base and exponent.  */
#if WANT_VMATH
#if __aarch64__
/* Vector identity functions: baselines for the 'v' benchmarks.  */
static v_double
__v_dummy (v_double x)
{
  return x;
}

static v_float
__v_dummyf (v_float x)
{
  return x;
}

#ifdef __vpcs
/* The same baselines and pow wrappers declared with __vpcs, for the 'n'
   benchmarks.  NOTE(review): __vpcs is not defined in this file,
   presumably it comes from mathlib.h -- confirm.  */
__vpcs static v_double
__vn_dummy (v_double x)
{
  return x;
}

__vpcs static v_float
__vn_dummyf (v_float x)
{
  return x;
}

__vpcs static v_float
xy__vn_powf (v_float x)
{
  return __vn_powf (x, x);
}

__vpcs static v_float
xy_Z_powf (v_float x)
{
  return _ZGVnN4vv_powf (x, x);
}

__vpcs static v_double
xy__vn_pow (v_double x)
{
  return __vn_pow (x, x);
}

__vpcs static v_double
xy_Z_pow (v_double x)
{
  return _ZGVnN2vv_pow (x, x);
}
#endif

/* pow (x, x) wrappers for the __v_ variants.  */
static v_float
xy__v_powf (v_float x)
{
  return __v_powf (x, x);
}

static v_double
xy__v_pow (v_double x)
{
  return __v_pow (x, x);
}
#endif

/* pow (x, x) wrappers for the __s_ variants; these are built on every
   target when WANT_VMATH is set.  */
static float
xy__s_powf (float x)
{
  return __s_powf (x, x);
}

static double
xy__s_pow (double x)
{
  return __s_pow (x, x);
}
#endif
170 
171 static double
172 xypow (double x)
173 {
174   return pow (x, x);
175 }
176 
177 static float
178 xypowf (float x)
179 {
180   return powf (x, x);
181 }
182 
183 static double
184 xpow (double x)
185 {
186   return pow (x, 23.4);
187 }
188 
189 static float
190 xpowf (float x)
191 {
192   return powf (x, 23.4f);
193 }
194 
195 static double
196 ypow (double x)
197 {
198   return pow (2.34, x);
199 }
200 
201 static float
202 ypowf (float x)
203 {
204   return powf (2.34f, x);
205 }
206 
207 static float
208 sincosf_wrap (float x)
209 {
210   float s, c;
211   sincosf (x, &s, &c);
212   return s + c;
213 }
214 
/* Table of the functions that can be benchmarked.  The table ends with an
   entry whose name is NULL.  Several entries may share a name (for
   example "exp"), each with its own default input interval.  */
static const struct fun
{
  /* Name matched against the command line and printed with the result.  */
  const char *name;
  /* 'd' for double precision, 'f' for single precision.  */
  int prec;
  /* 0 for a scalar function, 'v' for a vector function, 'n' for a vector
     function declared with __vpcs.  */
  int vec;
  /* Default input interval, used unless the user selects inputs with -i
     or a trace.  */
  double lo;
  double hi;
  /* The function itself; prec and vec say which member is valid.  */
  union
  {
    double (*d) (double);
    float (*f) (float);
    v_double (*vd) (v_double);
    v_float (*vf) (v_float);
#ifdef __vpcs
    __vpcs v_double (*vnd) (v_double);
    __vpcs v_float (*vnf) (v_float);
#endif
  } fun;
} funtab[] = {
/* Entry builders: the function's identifier doubles as its name.  Entries
   that need a different name from the wrapper they call (the pow and
   sincosf families) are written out in full instead.  */
#define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
#define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
#define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}},
#define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}},
#define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
#define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
/* Scalar double precision functions.  */
D (dummy, 1.0, 2.0)
D (exp, -9.9, 9.9)
D (exp, 0.5, 1.0)
D (exp2, -9.9, 9.9)
D (log, 0.01, 11.1)
D (log, 0.999, 1.001)
D (log2, 0.01, 11.1)
D (log2, 0.999, 1.001)
{"pow", 'd', 0, 0.01, 11.1, {.d = xypow}},
D (xpow, 0.01, 11.1)
D (ypow, -9.9, 9.9)

/* Scalar single precision functions.  */
F (dummyf, 1.0, 2.0)
F (expf, -9.9, 9.9)
F (exp2f, -9.9, 9.9)
F (logf, 0.01, 11.1)
F (log2f, 0.01, 11.1)
{"powf", 'f', 0, 0.01, 11.1, {.f = xypowf}},
F (xpowf, 0.01, 11.1)
F (ypowf, -9.9, 9.9)
{"sincosf", 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, 100, 1000, {.f = sincosf_wrap}},
{"sincosf", 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}},
F (sinf, 0.1, 0.7)
F (sinf, 0.8, 3.1)
F (sinf, -3.1, 3.1)
F (sinf, 3.3, 33.3)
F (sinf, 100, 1000)
F (sinf, 1e6, 1e32)
F (cosf, 0.1, 0.7)
F (cosf, 0.8, 3.1)
F (cosf, -3.1, 3.1)
F (cosf, 3.3, 33.3)
F (cosf, 100, 1000)
F (cosf, 1e6, 1e32)
#if WANT_VMATH
/* __s_ variants, called through the scalar runners.  */
D (__s_sin, -3.1, 3.1)
D (__s_cos, -3.1, 3.1)
D (__s_exp, -9.9, 9.9)
D (__s_log, 0.01, 11.1)
{"__s_pow", 'd', 0, 0.01, 11.1, {.d = xy__s_pow}},
F (__s_expf, -9.9, 9.9)
F (__s_expf_1u, -9.9, 9.9)
F (__s_exp2f, -9.9, 9.9)
F (__s_exp2f_1u, -9.9, 9.9)
F (__s_logf, 0.01, 11.1)
{"__s_powf", 'f', 0, 0.01, 11.1, {.f = xy__s_powf}},
F (__s_sinf, -3.1, 3.1)
F (__s_cosf, -3.1, 3.1)
#if __aarch64__
/* __v_ variants, called through the vector runners.  */
VD (__v_dummy, 1.0, 2.0)
VD (__v_sin, -3.1, 3.1)
VD (__v_cos, -3.1, 3.1)
VD (__v_exp, -9.9, 9.9)
VD (__v_log, 0.01, 11.1)
{"__v_pow", 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}},
VF (__v_dummyf, 1.0, 2.0)
VF (__v_expf, -9.9, 9.9)
VF (__v_expf_1u, -9.9, 9.9)
VF (__v_exp2f, -9.9, 9.9)
VF (__v_exp2f_1u, -9.9, 9.9)
VF (__v_logf, 0.01, 11.1)
{"__v_powf", 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}},
VF (__v_sinf, -3.1, 3.1)
VF (__v_cosf, -3.1, 3.1)
#ifdef __vpcs
/* __vn_ and _ZGV variants, called through the __vpcs runners.  */
VND (__vn_dummy, 1.0, 2.0)
VND (__vn_exp, -9.9, 9.9)
VND (_ZGVnN2v_exp, -9.9, 9.9)
VND (__vn_log, 0.01, 11.1)
VND (_ZGVnN2v_log, 0.01, 11.1)
{"__vn_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}},
{"_ZGVnN2vv_pow", 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}},
VND (__vn_sin, -3.1, 3.1)
VND (_ZGVnN2v_sin, -3.1, 3.1)
VND (__vn_cos, -3.1, 3.1)
VND (_ZGVnN2v_cos, -3.1, 3.1)
VNF (__vn_dummyf, 1.0, 2.0)
VNF (__vn_expf, -9.9, 9.9)
VNF (_ZGVnN4v_expf, -9.9, 9.9)
VNF (__vn_expf_1u, -9.9, 9.9)
VNF (__vn_exp2f, -9.9, 9.9)
VNF (_ZGVnN4v_exp2f, -9.9, 9.9)
VNF (__vn_exp2f_1u, -9.9, 9.9)
VNF (__vn_logf, 0.01, 11.1)
VNF (_ZGVnN4v_logf, 0.01, 11.1)
{"__vn_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}},
{"_ZGVnN4vv_powf", 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}},
VNF (__vn_sinf, -3.1, 3.1)
VNF (_ZGVnN4v_sinf, -3.1, 3.1)
VNF (__vn_cosf, -3.1, 3.1)
VNF (_ZGVnN4v_cosf, -3.1, 3.1)
#endif
#endif
#endif
/* Terminator: name is NULL.  */
{0},
#undef F
#undef D
#undef VF
#undef VD
#undef VNF
#undef VND
};
346 
347 static void
348 gen_linear (double lo, double hi)
349 {
350   for (int i = 0; i < N; i++)
351     A[i] = (lo * (N - i) + hi * i) / N;
352 }
353 
354 static void
355 genf_linear (double lo, double hi)
356 {
357   for (int i = 0; i < N; i++)
358     Af[i] = (float)(lo * (N - i) + hi * i) / N;
359 }
360 
/* Reinterpret the 64 bits of I as a double (a bit copy, not a numeric
   conversion).  */
static inline double
asdouble (uint64_t i)
{
  double value;

  memcpy (&value, &i, sizeof value);
  return value;
}
371 
/* State of the pseudo-random generator.  It starts from a fixed value, so
   every run produces the same sequence of inputs.  */
static uint64_t seed = 0x0123456789abcdef;

/* Return the next pseudo-random double in [lo, hi) (for lo < hi).

   The state is advanced with a 64-bit linear congruential step.  Its top
   52 bits become the mantissa of a double with the exponent of 1.0, which
   gives a value in [1, 2); subtracting 1.0 maps that to [0, 1).  */
static double
frand (double lo, double hi)
{
  seed = seed * 6364136223846793005ULL + 1;

  const uint64_t exponent_of_one = 0x3ffULL << 52;
  const double unit = asdouble (exponent_of_one | seed >> 12) - 1.0;

  return lo + (hi - lo) * unit;
}
380 
381 static void
382 gen_rand (double lo, double hi)
383 {
384   for (int i = 0; i < N; i++)
385     A[i] = frand (lo, hi);
386 }
387 
388 static void
389 genf_rand (double lo, double hi)
390 {
391   for (int i = 0; i < N; i++)
392     Af[i] = (float)frand (lo, hi);
393 }
394 
395 static void
396 gen_trace (int index)
397 {
398   for (int i = 0; i < N; i++)
399     A[i] = Trace[index + i];
400 }
401 
402 static void
403 genf_trace (int index)
404 {
405   for (int i = 0; i < N; i++)
406     Af[i] = (float)Trace[index + i];
407 }
408 
409 static void
410 run_thruput (double f (double))
411 {
412   for (int i = 0; i < N; i++)
413     f (A[i]);
414 }
415 
416 static void
417 runf_thruput (float f (float))
418 {
419   for (int i = 0; i < N; i++)
420     f (Af[i]);
421 }
422 
/* Holds 0, but is volatile so the compiler cannot assume its value.  The
   latency kernels add "previous result * zero" to every input: the value
   passed to the function is unchanged, yet each call has to wait for the
   result of the one before it.  */
volatile double zero = 0;
424 
425 static void
426 run_latency (double f (double))
427 {
428   double z = zero;
429   double prev = z;
430   for (int i = 0; i < N; i++)
431     prev = f (A[i] + prev * z);
432 }
433 
434 static void
435 runf_latency (float f (float))
436 {
437   float z = (float)zero;
438   float prev = z;
439   for (int i = 0; i < N; i++)
440     prev = f (Af[i] + prev * z);
441 }
442 
443 static void
444 run_v_thruput (v_double f (v_double))
445 {
446   for (int i = 0; i < N; i += v_double_len ())
447     f (v_double_load (A+i));
448 }
449 
450 static void
451 runf_v_thruput (v_float f (v_float))
452 {
453   for (int i = 0; i < N; i += v_float_len ())
454     f (v_float_load (Af+i));
455 }
456 
457 static void
458 run_v_latency (v_double f (v_double))
459 {
460   v_double z = v_double_dup (zero);
461   v_double prev = z;
462   for (int i = 0; i < N; i += v_double_len ())
463     prev = f (v_double_load (A+i) + prev * z);
464 }
465 
466 static void
467 runf_v_latency (v_float f (v_float))
468 {
469   v_float z = v_float_dup (zero);
470   v_float prev = z;
471   for (int i = 0; i < N; i += v_float_len ())
472     prev = f (v_float_load (Af+i) + prev * z);
473 }
474 
#ifdef __vpcs
/* Throughput and latency kernels for vector functions declared with
   __vpcs.  They do the same work as the run_v_ and runf_v_ kernels and
   differ only in the type of the function they call.  */

/* Call f on every vector of A; results are discarded.  */
static void
run_vn_thruput (__vpcs v_double f (v_double))
{
  const int step = v_double_len ();
  int pos = 0;

  while (pos < N)
    {
      f (v_double_load (A + pos));
      pos += step;
    }
}

/* Call f on every vector of Af; results are discarded.  */
static void
runf_vn_thruput (__vpcs v_float f (v_float))
{
  const int step = v_float_len ();
  int pos = 0;

  while (pos < N)
    {
      f (v_float_load (Af + pos));
      pos += step;
    }
}

/* Call f on every vector of A, chaining each argument to the previous
   result through a multiplication by the volatile zero.  */
static void
run_vn_latency (__vpcs v_double f (v_double))
{
  v_double z = v_double_dup (zero);
  v_double chain = z;
  const int step = v_double_len ();
  int pos = 0;

  while (pos < N)
    {
      chain = f (v_double_load (A + pos) + chain * z);
      pos += step;
    }
}

/* Call f on every vector of Af, chaining each argument to the previous
   result through a multiplication by the volatile zero.  */
static void
runf_vn_latency (__vpcs v_float f (v_float))
{
  v_float z = v_float_dup (zero);
  v_float chain = z;
  const int step = v_float_len ();
  int pos = 0;

  while (pos < N)
    {
      chain = f (v_float_load (Af + pos) + chain * z);
      pos += step;
    }
}
#endif
508 
/* Return a timestamp in nanoseconds.  Only the difference between two
   timestamps is meaningful.  Aborts if the clock cannot be read.  */
static uint64_t
tic (void)
{
  struct timespec ts;

  /* Intervals must come from a clock that cannot be set or jump
     backwards; CLOCK_REALTIME may be stepped while a measurement is in
     progress, which would corrupt the elapsed time.  */
  if (clock_gettime (CLOCK_MONOTONIC, &ts) != 0)
    abort ();
  return (uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec;
}
517 
/* Time RUN (F) and leave the best result, in nanoseconds, in the
   caller's variable dt.  After one untimed warm-up call, measurecount
   measurements are taken, each covering itercount consecutive calls of
   RUN (F); the smallest elapsed time wins.  dt must be an unsigned
   variable in scope: it starts at -1, i.e. its largest value.  */
#define TIMEIT(run, f) do { \
  dt = -1; \
  run (f); /* Warm up.  */ \
  for (int timeit_m = 0; timeit_m < measurecount; timeit_m++) \
    { \
      const uint64_t timeit_start = tic (); \
      for (int timeit_it = 0; timeit_it < itercount; timeit_it++) \
        run (f); \
      const uint64_t timeit_elapsed = tic () - timeit_start; \
      if (timeit_elapsed < dt) \
        dt = timeit_elapsed; \
    } \
} while (0)
531 
532 static void
533 bench1 (const struct fun *f, int type, double lo, double hi)
534 {
535   uint64_t dt = 0;
536   uint64_t ns100;
537   const char *s = type == 't' ? "rthruput" : "latency";
538   int vlen = 1;
539 
540   if (f->vec && f->prec == 'd')
541     vlen = v_double_len();
542   else if (f->vec && f->prec == 'f')
543     vlen = v_float_len();
544 
545   if (f->prec == 'd' && type == 't' && f->vec == 0)
546     TIMEIT (run_thruput, f->fun.d);
547   else if (f->prec == 'd' && type == 'l' && f->vec == 0)
548     TIMEIT (run_latency, f->fun.d);
549   else if (f->prec == 'f' && type == 't' && f->vec == 0)
550     TIMEIT (runf_thruput, f->fun.f);
551   else if (f->prec == 'f' && type == 'l' && f->vec == 0)
552     TIMEIT (runf_latency, f->fun.f);
553   else if (f->prec == 'd' && type == 't' && f->vec == 'v')
554     TIMEIT (run_v_thruput, f->fun.vd);
555   else if (f->prec == 'd' && type == 'l' && f->vec == 'v')
556     TIMEIT (run_v_latency, f->fun.vd);
557   else if (f->prec == 'f' && type == 't' && f->vec == 'v')
558     TIMEIT (runf_v_thruput, f->fun.vf);
559   else if (f->prec == 'f' && type == 'l' && f->vec == 'v')
560     TIMEIT (runf_v_latency, f->fun.vf);
561 #ifdef __vpcs
562   else if (f->prec == 'd' && type == 't' && f->vec == 'n')
563     TIMEIT (run_vn_thruput, f->fun.vnd);
564   else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
565     TIMEIT (run_vn_latency, f->fun.vnd);
566   else if (f->prec == 'f' && type == 't' && f->vec == 'n')
567     TIMEIT (runf_vn_thruput, f->fun.vnf);
568   else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
569     TIMEIT (runf_vn_latency, f->fun.vnf);
570 #endif
571 
572   if (type == 't')
573     {
574       ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
575       printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n", f->name, s,
576 	      (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
577 	      (unsigned long long) dt, lo, hi);
578     }
579   else if (type == 'l')
580     {
581       ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
582       printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n", f->name, s,
583 	      (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
584 	      (unsigned long long) dt, lo, hi);
585     }
586   fflush (stdout);
587 }
588 
589 static void
590 bench (const struct fun *f, double lo, double hi, int type, int gen)
591 {
592   if (f->prec == 'd' && gen == 'r')
593     gen_rand (lo, hi);
594   else if (f->prec == 'd' && gen == 'l')
595     gen_linear (lo, hi);
596   else if (f->prec == 'd' && gen == 't')
597     gen_trace (0);
598   else if (f->prec == 'f' && gen == 'r')
599     genf_rand (lo, hi);
600   else if (f->prec == 'f' && gen == 'l')
601     genf_linear (lo, hi);
602   else if (f->prec == 'f' && gen == 't')
603     genf_trace (0);
604 
605   if (gen == 't')
606     hi = trace_size / N;
607 
608   if (type == 'b' || type == 't')
609     bench1 (f, 't', lo, hi);
610 
611   if (type == 'b' || type == 'l')
612     bench1 (f, 'l', lo, hi);
613 
614   for (int i = N; i < trace_size; i += N)
615     {
616       if (f->prec == 'd')
617 	gen_trace (i);
618       else
619 	genf_trace (i);
620 
621       lo = i / N;
622       if (type == 'b' || type == 't')
623 	bench1 (f, 't', lo, hi);
624 
625       if (type == 'b' || type == 'l')
626 	bench1 (f, 'l', lo, hi);
627     }
628 }
629 
630 static void
631 readtrace (const char *name)
632 {
633 	int n = 0;
634 	FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
635 	if (!f)
636 	  {
637 	    printf ("openning \"%s\" failed: %m\n", name);
638 	    exit (1);
639 	  }
640 	for (;;)
641 	  {
642 	    if (n >= trace_size)
643 	      {
644 		trace_size += N;
645 		Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
646 		if (Trace == NULL)
647 		  {
648 		    printf ("out of memory\n");
649 		    exit (1);
650 		  }
651 	      }
652 	    if (fscanf (f, "%lf", Trace + n) != 1)
653 	      break;
654 	    n++;
655 	  }
656 	if (ferror (f) || n == 0)
657 	  {
658 	    printf ("reading \"%s\" failed: %m\n", name);
659 	    exit (1);
660 	  }
661 	fclose (f);
662 	if (n % N == 0)
663 	  trace_size = n;
664 	for (int i = 0; n < trace_size; n++, i++)
665 	  Trace[n] = Trace[i];
666 }
667 
668 static void
669 usage (void)
670 {
671   printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
672 	  "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
673 	  "[func2 ..]\n");
674   printf ("func:\n");
675   printf ("%7s [run all benchmarks]\n", "all");
676   for (const struct fun *f = funtab; f->name; f++)
677     printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
678   exit (1);
679 }
680 
681 int
682 main (int argc, char *argv[])
683 {
684   int usergen = 0, gen = 'r', type = 'b', all = 0;
685   double lo = 0, hi = 0;
686   const char *tracefile = "-";
687 
688   argv++;
689   argc--;
690   for (;;)
691     {
692       if (argc <= 0)
693 	usage ();
694       if (argv[0][0] != '-')
695 	break;
696       else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
697 	{
698 	  usergen = 1;
699 	  lo = strtod (argv[1], 0);
700 	  hi = strtod (argv[2], 0);
701 	  argv += 3;
702 	  argc -= 3;
703 	}
704       else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
705 	{
706 	  measurecount = strtol (argv[1], 0, 0);
707 	  argv += 2;
708 	  argc -= 2;
709 	}
710       else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
711 	{
712 	  itercount = strtol (argv[1], 0, 0);
713 	  argv += 2;
714 	  argc -= 2;
715 	}
716       else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
717 	{
718 	  gen = argv[1][0];
719 	  if (strchr ("rlt", gen) == 0)
720 	    usage ();
721 	  argv += 2;
722 	  argc -= 2;
723 	}
724       else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
725 	{
726 	  gen = 't';  /* -f implies -g trace.  */
727 	  tracefile = argv[1];
728 	  argv += 2;
729 	  argc -= 2;
730 	}
731       else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
732 	{
733 	  type = argv[1][0];
734 	  if (strchr ("ltb", type) == 0)
735 	    usage ();
736 	  argv += 2;
737 	  argc -= 2;
738 	}
739       else
740 	usage ();
741     }
742   if (gen == 't')
743     {
744       readtrace (tracefile);
745       lo = hi = 0;
746       usergen = 1;
747     }
748   while (argc > 0)
749     {
750       int found = 0;
751       all = strcmp (argv[0], "all") == 0;
752       for (const struct fun *f = funtab; f->name; f++)
753 	if (all || strcmp (argv[0], f->name) == 0)
754 	  {
755 	    found = 1;
756 	    if (!usergen)
757 	      {
758 		lo = f->lo;
759 		hi = f->hi;
760 	      }
761 	    bench (f, lo, hi, type, gen);
762 	    if (usergen && !all)
763 	      break;
764 	  }
765       if (!found)
766 	printf ("unknown function: %s\n", argv[0]);
767       argv++;
768       argc--;
769     }
770   return 0;
771 }
772