1 char netcpu_kstat10_id[]="\
2 @(#)netcpu_kstat10.c (c) Copyright 2005-2012, Hewlett-Packard Company Version 2.6.0";
3
4 #if HAVE_CONFIG_H
5 # include <config.h>
6 #endif
7
8 #include <stdio.h>
9
10 #if HAVE_INTTYPES_H
11 # include <inttypes.h>
12 #else
13 # if HAVE_STDINT_H
14 # include <stdint.h>
15 # endif
16 #endif
17
18 #if HAVE_UNISTD_H
19 # include <unistd.h>
20 #endif
21 #if HAVE_STRINGS_H
22 # include <strings.h>
23 #endif
24 #if STDC_HEADERS
25 # include <stdlib.h>
26 # include <stddef.h>
27 #else
28 # if HAVE_STDLIB_H
29 # include <stdlib.h>
30 # endif
31 #endif
32
33 #include <errno.h>
34
35 #include <kstat.h>
36 #include <sys/sysinfo.h>
37
38 #include "netsh.h"
39 #include "netlib.h"
40
/* handle to the open kstat chain, set by cpu_util_init() and closed
   by cpu_util_terminate() */
static kstat_ctl_t *kc = NULL;
/* kstat chain id; NOTE(review): not visibly updated or read elsewhere
   in this file - possibly vestigial, confirm before removing */
static kid_t kcid = 0;

/* per-CPU nanosecond time counters as reported by the Solaris 10
   "cpu:sys" kstat (idle/user/kernel) and the "cpu:intrstat" kstat
   (interrupt) */
typedef struct cpu_time_counters {
  uint64_t idle;
  uint64_t user;
  uint64_t kernel;
  uint64_t interrupt;
} cpu_time_counters_t;

/* counter snapshots taken at the start and end of the measurement
   interval, their difference, and the interrupt-corrected values used
   for the final utilization calculation */
static cpu_time_counters_t starting_cpu_counters[MAXCPUS];
static cpu_time_counters_t ending_cpu_counters[MAXCPUS];
static cpu_time_counters_t delta_cpu_counters[MAXCPUS];
static cpu_time_counters_t corrected_cpu_counters[MAXCPUS];
55
56 static void
print_cpu_time_counters(char * name,int instance,cpu_time_counters_t * counters)57 print_cpu_time_counters(char *name, int instance, cpu_time_counters_t *counters)
58 {
59 fprintf(where,
60 "%s[%d]:\n"
61 "\t idle %llu\n"
62 "\t user %llu\n"
63 "\t kernel %llu\n"
64 "\t interrupt %llu\n",
65 name,instance,
66 counters[instance].idle,
67 counters[instance].user,
68 counters[instance].kernel,
69 counters[instance].interrupt);
70 }
71
72 void
cpu_util_init(void)73 cpu_util_init(void)
74 {
75 kstat_t *ksp;
76 int i;
77 kc = kstat_open();
78
79 if (kc == NULL) {
80 fprintf(where,
81 "cpu_util_init: kstat_open: errno %d %s\n",
82 errno,
83 strerror(errno));
84 fflush(where);
85 exit(-1);
86 }
87
88 /* lets flesh-out a CPU instance number map since it seems that some
89 systems, not even those which are partitioned, can have
90 non-contiguous CPU numbers. discovered "the hard way" on a
91 T5220. raj 20080804 */
92 i = 0;
93 for (ksp = kc->kc_chain, i = 0;
94 (ksp != NULL) && (i < MAXCPUS);
95 ksp = ksp->ks_next) {
96 if ((strcmp(ksp->ks_module,"cpu") == 0) &&
97 (strcmp(ksp->ks_name,"sys") == 0)) {
98 if (debug) {
99 fprintf(where,"Mapping CPU instance %d to entry %d\n",
100 ksp->ks_instance,i);
101 fflush(where);
102 }
103 lib_cpu_map[i++] = ksp->ks_instance;
104 }
105 }
106
107 if (MAXCPUS == i) {
108 fprintf(where,
109 "Sorry, this system has more CPUs (%d) than netperf can handle (%d).\n"
110 "Please alter MAXCPUS in netlib.h and recompile.\n",
111 i,
112 MAXCPUS);
113 fflush(where);
114 exit(1);
115 }
116
117 return;
118 }
119
120 void
cpu_util_terminate(void)121 cpu_util_terminate(void)
122 {
123 kstat_close(kc);
124 return;
125 }
126
127 int
get_cpu_method(void)128 get_cpu_method(void)
129 {
130 return KSTAT_10;
131 }
132
133 static void
print_unexpected_statistic_warning(char * who,char * what,char * why)134 print_unexpected_statistic_warning(char *who, char *what, char *why)
135 {
136 if (why) {
137 fprintf(where,
138 "WARNING! WARNING! WARNING! WARNING!\n"
139 "%s found an unexpected %s statistic %.16s\n",
140 who,
141 why,
142 what);
143 }
144 else {
145 fprintf(where,
146 "%s is ignoring statistic %.16s\n",
147 who,
148 what);
149 }
150 }
151
/* read the idle/user/kernel nanosecond counters for one CPU from its
   "cpu:<instance>:sys" kstat into counters[cpu_num].  cpu_num is the
   dense netperf index; the actual kstat instance comes from
   lib_cpu_map[].  exits the process if the kstat cannot be found,
   read, or does not contain all three expected cpu_nsec_* entries.
   note: counters[cpu_num].interrupt is NOT touched here - that is
   filled in separately by get_interrupt_counters(). */
static void
get_cpu_counters(int cpu_num, cpu_time_counters_t *counters)
{

  kstat_t *ksp;
  int found=0;
  kid_t nkcid;
  kstat_named_t *knp;
  int i;

  ksp = kstat_lookup(kc, "cpu", lib_cpu_map[cpu_num], "sys");
  if ((ksp) && (ksp->ks_type == KSTAT_TYPE_NAMED)) {
    /* happiness and joy, keep going */
    nkcid = kstat_read(kc, ksp, NULL);
    if (nkcid != -1) {
      /* happiness and joy, keep going. we could consider adding a
         "found < 3" to the end conditions, but then we wouldn't
         search to the end and find that Sun added some nsec. we
         probably want to see if they add an nsec. raj 2005-01-28 */
      for (i = ksp->ks_ndata, knp = ksp->ks_data;
           i > 0;
           knp++,i--) {
        /* we would be hosed if the same name could appear twice */
        if (!strcmp("cpu_nsec_idle",knp->name)) {
          found++;
          counters[cpu_num].idle = knp->value.ui64;
        }
        else if (!strcmp("cpu_nsec_user",knp->name)) {
          found++;
          counters[cpu_num].user = knp->value.ui64;
        }
        else if (!strcmp("cpu_nsec_kernel",knp->name)) {
          found++;
          counters[cpu_num].kernel = knp->value.ui64;
        }
        else if (!strcmp("cpu_nsec_intr",knp->name)) {
          /* known nsec statistic that we deliberately skip - it is not
             counted toward "found" and does not trigger the warning
             below */
          if (debug >= 2) {
            fprintf(where,
                    "Found a cpu_nsec_intr but it doesn't do what we want\n");
            fflush(where);
          }
        }
        else if (strstr(knp->name,"nsec")) {
          /* finding another nsec here means Sun have changed
             something and we need to warn the user. raj 2005-01-28 */
          print_unexpected_statistic_warning("get_cpu_counters",
                                             knp->name,
                                             "nsec");
        }
        else if (debug >=2) {

          /* might want to tell people about what we are skipping.
             however, only display other names debug >=2. raj
             2005-01-28 */

          print_unexpected_statistic_warning("get_cpu_counters",
                                             knp->name,
                                             NULL);
        }
      }
      if (3 == found) {
        /* happiness and joy - all of idle, user and kernel were seen */
        return;
      }
      else {
        fprintf(where,
                "get_cpu_counters could not find one or more of the expected counters!\n");
        fflush(where);
        exit(-1);
      }
    }
    else {
      /* the kstat_read returned an error or the chain changed */
      fprintf(where,
              "get_cpu_counters: kstat_read failed or chain id changed %d %s\n",
              errno,
              strerror(errno));
      fflush(where);
      exit(-1);
    }
  }
  else {
    /* the lookup failed or found the wrong type */
    fprintf(where,
            "get_cpu_counters: kstat_lookup failed for module 'cpu' number %d instance %d name 'sys' and KSTAT_TYPE_NAMED: errno %d %s\n",
            cpu_num,
            lib_cpu_map[cpu_num],
            errno,
            strerror(errno));
    fflush(where);
    exit(-1);
  }
}
245
246 static void
get_interrupt_counters(int cpu_num,cpu_time_counters_t * counters)247 get_interrupt_counters(int cpu_num, cpu_time_counters_t *counters)
248 {
249 kstat_t *ksp;
250 int found=0;
251 kid_t nkcid;
252 kstat_named_t *knp;
253 int i;
254
255 ksp = kstat_lookup(kc, "cpu", lib_cpu_map[cpu_num], "intrstat");
256
257 counters[cpu_num].interrupt = 0;
258 if ((ksp) && (ksp->ks_type == KSTAT_TYPE_NAMED)) {
259 /* happiness and joy, keep going */
260 nkcid = kstat_read(kc, ksp, NULL);
261 if (nkcid != -1) {
262 /* happiness and joy, keep going. we could consider adding a
263 "found < 15" to the end conditions, but then we wouldn't
264 search to the end and find that Sun added some "time." we
265 probably want to see if they add a "nsec." raj 2005-01-28 */
266 for (i = ksp->ks_ndata, knp = ksp->ks_data;
267 i > 0;
268 knp++,i--) {
269 if (strstr(knp->name,"time")) {
270 found++;
271 counters[cpu_num].interrupt += knp->value.ui64;
272 }
273 else if (debug >=2) {
274
275 /* might want to tell people about what we are skipping.
276 however, only display other names debug >=2. raj
277 2005-01-28
278 */
279
280 print_unexpected_statistic_warning("get_cpu_counters",
281 knp->name,
282 NULL);
283 }
284 }
285 if (15 == found) {
286 /* happiness and joy */
287 return;
288 }
289 else {
290 fprintf(where,
291 "get_cpu_counters could not find one or more of the expected counters!\n");
292 fflush(where);
293 exit(-1);
294 }
295 }
296 else {
297 /* the kstat_read returned an error or the chain changed */
298 fprintf(where,
299 "get_cpu_counters: kstat_read failed or chain id changed %d %s\n",
300 errno,
301 strerror(errno));
302 fflush(where);
303 exit(-1);
304 }
305 }
306 else {
307 /* the lookup failed or found the wrong type */
308 fprintf(where,
309 "get_cpu_counters: kstat_lookup failed for module 'cpu' %d instance %d class 'intrstat' and KSTAT_TYPE_NAMED: errno %d %s\n",
310 cpu_num,
311 lib_cpu_map[cpu_num],
312 errno,
313 strerror(errno));
314 fflush(where);
315 exit(-1);
316 }
317
318 }
319
320 static void
get_cpu_time_counters(cpu_time_counters_t * counters)321 get_cpu_time_counters(cpu_time_counters_t *counters)
322 {
323
324 int i;
325
326 for (i = 0; i < lib_num_loc_cpus; i++){
327 get_cpu_counters(i, counters);
328 get_interrupt_counters(i, counters);
329 }
330
331 return;
332 }
333
334 /* the kstat10 mechanism, since it is based on actual nanosecond
335 counters is not going to use a comparison to an idle rate. so, the
336 calibrate_idle_rate routine will be rather simple :) raj 2005-01-28
337 */
338
/* the kstat10 mechanism, since it is based on actual nanosecond
   counters, is not going to use a comparison to an idle rate, so no
   calibration is needed and this routine always reports zero.  the
   iterations and interval arguments are accepted only to satisfy the
   common netcpu interface. */
float
calibrate_idle_rate(int iterations, int interval)
{
  float no_calibration_needed = 0.0;
  return no_calibration_needed;
}
344
345 float
calc_cpu_util_internal(float elapsed_time)346 calc_cpu_util_internal(float elapsed_time)
347 {
348 int i;
349 float correction_factor;
350 float actual_rate;
351
352 uint64_t total_cpu_nsec;
353
354 /* multiply by 100 and divide by total and you get whole
355 percentages. multiply by 1000 and divide by total and you get
356 tenths of percentages. multiply by 10000 and divide by total and
357 you get hundredths of percentages. etc etc etc raj 2005-01-28 */
358
359 #define CALC_PERCENT 100
360 #define CALC_TENTH_PERCENT 1000
361 #define CALC_HUNDREDTH_PERCENT 10000
362 #define CALC_THOUSANDTH_PERCENT 100000
363 #define CALC_ACCURACY CALC_THOUSANDTH_PERCENT
364
365 uint64_t fraction_idle;
366 uint64_t fraction_user;
367 uint64_t fraction_kernel;
368 uint64_t fraction_interrupt;
369
370 uint64_t interrupt_idle;
371 uint64_t interrupt_user;
372 uint64_t interrupt_kernel;
373
374 memset(&lib_local_cpu_stats, 0, sizeof(lib_local_cpu_stats));
375
376 /* It is possible that the library measured a time other than the
377 one that the user want for the cpu utilization calculations - for
378 example, tests that were ended by watchdog timers such as the udp
379 stream test. We let these tests tell up what the elapsed time
380 should be. */
381
382 if (elapsed_time != 0.0) {
383 correction_factor = (float) 1.0 +
384 ((lib_elapsed - elapsed_time) / elapsed_time);
385 }
386 else {
387 correction_factor = (float) 1.0;
388 }
389
390 for (i = 0; i < lib_num_loc_cpus; i++) {
391
392 /* this is now the fun part. we have the nanoseconds _allegedly_
393 spent in user, idle and kernel. We also have nanoseconds spent
394 servicing interrupts. Sadly, in the developer's finite wisdom,
395 the interrupt time accounting is in parallel with the other
396 accounting. this means that time accounted in user, kernel or
397 idle will also include time spent in interrupt. for netperf's
398 porpoises we do not really care about that for user and kernel,
399 but we certainly do care for idle. the $64B question becomes -
400 how to "correct" for this?
401
402 we could just subtract interrupt time from idle. that has the
403 virtue of simplicity and also "punishes" Sun for doing
404 something that seems to be so stupid. however, we probably
405 have to be "fair" even to the allegedly stupid so the other
406 mechanism, suggested by a Sun engineer is to subtract interrupt
407 time from each of user, kernel and idle in proportion to their
408 numbers. then we sum the corrected user, kernel and idle along
409 with the interrupt time and use that to calculate a new idle
410 percentage and thus a CPU util percentage.
411
412 that is what we will attempt to do here. raj 2005-01-28
413
414 of course, we also have to wonder what we should do if there is
415 more interrupt time than the sum of user, kernel and idle.
416 that is a theoretical possibility I suppose, but for the
417 time-being, one that we will blythly ignore, except perhaps for
418 a quick check. raj 2005-01-31
419 */
420
421 /* we ass-u-me that these counters will never wrap during a
422 netperf run. this may not be a particularly safe thing to
423 do. raj 2005-01-28 */
424 delta_cpu_counters[i].idle = ending_cpu_counters[i].idle -
425 starting_cpu_counters[i].idle;
426 delta_cpu_counters[i].user = ending_cpu_counters[i].user -
427 starting_cpu_counters[i].user;
428 delta_cpu_counters[i].kernel = ending_cpu_counters[i].kernel -
429 starting_cpu_counters[i].kernel;
430 delta_cpu_counters[i].interrupt = ending_cpu_counters[i].interrupt -
431 starting_cpu_counters[i].interrupt;
432
433 if (debug) {
434 print_cpu_time_counters("delta_cpu_counters",i,delta_cpu_counters);
435 }
436
437 /* for this summation, we do not include interrupt time */
438 total_cpu_nsec =
439 delta_cpu_counters[i].idle +
440 delta_cpu_counters[i].user +
441 delta_cpu_counters[i].kernel;
442
443 if (debug) {
444 fprintf(where,"total_cpu_nsec %llu\n",total_cpu_nsec);
445 }
446
447 if (delta_cpu_counters[i].interrupt > total_cpu_nsec) {
448 /* we are not in Kansas any more Toto, and I am not quite sure
449 the best way to get our tails out of here so let us just
450 punt. raj 2005-01-31 */
451 fprintf(where,
452 "WARNING! WARNING! WARNING! WARNING! WARNING! \n"
453 "calc_cpu_util_internal: more interrupt time than others combined!\n"
454 "\tso CPU util cannot be estimated\n"
455 "\t delta[%d].interrupt %llu\n"
456 "\t delta[%d].idle %llu\n"
457 "\t delta[%d].user %llu\n"
458 "\t delta[%d].kernel %llu\n",
459 i,delta_cpu_counters[i].interrupt,
460 i,delta_cpu_counters[i].idle,
461 i,delta_cpu_counters[i].user,
462 i,delta_cpu_counters[i].kernel);
463 fflush(where);
464
465 lib_local_cpu_stats.cpu_util = -1.0;
466 lib_local_per_cpu_util[i] = -1.0;
467 return -1.0;
468 }
469
470 /* and now some fun with integer math. i initially tried to
471 promote things to long doubled but that didn't seem to result
472 in happiness and joy. raj 2005-01-28 */
473
474 fraction_idle =
475 (delta_cpu_counters[i].idle * CALC_ACCURACY) / total_cpu_nsec;
476
477 fraction_user =
478 (delta_cpu_counters[i].user * CALC_ACCURACY) / total_cpu_nsec;
479
480 fraction_kernel =
481 (delta_cpu_counters[i].kernel * CALC_ACCURACY) / total_cpu_nsec;
482
483 /* ok, we have our fractions, now we want to take that fraction of
484 the interrupt time and subtract that from the bucket. */
485
486 interrupt_idle = ((delta_cpu_counters[i].interrupt * fraction_idle) /
487 CALC_ACCURACY);
488
489 interrupt_user = ((delta_cpu_counters[i].interrupt * fraction_user) /
490 CALC_ACCURACY);
491
492 interrupt_kernel = ((delta_cpu_counters[i].interrupt * fraction_kernel) /
493 CALC_ACCURACY);
494
495 if (debug) {
496 fprintf(where,
497 "\tfraction_idle %llu interrupt_idle %llu\n"
498 "\tfraction_user %llu interrupt_user %llu\n"
499 "\tfraction_kernel %llu interrupt_kernel %llu\n",
500 fraction_idle,
501 interrupt_idle,
502 fraction_user,
503 interrupt_user,
504 fraction_kernel,
505 interrupt_kernel);
506 }
507
508 corrected_cpu_counters[i].idle = delta_cpu_counters[i].idle -
509 interrupt_idle;
510
511 corrected_cpu_counters[i].user = delta_cpu_counters[i].user -
512 interrupt_user;
513
514 corrected_cpu_counters[i].kernel = delta_cpu_counters[i].kernel -
515 interrupt_kernel;
516
517 corrected_cpu_counters[i].interrupt = delta_cpu_counters[i].interrupt;
518
519 if (debug) {
520 print_cpu_time_counters("corrected_cpu_counters",
521 i,
522 corrected_cpu_counters);
523 }
524
525 /* I was going to check for going less than zero, but since all
526 the calculations are in unsigned quantities that would seem to
527 be a triffle silly... raj 2005-01-28 */
528
529 /* ok, now we sum the numbers again, this time including interrupt
530 */
531
532 total_cpu_nsec =
533 corrected_cpu_counters[i].idle +
534 corrected_cpu_counters[i].user +
535 corrected_cpu_counters[i].kernel +
536 corrected_cpu_counters[i].interrupt;
537
538 /* and recalculate our fractions we are really only going to use
539 fraction_idle, but lets calculate the rest just for the heck of
540 it. one day we may want to display them. raj 2005-01-28 */
541
542 /* multiply by 100 and divide by total and you get whole
543 percentages. multiply by 1000 and divide by total and you get
544 tenths of percentages. multiply by 10000 and divide by total
545 and you get hundredths of percentages. etc etc etc raj
546 2005-01-28 */
547 fraction_idle =
548 (corrected_cpu_counters[i].idle * CALC_ACCURACY) / total_cpu_nsec;
549
550 fraction_user =
551 (corrected_cpu_counters[i].user * CALC_ACCURACY) / total_cpu_nsec;
552
553 fraction_kernel =
554 (corrected_cpu_counters[i].kernel * CALC_ACCURACY) / total_cpu_nsec;
555
556 fraction_interrupt =
557 (corrected_cpu_counters[i].interrupt * CALC_ACCURACY) / total_cpu_nsec;
558
559 if (debug) {
560 fprintf(where,"\tfraction_idle %lu\n",fraction_idle);
561 fprintf(where,"\tfraction_user %lu\n",fraction_user);
562 fprintf(where,"\tfraction_kernel %lu\n",fraction_kernel);
563 fprintf(where,"\tfraction_interrupt %lu\n",fraction_interrupt);
564 }
565
566 /* and finally, what is our CPU utilization? */
567 lib_local_per_cpu_util[i] = 100.0 - (((float)fraction_idle /
568 (float)CALC_ACCURACY) * 100.0);
569 lib_local_per_cpu_util[i] *= correction_factor;
570 if (debug) {
571 fprintf(where,
572 "lib_local_per_cpu_util[%d] %g cf %f\n",
573 i,
574 lib_local_per_cpu_util[i],
575 correction_factor);
576 }
577 lib_local_cpu_stats.cpu_util += lib_local_per_cpu_util[i];
578 }
579 /* we want the average across all n processors */
580 lib_local_cpu_stats.cpu_util /= (float)lib_num_loc_cpus;
581
582 return lib_local_cpu_stats.cpu_util;
583 }
584
585 void
cpu_start_internal(void)586 cpu_start_internal(void)
587 {
588 get_cpu_time_counters(starting_cpu_counters);
589 return;
590 }
591
592 void
cpu_stop_internal(void)593 cpu_stop_internal(void)
594 {
595 get_cpu_time_counters(ending_cpu_counters);
596 }
597