/* Identification string for this source file; the leading "@(#)" is
   the marker that the what(1) utility searches for in binaries. */
char netcpu_looper_id[] =
  "@(#)netcpu_looper.c (c) Copyright 2005-2012. Version 2.6.0";
3 
4 /* netcpu_looper.c
5 
6    Implement the soaker process specific portions of netperf CPU
7    utilization measurements. These are broken-out into a separate file
8    to make life much nicer over in netlib.c which had become a maze of
9    twisty, CPU-util-related, #ifdefs, all different.  raj 2005-01-26
10    */
11 
12 #ifdef HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15 
16 #include <stdio.h>
17 
18 #ifdef HAVE_FCNTL_H
19 # include <fcntl.h>
20 #endif
21 #if HAVE_UNISTD_H
22 # include <unistd.h>
23 #endif
24 #if defined(HAVE_MMAP) || defined(HAVE_SYS_MMAN_H)
25 # include <sys/mman.h>
26 #else
27 # error netcpu_looper requires mmap
28 #endif
29 
30 #if TIME_WITH_SYS_TIME
31 # include <sys/time.h>
32 # include <time.h>
33 #else
34 # if HAVE_SYS_TIME_H
35 #  include <sys/time.h>
36 # else
37 #  include <time.h>
38 # endif
39 #endif
40 
41 #if HAVE_SYS_TYPES_H
42 # include <sys/types.h>
43 #endif
44 
45 #if HAVE_SYS_WAIT_H
46 # include <sys/wait.h>
47 #endif
48 
49 #ifdef HAVE_SIGNAL_H
50 #include <signal.h>
51 #endif
52 
53 #ifdef HAVE_ERRNO_H
54 #include <errno.h>
55 #endif
56 
57 #include "netsh.h"
58 #include "netlib.h"
59 
60 #define PAGES_PER_CHILD 2
61 
62 /* the lib_start_count and lib_end_count arrays hold the starting
63    and ending values of whatever is counting when the system is
64    idle. The rate at which this increments during a test is compared
65    with a previous calibrarion to arrive at a CPU utilization
66    percentage. raj 2005-01-26 */
67 static uint64_t  lib_start_count[MAXCPUS];
68 static uint64_t  lib_end_count[MAXCPUS];
69 
70 static int *cpu_mappings;
71 
72 static int lib_idle_fd;
73 static uint64_t *lib_idle_address[MAXCPUS];
74 static long     *lib_base_pointer;
75 static pid_t     lib_idle_pids[MAXCPUS];
76 static int       lib_loopers_running=0;
77 
78 /* we used to use this code to bind the loopers, but since we have
79    decided to enable processor affinity for the actual
80    netperf/netserver processes we will use that affinity routine,
81    which happens to know about more systems than this */
82 
#ifdef NOTDEF
/* Bind the calling looper to CPU (child_num modulo lib_num_loc_cpus)
   using whatever mechanism the platform offers: SETPROCESS on HP-UX,
   processor_bind() on Solaris, SetThreadAffinityMask() on Windows.
   On any other platform this is a no-op. The whole routine is compiled
   out unless NOTDEF is defined. It is meant only for the looper
   processes, never for the processes doing the actual measurement. */
static void
bind_to_processor(int child_num)
{
#if defined(__hpux)
#include <sys/syscall.h>
#include <sys/mp.h>

  int old_cpu = -2;
  int target_cpu = child_num % lib_num_loc_cpus;

  if (debug) {
    fprintf(where,
            "child %d asking for CPU %d as pid %d with %d CPUs\n",
            child_num,
            target_cpu,
            getpid(),
            lib_num_loc_cpus);
    fflush(where);
  }

  SETPROCESS(target_cpu, getpid());
  return;

#elif defined(__sun) && defined(__SVR4)
  /* should only be Solaris */
#include <sys/processor.h>
#include <sys/procset.h>

  int old_binding;
  int target_cpu = child_num % lib_num_loc_cpus;

  if (debug) {
    fprintf(where,
            "bind_to_processor: child %d asking for CPU %d as pid %d with %d CPUs\n",
            child_num,
            target_cpu,
            getpid(),
            lib_num_loc_cpus);
    fflush(where);
  }

  if (processor_bind(P_PID, getpid(), target_cpu, &old_binding) != 0) {
    /* binding is best-effort: report the failure and carry on */
    fprintf(where,"bind_to_processor: unable to perform processor binding\n");
    fprintf(where,"                   errno %d\n",errno);
    fflush(where);
  }
  return;

#else /* neither HP-UX nor Solaris */
# ifdef WIN32
  int target_cpu = child_num % lib_num_loc_cpus;

  if (!SetThreadAffinityMask(GetCurrentThread(),
                             (ULONG_PTR)1 << target_cpu)) {
    perror("SetThreadAffinityMask failed");
    fflush(stderr);
  }

  if (debug) {
    fprintf(where,
            "bind_to_processor: child %d asking for CPU %d of %d CPUs\n",
            child_num,
            target_cpu,
            lib_num_loc_cpus);
    fflush(where);
  }

# endif /* WIN32 */
  return;
#endif /* __hpux / __sun && __SVR4 */
}
#endif /* NOTDEF */
168 
169  /* sit_and_spin will just spin about incrementing a value */
170  /* this value will either be in a memory mapped region on Unix shared */
171  /* by each looper process, or something appropriate on Windows/NT */
172  /* (malloc'd or such). This routine is reasonably ugly in that it has */
173  /* priority manipulating code for lots of different operating */
174  /* systems. This routine never returns. raj 1/96 */
175 
176 static void
sit_and_spin(int child_index)177 sit_and_spin(int child_index)
178 
179 {
180   uint64_t *my_counter_ptr;
181 
182  /* only use C stuff if we are not WIN32 unless and until we */
183  /* switch from CreateThread to _beginthread. raj 1/96 */
184 #ifndef WIN32
185   /* we are the child. we could decide to exec some separate */
186   /* program, but that doesn't really seem worthwhile - raj 4/95 */
187   if (debug > 1) {
188     fprintf(where,
189             "Looper child %d is born, pid %d\n",
190             child_index,
191             getpid());
192     fflush(where);
193   }
194 
195 #endif /* WIN32 */
196 
197   /* reset our base pointer to be at the appropriate offset */
198   my_counter_ptr = (uint64_t *) ((char *)lib_base_pointer +
199                              (netlib_get_page_size() *
200                               PAGES_PER_CHILD * child_index));
201 
202   /* in the event we are running on an MP system, it would */
203   /* probably be good to bind the soaker processes to specific */
204   /* processors. I *think* this is the most reasonable thing to */
205   /* do, and would be closes to simulating the information we get */
206   /* on HP-UX with pstat. I could put all the system-specific code */
207   /* here, but will "abstract it into another routine to keep this */
208   /* area more readable. I'll probably do the same thine with the */
209   /* "low pri code" raj 10/95 */
210 
211   /* since we are "flying blind" wrt where we should bind the looper
212      processes, we want to use the cpu_map that was prepared by netlib
213      rather than assume that the CPU ids on the system start at zero
214      and are contiguous. raj 2006-04-03 */
215   bind_to_specific_processor(child_index % lib_num_loc_cpus,1);
216 
217   for (*my_counter_ptr = 0L;
218        ;
219        (*my_counter_ptr)++) {
220     if (!(*lib_base_pointer % 1)) {
221       /* every once and again, make sure that our process priority is */
222       /* nice and low. also, by making system calls, it may be easier */
223       /* for us to be pre-empted by something that needs to do useful */
224       /* work - like the thread of execution actually sending and */
225       /* receiving data across the network :) */
226 #ifdef _AIX
227       int pid,prio;
228 
229       prio = PRIORITY;
230       pid = getpid();
231       /* if you are not root, this call will return EPERM - why one */
232       /* cannot change one's own priority to  lower value is beyond */
233       /* me. raj 2/26/96 */
234       setpri(pid, prio);
235 #else /* _AIX */
236 #ifdef __sgi
237       int pid,prio;
238 
239       prio = PRIORITY;
240       pid = getpid();
241       schedctl(NDPRI, pid, prio);
242       sginap(0);
243 #else /* __sgi */
244 #ifdef WIN32
245       SetThreadPriority(GetCurrentThread(),THREAD_PRIORITY_IDLE);
246 #else /* WIN32 */
247 #if defined(__sun) && defined(__SVR4)
248 #include <sys/types.h>
249 #include <sys/priocntl.h>
250 #include <sys/rtpriocntl.h>
251 #include <sys/tspriocntl.h>
252       /* I would *really* like to know how to use priocntl to make the */
253       /* priority low for this looper process. however, either my mind */
254       /* is addled, or the manpage in section two for priocntl is not */
255       /* terribly helpful - for one, it has no examples :( so, if you */
256       /* can help, I'd love to hear from you. in the meantime, we will */
257       /* rely on nice(39). raj 2/26/96 */
258       nice(39);
259 #else /* __sun && __SVR4 */
260       nice(39);
261 #endif /* __sun && _SVR4 */
262 #endif /* WIN32 */
263 #endif /* __sgi */
264 #endif /* _AIX */
265     }
266   }
267 }
268 
269 
270 
271  /* this routine will start all the looper processes or threads for */
272  /* measuring CPU utilization. */
273 
274 static void
start_looper_processes()275 start_looper_processes()
276 {
277 
278   unsigned int      i, file_size;
279 
280   /* we want at least two pages for each processor. the */
281   /* child for any one processor will write to the first of his two */
282   /* pages, and the second page will be a buffer in case there is page */
283   /* prefetching. if your system pre-fetches more than a single page, */
284   /* well, you'll have to modify this or live with it :( raj 4/95 */
285 
286   file_size = ((netlib_get_page_size() * PAGES_PER_CHILD) *
287                lib_num_loc_cpus);
288 
289 #ifndef WIN32
290 
291   /* we we are not using WINDOWS NT (or 95 actually :), then we want */
292   /* to create a memory mapped region so we can see all the counting */
293   /* rates of the loopers */
294 
295   /* could we just use an anonymous memory region for this? it is */
296   /* possible that using a mmap()'ed "real" file, while convenient for */
297   /* debugging, could result in some filesystem activity - like */
298   /* metadata updates? raj 4/96 */
299   lib_idle_fd = open("/tmp/netperf_cpu",O_RDWR | O_CREAT | O_EXCL);
300 
301   if (lib_idle_fd == -1) {
302     fprintf(where,"create_looper: file creation; errno %d\n",errno);
303     fflush(where);
304     exit(1);
305   }
306 
307   if (chmod("/tmp/netperf_cpu",0644) == -1) {
308     fprintf(where,"create_looper: chmod; errno %d\n",errno);
309     fflush(where);
310     exit(1);
311   }
312 
313   /* with the file descriptor in place, lets be sure that the file is */
314   /* large enough. */
315 
316   if (truncate("/tmp/netperf_cpu",file_size) == -1) {
317     fprintf(where,"create_looper: truncate: errno %d\n",errno);
318     fflush(where);
319     exit(1);
320   }
321 
322   /* the file should be large enough now, so we can mmap it */
323 
324   /* if the system does not have MAP_VARIABLE, just define it to */
325   /* be zero. it is only used/needed on HP-UX (?) raj 4/95 */
326 #ifndef MAP_VARIABLE
327 #define MAP_VARIABLE 0x0000
328 #endif /* MAP_VARIABLE */
329 #ifndef MAP_FILE
330 #define MAP_FILE 0x0000
331 #endif /* MAP_FILE */
332   if ((lib_base_pointer = (long *)mmap(NULL,
333                                        file_size,
334                                        PROT_READ | PROT_WRITE,
335                                        MAP_FILE | MAP_SHARED | MAP_VARIABLE,
336                                        lib_idle_fd,
337                                        0)) == (long *)-1) {
338     fprintf(where,"create_looper: mmap: errno %d\n",errno);
339     fflush(where);
340     exit(1);
341   }
342 
343 
344   if (debug > 1) {
345     fprintf(where,"num CPUs %d, file_size %d, lib_base_pointer %p\n",
346             lib_num_loc_cpus,
347             file_size,
348             lib_base_pointer);
349     fflush(where);
350   }
351 
352   /* we should have a valid base pointer. lets fork */
353 
354   for (i = 0; i < (unsigned int)lib_num_loc_cpus; i++) {
355     switch (lib_idle_pids[i] = fork()) {
356     case -1:
357       perror("netperf: fork");
358       exit(1);
359     case 0:
360       /* we are the child. we could decide to exec some separate */
361       /* program, but that doesn't really seem worthwhile - raj 4/95 */
362 
363       signal(SIGTERM, SIG_DFL);
364       sit_and_spin(i);
365 
366       /* we should never really get here, but if we do, just exit(0) */
367       exit(0);
368       break;
369     default:
370       /* we must be the parent */
371       lib_idle_address[i] = (uint64_t *) ((char *)lib_base_pointer +
372                                       (netlib_get_page_size() *
373                                        PAGES_PER_CHILD * i));
374       if (debug) {
375         fprintf(where,"lib_idle_address[%d] is %p\n",
376                 i,
377                 lib_idle_address[i]);
378         fflush(where);
379       }
380     }
381   }
382 #else
383   /* we are compiled -DWIN32 */
384   if ((lib_base_pointer = malloc(file_size)) == NULL) {
385     fprintf(where,
386             "create_looper_process could not malloc %d bytes\n",
387             file_size);
388     fflush(where);
389     exit(1);
390   }
391 
392   /* now, create all the threads */
393   for(i = 0; i < (unsigned int)lib_num_loc_cpus; i++) {
394     long place_holder;
395     if ((lib_idle_pids[i] = CreateThread(0,
396                                          0,
397                                          (LPTHREAD_START_ROUTINE)sit_and_spin,
398                                          (LPVOID)(ULONG_PTR)i,
399                                          0,
400                                          &place_holder)) == NULL ) {
401       fprintf(where,
402               "create_looper_process: CreateThread failed\n");
403       fflush(where);
404       /* I wonder if I need to look for other threads to kill? */
405       exit(1);
406     }
407     lib_idle_address[i] = (long *) ((char *)lib_base_pointer +
408                                     (netlib_get_page_size() *
409                                      PAGES_PER_CHILD * i));
410     if (debug) {
411       fprintf(where,"lib_idle_address[%d] is %p\n",
412               i,
413               lib_idle_address[i]);
414       fflush(where);
415     }
416   }
417 #endif /* WIN32 */
418 
419   /* we need to have the looper processes settled-in before we do */
420   /* anything with them, so lets sleep for say 30 seconds. raj 4/95 */
421 
422   sleep(30);
423 }
424 
425 void
cpu_util_init(void)426 cpu_util_init(void)
427 {
428   cpu_method = LOOPER;
429 
430   /* we want to get the looper processes going */
431   if (!lib_loopers_running) {
432     start_looper_processes();
433     lib_loopers_running = 1;
434   }
435 
436   return;
437 }
438 
439 /* clean-up any left-over CPU util resources - looper processes,
440    files, whatever.  raj 2005-01-26 */
441 void
cpu_util_terminate()442 cpu_util_terminate() {
443 
444 #ifdef WIN32
445   /* it would seem that if/when the process exits, all the threads */
446   /* will go away too, so I don't think I need any explicit thread */
447   /* killing calls here. raj 1/96 */
448 #else
449 
450   int i;
451 
452   /* now go through and kill-off all the child processes */
453   for (i = 0; i < lib_num_loc_cpus; i++){
454     /* SIGKILL can leave core files behind - thanks to Steinar Haug */
455     /* for pointing that out. */
456     kill(lib_idle_pids[i],SIGTERM);
457   }
458   lib_loopers_running = 0;
459   /* reap the children */
460   while(waitpid(-1, NULL, WNOHANG) > 0) { }
461 
462   /* finally, unlink the mmaped file */
463   munmap((caddr_t)lib_base_pointer,
464          ((netlib_get_page_size() * PAGES_PER_CHILD) *
465           lib_num_loc_cpus));
466   unlink("/tmp/netperf_cpu");
467 #endif
468   return;
469 }
470 
471 int
get_cpu_method(void)472 get_cpu_method(void)
473 {
474   return LOOPER;
475 }
476 
 /* calibrate_idle_rate */

 /* Loop for a number of iterations, sleeping interval seconds each */
 /* time, and see how far the idle counters advance. Return the highest */
 /* counting rate measured to the calling routine. raj 4/95 */
482 
483 float
calibrate_idle_rate(int iterations,int interval)484 calibrate_idle_rate (int iterations, int interval)
485 {
486 
487   uint64_t
488     firstcnt[MAXCPUS],
489     secondcnt[MAXCPUS];
490 
491   float
492     elapsed,
493     temp_rate,
494     rate[MAXTIMES],
495     local_maxrate;
496 
497   long
498     sec,
499     usec;
500 
501   int
502     i,
503     j;
504 
505   struct  timeval time1, time2 ;
506   struct  timezone tz;
507 
508   if (iterations > MAXTIMES) {
509     iterations = MAXTIMES;
510   }
511 
512   local_maxrate = (float)-1.0;
513 
514   for(i = 0; i < iterations; i++) {
515     rate[i] = (float)0.0;
516     for (j = 0; j < lib_num_loc_cpus; j++) {
517       firstcnt[j] = *(lib_idle_address[j]);
518     }
519     gettimeofday (&time1, &tz);
520     sleep(interval);
521     gettimeofday (&time2, &tz);
522 
523     if (time2.tv_usec < time1.tv_usec)
524       {
525         time2.tv_usec += 1000000;
526         time2.tv_sec -=1;
527       }
528     sec = time2.tv_sec - time1.tv_sec;
529     usec = time2.tv_usec - time1.tv_usec;
530     elapsed = (float)sec + ((float)usec/(float)1000000.0);
531 
532     if(debug) {
533       fprintf(where, "Calibration for counter run: %d\n",i);
534       fprintf(where,"\tsec = %ld usec = %ld\n",sec,usec);
535       fprintf(where,"\telapsed time = %g\n",elapsed);
536     }
537 
538     for (j = 0; j < lib_num_loc_cpus; j++) {
539       secondcnt[j] = *(lib_idle_address[j]);
540       if(debug) {
541         /* I know that there are situations where compilers know about */
542         /* long long, but the library fucntions do not... raj 4/95 */
543         fprintf(where,
544                 "\tfirstcnt[%d] = 0x%8.8lx%8.8lx secondcnt[%d] = 0x%8.8lx%8.8lx\n",
545                 j,
546                 (uint32_t)(firstcnt[j]>>32),
547                 (uint32_t)(firstcnt[j]&0xffffffff),
548                 j,
549                 (uint32_t)(secondcnt[j]>>32),
550                 (uint32_t)(secondcnt[j]&0xffffffff));
551       }
552       /* we assume that it would wrap no more than once. we also */
553       /* assume that the result of subtracting will "fit" raj 4/95 */
554       temp_rate = (secondcnt[j] >= firstcnt[j]) ?
555         (float)(secondcnt[j] - firstcnt[j])/elapsed :
556           (float)(secondcnt[j]-firstcnt[j]+MAXLONG)/elapsed;
557       if (temp_rate > rate[i]) rate[i] = temp_rate;
558       if(debug) {
559         fprintf(where,"\trate[%d] = %g\n",i,rate[i]);
560         fflush(where);
561       }
562       if (local_maxrate < rate[i]) local_maxrate = rate[i];
563     }
564   }
565   if(debug) {
566     fprintf(where,"\tlocal maxrate = %g per sec. \n",local_maxrate);
567     fflush(where);
568   }
569   return local_maxrate;
570 }
571 
572 
573 static void
get_cpu_idle(uint64_t * res)574 get_cpu_idle (uint64_t *res)
575 {
576   int i;
577 
578   for (i = 0; i < lib_num_loc_cpus; i++){
579     res[i] = *lib_idle_address[i];
580   }
581 
582 }
583 
584 float
calc_cpu_util_internal(float elapsed_time)585 calc_cpu_util_internal(float elapsed_time)
586 {
587   int i;
588   float correction_factor;
589   float actual_rate;
590 
591   memset(&lib_local_cpu_stats, 0, sizeof(lib_local_cpu_stats));
592 
593   /* It is possible that the library measured a time other than */
594   /* the one that the user want for the cpu utilization */
595   /* calculations - for example, tests that were ended by */
596   /* watchdog timers such as the udp stream test. We let these */
597   /* tests tell up what the elapsed time should be. */
598 
599   if (elapsed_time != 0.0) {
600     correction_factor = (float) 1.0 +
601       ((lib_elapsed - elapsed_time) / elapsed_time);
602   }
603   else {
604     correction_factor = (float) 1.0;
605   }
606 
607   for (i = 0; i < lib_num_loc_cpus; i++) {
608 
609     /* it would appear that on some systems, in loopback, nice is
610      *very* effective, causing the looper process to stop dead in its
611      tracks. if this happens, we need to ensure that the calculation
612      does not go south. raj 6/95 and if we run completely out of idle,
613      the same thing could in theory happen to the USE_KSTAT path. raj
614      8/2000 */
615 
616     if (lib_end_count[i] == lib_start_count[i]) {
617       lib_end_count[i]++;
618     }
619 
620     actual_rate = (lib_end_count[i] > lib_start_count[i]) ?
621       (float)(lib_end_count[i] - lib_start_count[i])/lib_elapsed :
622       (float)(lib_end_count[i] - lib_start_count[i] +
623 	      MAXLONG)/ lib_elapsed;
624     if (debug) {
625       fprintf(where,
626               "calc_cpu_util: actual_rate on processor %d is %f start 0x%8.8lx%8.8lx end 0x%8.8lx%8.8lx\n",
627               i,
628               actual_rate,
629               (uint32_t)(lib_start_count[i]>>32),
630               (uint32_t)(lib_start_count[i]&0xffffffff),
631               (uint32_t)(lib_end_count[i]>>32),
632               (uint32_t)(lib_end_count[i]&0xffffffff));
633     }
634     lib_local_per_cpu_util[i] = (lib_local_maxrate - actual_rate) /
635       lib_local_maxrate * 100;
636     lib_local_per_cpu_util[i] *= correction_factor;
637     lib_local_cpu_stats.cpu_util += lib_local_per_cpu_util[i];
638   }
639   /* we want the average across all n processors */
640   lib_local_cpu_stats.cpu_util /= (float)lib_num_loc_cpus;
641 
642   return lib_local_cpu_stats.cpu_util;
643 }
644 
645 void
cpu_start_internal(void)646 cpu_start_internal(void)
647 {
648   get_cpu_idle(lib_start_count);
649   return;
650 }
651 
652 void
cpu_stop_internal(void)653 cpu_stop_internal(void)
654 {
655   get_cpu_idle(lib_end_count);
656 }
657