/*
 * block queue tracing application
 *
 * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 * Rewrite to have a single thread per CPU (managing all devices on that CPU)
 *	Alan D. Brunelle <alan.brunelle@hp.com> - January 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <getopt.h>
#include <sched.h>
#include <unistd.h>
#include <poll.h>
#include <signal.h>
#include <pthread.h>
#include <locale.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/vfs.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <sys/sendfile.h>

#include "btt/list.h"
#include "blktrace.h"

/*
 * You may want to increase this even more, if you are logging at a high
 * rate and see skipped/missed events
 */
#define BUF_SIZE	(512 * 1024)
#define BUF_NR		(4)

#define FILE_VBUF_SIZE	(128 * 1024)

#define DEBUGFS_TYPE	(0x64626720)
#define TRACE_NET_PORT	(8462)

enum {
	Net_none = 0,
	Net_server,
	Net_client,
};

enum thread_status {
	Th_running,
	Th_leaving,
	Th_error
};

/*
 * Generic stats collected: nevents can be _roughly_ estimated by data_read
 * (discounting pdu...)
 *
 * These fields are updated w/ pdc_dr_update & pdc_nev_update below.
 */
struct pdc_stats {
	unsigned long long data_read;
	unsigned long long nevents;
};

struct devpath {
	struct list_head head;
	char *path;			/* path to device special file */
	char *buts_name;		/* name returned from bt kernel code */
	struct pdc_stats *stats;
	int fd, idx, ncpus;
	unsigned long long drops;

	/*
	 * For piped output only:
	 *
	 * Each tracer will have a tracer_devpath_head that it will add new
	 * data onto. Its list is protected by tracer_devpath_head.mutex,
	 * and it will signal the processing thread using the dp_cond,
	 * dp_mutex & dp_entries variables.
	 */
	struct tracer_devpath_head *heads;

	/*
	 * For network server mode only:
	 */
	struct cl_host *ch;
	u32 cl_id;
	time_t cl_connect_time;
	struct io_info *ios;
};

/*
 * For piped output to stdout we will have each tracer thread (one per dev)
 * tack buffers read from the relay queues on a per-device list.
 *
 * The main thread will then collect trace buffers from each of the lists
 * in turn.
 *
 * We will use a mutex to guard each of the trace_buf lists. The tracers
 * can then signal the main thread using <dp_cond,dp_mutex> and
 * dp_entries. (When a tracer adds an entry while dp_entries is 0, it will
 * signal; while dp_entries is 0, the main thread waits for that condition
 * to be signalled.)
 *
 * adb: It may be better just to have a large buffer per tracer per dev,
 * and then use it as a ring-buffer. This would certainly cut down a lot
 * of malloc/free thrashing, at the cost of more memory movements
 * (potentially).
 */
struct trace_buf {
	struct list_head head;
	struct devpath *dpp;
	void *buf;
	int cpu, len;
};

struct tracer_devpath_head {
	pthread_mutex_t mutex;
	struct list_head head;
	struct trace_buf *prev;
};

/*
 * Used to handle the mmap() interfaces for output file (containing traces)
 */
struct mmap_info {
	void *fs_buf;
	unsigned long long fs_size, fs_max_size, fs_off, fs_buf_len;
	unsigned long buf_size, buf_nr;
	int pagesize;
};

/*
 * Each thread doing work on a (client) side of blktrace will have one
 * of these. The ios array contains input/output information, pfds holds
 * poll() data. The volatile fields provide flags to/from the main
 * executing thread.
 */
struct tracer {
	struct list_head head;
	struct io_info *ios;
	struct pollfd *pfds;
	pthread_t thread;
	int cpu, nios;
	volatile int status, is_done;
};

/*
 * Networking stuff follows. We include a magic number so we know whether
 * to do endianness conversion or not.
 *
 * The len field is overloaded:
 *	0 - Indicates an "open" - allowing the server to set up for a dev/cpu
 *	1 - Indicates a "close" - shut down the connection in an orderly manner
 *
 * The cpu field is overloaded on close: it will contain the number of drops.
 */
struct blktrace_net_hdr {
	u32 magic;		/* same as trace magic */
	char buts_name[32];	/* trace name */
	u32 cpu;		/* for which cpu */
	u32 max_cpus;
	u32 len;		/* length of following trace data */
	u32 cl_id;		/* id for set of client per-cpu connections */
	u32 buf_size;		/* client buf_size for this trace */
	u32 buf_nr;		/* client buf_nr for this trace */
	u32 page_size;		/* client page_size for this trace */
};

/*
 * Each host encountered has one of these. The head is used to link this
 * on to the network server's ch_list. Connections associated with this
 * host are linked on conn_list, and any devices traced on that host
 * are connected on the devpaths list.
 */
struct cl_host {
	struct list_head head;
	struct list_head conn_list;
	struct list_head devpaths;
	struct net_server_s *ns;
	char *hostname;
	struct in_addr cl_in_addr;
	int connects, ndevs, cl_opens;
};

/*
 * Each connection (client to server socket ('fd')) has one of these. A
 * back reference to the host ('ch'), and list heads (for the host
 * list, and the network server conn_list) are also included.
 */
struct cl_conn {
	struct list_head ch_head, ns_head;
	struct cl_host *ch;
	int fd, ncpus;
	time_t connect_time;
};

/*
 * The network server requires some poll structures to be maintained -
 * one per connection currently on conn_list. The nchs/ch_list values
 * are for each host connected to this server. The addr field is used
 * as scratch as new connections are established.
 */
struct net_server_s {
	struct list_head conn_list;
	struct list_head ch_list;
	struct pollfd *pfds;
	int listen_fd, connects, nchs;
	struct sockaddr_in addr;
};

/*
 * This structure is (generically) used to provide information
 * for a read-to-write set of values.
 *
 * ifn & ifd represent input information
 *
 * ofn, ofd, ofp, obuf & mmap_info are used for output file (optionally).
 */
struct io_info {
	struct devpath *dpp;
	FILE *ofp;
	char *obuf;
	struct cl_conn *nc;	/* Server network connection */

	/*
	 * mmap controlled output files
	 */
	struct mmap_info mmap_info;

	/*
	 * Client network fields
	 */
	unsigned int ready;
	unsigned long long data_queued;

	/*
	 * Input/output file descriptors & names
	 */
	int ifd, ofd;
	char ifn[MAXPATHLEN + 64];
	char ofn[MAXPATHLEN + 64];
};

static char blktrace_version[] = "2.0.0";

/*
 * Linkage to blktrace helper routines (trace conversions)
 */
int data_is_native = -1;

static int ndevs;
static int ncpus;
static int pagesize;
static int act_mask = ~0U;
static int kill_running_trace;
static int stop_watch;
static int piped_output;

static char *debugfs_path = "/sys/kernel/debug";
static char *output_name;
static char *output_dir;

static unsigned long buf_size = BUF_SIZE;
static unsigned long buf_nr = BUF_NR;

static FILE *pfp;

static LIST_HEAD(devpaths);
static LIST_HEAD(tracers);

static volatile int done;

/*
 * tracer threads add entries, the main thread takes them off and processes
 * them. These protect the dp_entries variable.
 */
static pthread_cond_t dp_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t dp_mutex = PTHREAD_MUTEX_INITIALIZER;
static volatile int dp_entries;

/*
 * These synchronize master / thread interactions.
 */
static pthread_cond_t mt_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t mt_mutex = PTHREAD_MUTEX_INITIALIZER;
static volatile int nthreads_running;
static volatile int nthreads_leaving;
static volatile int nthreads_error;
static volatile int tracers_run;

/*
 * network cmd line params
 */
static struct sockaddr_in hostname_addr;
static char hostname[MAXHOSTNAMELEN];
static int net_port = TRACE_NET_PORT;
static int net_use_sendfile = 1;
static int net_mode;
static int *cl_fds;

static int (*handle_pfds)(struct tracer *, int, int);
static int (*handle_list)(struct tracer_devpath_head *, struct list_head *);

#define S_OPTS	"d:a:A:r:o:kw:vVb:n:D:lh:p:sI:"
static struct option l_opts[] = {
	{
		.name = "dev",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'd'
	},
	{
		.name = "input-devs",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'I'
	},
	{
		.name = "act-mask",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'a'
	},
	{
		.name = "set-mask",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'A'
	},
	{
		.name = "relay",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'r'
	},
	{
		.name = "output",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'o'
	},
	{
		.name = "kill",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'k'
	},
	{
		.name = "stopwatch",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'w'
	},
	{
		.name = "version",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'v'
	},
	{
		.name = "version",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'V'
	},
	{
		.name = "buffer-size",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'b'
	},
	{
		.name = "num-sub-buffers",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'n'
	},
	{
		.name = "output-dir",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'D'
	},
	{
		.name = "listen",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 'l'
	},
	{
		.name = "host",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'h'
	},
	{
		.name = "port",
		.has_arg = required_argument,
		.flag = NULL,
		.val = 'p'
	},
	{
		.name = "no-sendfile",
		.has_arg = no_argument,
		.flag = NULL,
		.val = 's'
	},
	{
		.name = NULL,
	}
};

static char usage_str[] = \
	"-d <dev> [ -r debugfs path ] [ -o <output> ] [-k ] [ -w time ]\n" \
	"[ -a action ] [ -A action mask ] [ -I <devs file> ] [ -v ]\n\n" \
	"\t-d Use specified device. May also be given last after options\n" \
	"\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n" \
	"\t-o File(s) to send output to\n" \
	"\t-D Directory to prepend to output file names\n" \
	"\t-k Kill a running trace\n" \
	"\t-w Stop after defined time, in seconds\n" \
	"\t-a Only trace specified actions. See documentation\n" \
	"\t-A Give trace mask as a single value. See documentation\n" \
	"\t-b Sub buffer size in KiB\n" \
	"\t-n Number of sub buffers\n" \
	"\t-l Run in network listen mode (blktrace server)\n" \
	"\t-h Run in network client mode, connecting to the given host\n" \
	"\t-p Network port to use (default 8462)\n" \
	"\t-s Make the network client NOT use sendfile() to transfer data\n" \
	"\t-I Add devices found in <devs file>\n" \
	"\t-V Print program version info\n\n";

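/*
 * Typical invocations (device names and hosts below are illustrative
 * examples only, not taken from this source):
 *
 *	blktrace -d /dev/sda -o sda -w 30	trace /dev/sda for 30 seconds
 *	blktrace -l				run as a network server
 *	blktrace -d /dev/sda -h <server>	ship traces to a remote server
 */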
static void clear_events(struct pollfd *pfd)
{
	pfd->events = 0;
	pfd->revents = 0;
}

static inline int net_client_use_sendfile(void)
{
	return net_mode == Net_client && net_use_sendfile;
}

static inline int net_client_use_send(void)
{
	return net_mode == Net_client && !net_use_sendfile;
}

static inline int use_tracer_devpaths(void)
{
	return piped_output || net_client_use_send();
}

static inline int in_addr_eq(struct in_addr a, struct in_addr b)
{
	return a.s_addr == b.s_addr;
}

static inline void pdc_dr_update(struct devpath *dpp, int cpu, int data_read)
{
	dpp->stats[cpu].data_read += data_read;
}

static inline void pdc_nev_update(struct devpath *dpp, int cpu, int nevents)
{
	dpp->stats[cpu].nevents += nevents;
}

static void show_usage(char *prog)
{
	fprintf(stderr, "Usage: %s %s %s", prog, blktrace_version, usage_str);
}

/*
 * Create a timespec 'msec' milliseconds into the future
 */
static inline void make_timespec(struct timespec *tsp, long delta_msec)
{
	struct timeval now;

	gettimeofday(&now, NULL);
	tsp->tv_sec = now.tv_sec;
	tsp->tv_nsec = 1000L * now.tv_usec;

	tsp->tv_nsec += (delta_msec * 1000000L);
	if (tsp->tv_nsec >= 1000000000L) {	/* tv_nsec must stay < 1e9 */
		long secs = tsp->tv_nsec / 1000000000L;

		tsp->tv_sec += secs;
		tsp->tv_nsec -= (secs * 1000000000L);
	}
}

/*
 * Add a timer to ensure wait ends
 */
static void t_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
{
	struct timespec ts;

	make_timespec(&ts, 50);
	pthread_cond_timedwait(cond, mutex, &ts);
}

static void unblock_tracers(void)
{
	pthread_mutex_lock(&mt_mutex);
	tracers_run = 1;
	pthread_cond_broadcast(&mt_cond);
	pthread_mutex_unlock(&mt_mutex);
}

static void tracer_wait_unblock(struct tracer *tp)
{
	pthread_mutex_lock(&mt_mutex);
	while (!tp->is_done && !tracers_run)
		pthread_cond_wait(&mt_cond, &mt_mutex);
	pthread_mutex_unlock(&mt_mutex);
}

static void tracer_signal_ready(struct tracer *tp,
				enum thread_status th_status,
				int status)
{
	pthread_mutex_lock(&mt_mutex);
	tp->status = status;

	if (th_status == Th_running)
		nthreads_running++;
	else if (th_status == Th_error)
		nthreads_error++;
	else
		nthreads_leaving++;

	pthread_cond_signal(&mt_cond);
	pthread_mutex_unlock(&mt_mutex);
}

static void wait_tracers_ready(int ncpus_started)
{
	pthread_mutex_lock(&mt_mutex);
	while ((nthreads_running + nthreads_error) < ncpus_started)
		t_pthread_cond_wait(&mt_cond, &mt_mutex);
	pthread_mutex_unlock(&mt_mutex);
}

static void wait_tracers_leaving(void)
{
	pthread_mutex_lock(&mt_mutex);
	while (nthreads_leaving < nthreads_running)
		t_pthread_cond_wait(&mt_cond, &mt_mutex);
	pthread_mutex_unlock(&mt_mutex);
}

static void init_mmap_info(struct mmap_info *mip)
{
	mip->buf_size = buf_size;
	mip->buf_nr = buf_nr;
	mip->pagesize = pagesize;
}

static void net_close_connection(int *fd)
{
	shutdown(*fd, SHUT_RDWR);
	close(*fd);
	*fd = -1;
}

static void dpp_free(struct devpath *dpp)
{
	if (dpp->stats)
		free(dpp->stats);
	if (dpp->ios)
		free(dpp->ios);
	if (dpp->path)
		free(dpp->path);
	if (dpp->buts_name)
		free(dpp->buts_name);
	free(dpp);
}

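/*
 * Pin the calling thread to the given CPU, so that each tracer thread
 * reads the relay files for its own CPU (no-op on Android builds).
 */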
static int lock_on_cpu(int cpu)
{
#ifndef _ANDROID_
	cpu_set_t cpu_mask;

	CPU_ZERO(&cpu_mask);
	CPU_SET(cpu, &cpu_mask);
	if (sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask) < 0)
		return errno;
#endif

	return 0;
}

#ifndef _ANDROID_
static int increase_limit(int resource, rlim_t increase)
{
	struct rlimit rlim;
	int save_errno = errno;

	if (!getrlimit(resource, &rlim)) {
		rlim.rlim_cur += increase;
		if (rlim.rlim_cur >= rlim.rlim_max)
			rlim.rlim_max = rlim.rlim_cur + increase;

		if (!setrlimit(resource, &rlim))
			return 1;
	}

	errno = save_errno;
	return 0;
}
#endif

static int handle_open_failure(void)
{
	if (errno == ENFILE || errno == EMFILE)
#ifndef _ANDROID_
		return increase_limit(RLIMIT_NOFILE, 16);
#else
		return 0;	/* can't raise limits here, so don't retry */
#endif
	return 0;
}

static int handle_mem_failure(size_t length)
{
	if (errno == ENFILE)
		return handle_open_failure();
	else if (errno == ENOMEM)
#ifndef _ANDROID_
		return increase_limit(RLIMIT_MEMLOCK, 2 * length);
#else
		return 0;	/* can't raise limits here, so don't retry */
#endif
	return 0;
}

static FILE *my_fopen(const char *path, const char *mode)
{
	FILE *fp;

	do {
		fp = fopen(path, mode);
	} while (fp == NULL && handle_open_failure());

	return fp;
}

static int my_open(const char *path, int flags)
{
	int fd;

	do {
		fd = open(path, flags);
	} while (fd < 0 && handle_open_failure());

	return fd;
}

static int my_socket(int domain, int type, int protocol)
{
	int fd;

	do {
		fd = socket(domain, type, protocol);
	} while (fd < 0 && handle_open_failure());

	return fd;
}

static int my_accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
{
	int fd;

	do {
		fd = accept(sockfd, addr, addrlen);
	} while (fd < 0 && handle_open_failure());

	return fd;
}

static void *my_mmap(void *addr, size_t length, int prot, int flags, int fd,
		     off_t offset)
{
	void *new;

	do {
		new = mmap(addr, length, prot, flags, fd, offset);
	} while (new == MAP_FAILED && handle_mem_failure(length));

	return new;
}

static int my_mlock(const void *addr, size_t len)
{
	int ret;

	do {
		ret = mlock(addr, len);
	} while (ret < 0 && handle_mem_failure(len));

	return ret;
}

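/*
 * Make sure the mapped output window can take at least 'maxlen' more
 * bytes: drop the old window, extend the file with ftruncate() and
 * map (and mlock) a new window starting at the current page-aligned
 * file offset (fs_off holds the offset within that first page).
 */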
static int setup_mmap(int fd, unsigned int maxlen, struct mmap_info *mip)
{
	if (mip->fs_off + maxlen > mip->fs_buf_len) {
		unsigned long nr = max(16, mip->buf_nr);

		if (mip->fs_buf) {
			munlock(mip->fs_buf, mip->fs_buf_len);
			munmap(mip->fs_buf, mip->fs_buf_len);
			mip->fs_buf = NULL;
		}

		mip->fs_off = mip->fs_size & (mip->pagesize - 1);
		mip->fs_buf_len = (nr * mip->buf_size) - mip->fs_off;
		mip->fs_max_size += mip->fs_buf_len;

		if (ftruncate(fd, mip->fs_max_size) < 0) {
			perror("setup_mmap: ftruncate");
			return 1;
		}

		mip->fs_buf = my_mmap(NULL, mip->fs_buf_len, PROT_WRITE,
				      MAP_SHARED, fd,
				      mip->fs_size - mip->fs_off);
		if (mip->fs_buf == MAP_FAILED) {
			perror("setup_mmap: mmap");
			return 1;
		}
		my_mlock(mip->fs_buf, mip->fs_buf_len);
	}

	return 0;
}

static int __stop_trace(int fd)
{
	/*
	 * Should be stopped, don't complain if it isn't
	 */
	ioctl(fd, BLKTRACESTOP);
	return ioctl(fd, BLKTRACETEARDOWN);
}

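/*
 * Write a block of trace data to the output stream (pfp), retrying the
 * fwrite() if it is interrupted. Returns 0 on success, 1 on error.
 */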
static int write_data(char *buf, int len)
{
	int ret;

rewrite:
	ret = fwrite(buf, len, 1, pfp);
	if (ferror(pfp) || ret != 1) {
		if (errno == EINTR) {
			clearerr(pfp);
			goto rewrite;
		}

		if (!piped_output || (errno != EPIPE && errno != EBADF)) {
			fprintf(stderr, "write(%d) failed: %d/%s\n",
				len, errno, strerror(errno));
		}
		goto err;
	}

	fflush(pfp);
	return 0;

err:
	clearerr(pfp);
	return 1;
}

/*
 * Returns the number of bytes read (successfully)
 */
static int __net_recv_data(int fd, void *buf, unsigned int len)
{
	unsigned int bytes_left = len;

	while (bytes_left && !done) {
		int ret = recv(fd, buf, bytes_left, MSG_WAITALL);

		if (ret == 0)
			break;
		else if (ret < 0) {
			if (errno == EAGAIN) {
				usleep(50);
				continue;
			}
			perror("server: net_recv_data: recv failed");
			break;
		} else {
			buf += ret;
			bytes_left -= ret;
		}
	}

	return len - bytes_left;
}

static int net_recv_data(int fd, void *buf, unsigned int len)
{
	return __net_recv_data(fd, buf, len);
}

/*
 * Returns number of bytes written
 */
static int net_send_data(int fd, void *buf, unsigned int buf_len)
{
	int ret;
	unsigned int bytes_left = buf_len;

	while (bytes_left) {
		ret = send(fd, buf, bytes_left, 0);
		if (ret < 0) {
			perror("send");
			break;
		}

		buf += ret;
		bytes_left -= ret;
	}

	return buf_len - bytes_left;
}

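/*
 * Fill in and send a network header for 'buts_name'/'cpu', with 'len'
 * bytes of trace data to follow. Returns 0 on success, 1 on a short send.
 */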
static int net_send_header(int fd, int cpu, char *buts_name, int len)
{
	struct blktrace_net_hdr hdr;

	memset(&hdr, 0, sizeof(hdr));

	hdr.magic = BLK_IO_TRACE_MAGIC;
	strncpy(hdr.buts_name, buts_name, sizeof(hdr.buts_name));
	hdr.buts_name[sizeof(hdr.buts_name) - 1] = '\0';
	hdr.cpu = cpu;
	hdr.max_cpus = ncpus;
	hdr.len = len;
	hdr.cl_id = getpid();
	hdr.buf_size = buf_size;
	hdr.buf_nr = buf_nr;
	hdr.page_size = pagesize;

	return net_send_data(fd, &hdr, sizeof(hdr)) != sizeof(hdr);
}

static void net_send_open_close(int fd, int cpu, char *buts_name, int len)
{
	struct blktrace_net_hdr ret_hdr;

	net_send_header(fd, cpu, buts_name, len);
	net_recv_data(fd, &ret_hdr, sizeof(ret_hdr));
}

static void net_send_open(int fd, int cpu, char *buts_name)
{
	net_send_open_close(fd, cpu, buts_name, 0);
}

static void net_send_close(int fd, char *buts_name, int drops)
{
	/*
	 * Overload CPU w/ number of drops
	 *
	 * XXX: Need to clear/set done around call - done=1 (which
	 * is true here) stops reads from happening... :-(
	 */
	done = 0;
	net_send_open_close(fd, drops, buts_name, 1);
	done = 1;
}

static void ack_open_close(int fd, char *buts_name)
{
	net_send_header(fd, 0, buts_name, 2);
}

static void net_send_drops(int fd)
{
	struct list_head *p;

	__list_for_each(p, &devpaths) {
		struct devpath *dpp = list_entry(p, struct devpath, head);

		net_send_close(fd, dpp->buts_name, dpp->drops);
	}
}

/*
 * Returns:
 *	 0: "EOF"
 *	 1: OK
 *	-1: Error
 */
static int net_get_header(struct cl_conn *nc, struct blktrace_net_hdr *bnh)
{
	int bytes_read;
	int fl = fcntl(nc->fd, F_GETFL);

	fcntl(nc->fd, F_SETFL, fl | O_NONBLOCK);
	bytes_read = __net_recv_data(nc->fd, bnh, sizeof(*bnh));
	fcntl(nc->fd, F_SETFL, fl & ~O_NONBLOCK);

	if (bytes_read == sizeof(*bnh))
		return 1;
	else if (bytes_read == 0)
		return 0;
	else
		return -1;
}

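/*
 * Resolve the command line 'hostname' (dotted quad or DNS name) into
 * hostname_addr for later connect() calls. Returns 0 on success.
 */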
static int net_setup_addr(void)
{
	struct sockaddr_in *addr = &hostname_addr;

	memset(addr, 0, sizeof(*addr));
	addr->sin_family = AF_INET;
	addr->sin_port = htons(net_port);

	if (inet_aton(hostname, &addr->sin_addr) != 1) {
		struct hostent *hent;
retry:
		hent = gethostbyname(hostname);
		if (!hent) {
			if (h_errno == TRY_AGAIN) {
				usleep(100);
				goto retry;
			} else if (h_errno == NO_RECOVERY) {
				fprintf(stderr, "gethostbyname(%s): "
					"non-recoverable error encountered\n",
					hostname);
			} else {
				/*
				 * HOST_NOT_FOUND, NO_ADDRESS or NO_DATA
				 */
				fprintf(stderr, "Host %s not found\n",
					hostname);
			}
			return 1;
		}

		memcpy(&addr->sin_addr, hent->h_addr, 4);
		snprintf(hostname, sizeof(hostname), "%s", hent->h_name);
	}

	return 0;
}

static int net_setup_client(void)
{
	int fd;
	struct sockaddr_in *addr = &hostname_addr;

	fd = my_socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0) {
		perror("client: socket");
		return -1;
	}

	if (connect(fd, (struct sockaddr *)addr, sizeof(*addr)) < 0) {
		if (errno == ECONNREFUSED)
			fprintf(stderr,
				"\nclient: Connection to %s refused, "
				"perhaps the server is not started?\n\n",
				hostname);
		else
			perror("client: connect");

		close(fd);
		return -1;
	}

	return fd;
}

static int open_client_connections(void)
{
	int cpu;

	cl_fds = calloc(ncpus, sizeof(*cl_fds));
	for (cpu = 0; cpu < ncpus; cpu++) {
		cl_fds[cpu] = net_setup_client();
		if (cl_fds[cpu] < 0)
			goto err;
	}
	return 0;

err:
	/*
	 * Close only the fds that were successfully opened
	 * (cl_fds[cpu] is the one that just failed).
	 */
	while (cpu > 0)
		close(cl_fds[--cpu]);
	free(cl_fds);
	return 1;
}

static void close_client_connections(void)
{
	if (cl_fds) {
		int cpu, *fdp;

		for (cpu = 0, fdp = cl_fds; cpu < ncpus; cpu++, fdp++) {
			if (*fdp >= 0) {
				net_send_drops(*fdp);
				net_close_connection(fdp);
			}
		}
		free(cl_fds);
	}
}

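/*
 * Issue BLKTRACESETUP on every added device, recording the buts name
 * the kernel hands back and allocating the per-CPU stats array.
 */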
static void setup_buts(void)
{
	struct list_head *p;

	__list_for_each(p, &devpaths) {
		struct blk_user_trace_setup buts;
		struct devpath *dpp = list_entry(p, struct devpath, head);

		memset(&buts, 0, sizeof(buts));
		buts.buf_size = buf_size;
		buts.buf_nr = buf_nr;
		buts.act_mask = act_mask;
		if (ioctl(dpp->fd, BLKTRACESETUP, &buts) >= 0) {
			dpp->ncpus = ncpus;
			dpp->buts_name = strdup(buts.name);
			if (dpp->stats)
				free(dpp->stats);
			dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
		} else
			fprintf(stderr, "BLKTRACESETUP(2) %s failed: %d/%s\n",
				dpp->path, errno, strerror(errno));
	}
}

static void start_buts(void)
{
	struct list_head *p;

	__list_for_each(p, &devpaths) {
		struct devpath *dpp = list_entry(p, struct devpath, head);

		if (ioctl(dpp->fd, BLKTRACESTART) < 0) {
			fprintf(stderr, "BLKTRACESTART %s failed: %d/%s\n",
				dpp->path, errno, strerror(errno));
		}
	}
}

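/*
 * Read the kernel's dropped-event count for this device from debugfs
 * (<debugfs>/block/<buts_name>/dropped). Not all kernels provide it.
 */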
static int get_drops(struct devpath *dpp)
{
	int fd, drops = 0;
	char fn[MAXPATHLEN + 64], tmp[256];

	snprintf(fn, sizeof(fn), "%s/block/%s/dropped", debugfs_path,
		 dpp->buts_name);

	fd = my_open(fn, O_RDONLY);
	if (fd < 0) {
		/*
		 * This may be ok: the kernel may not support
		 * dropped counts.
		 */
		if (errno != ENOENT)
			fprintf(stderr, "Could not open %s: %d/%s\n",
				fn, errno, strerror(errno));
		return 0;
	} else {
		ssize_t bytes = read(fd, tmp, sizeof(tmp) - 1);

		if (bytes < 0)
			fprintf(stderr, "Could not read %s: %d/%s\n",
				fn, errno, strerror(errno));
		else {
			tmp[bytes] = '\0';	/* NUL-terminate for atoi() */
			drops = atoi(tmp);
		}
	}
	close(fd);

	return drops;
}

static void get_all_drops(void)
{
	struct list_head *p;

	__list_for_each(p, &devpaths) {
		struct devpath *dpp = list_entry(p, struct devpath, head);

		dpp->drops = get_drops(dpp);
	}
}

static inline struct trace_buf *alloc_trace_buf(int cpu, int bufsize)
{
	struct trace_buf *tbp;

	tbp = malloc(sizeof(*tbp) + bufsize);
	INIT_LIST_HEAD(&tbp->head);
	tbp->len = 0;
	tbp->buf = (void *)(tbp + 1);
	tbp->cpu = cpu;
	tbp->dpp = NULL;	/* Will be set when tbp is added */

	return tbp;
}

static void free_tracer_heads(struct devpath *dpp)
{
	int cpu;
	struct tracer_devpath_head *hd;

	for (cpu = 0, hd = dpp->heads; cpu < ncpus; cpu++, hd++) {
		if (hd->prev)
			free(hd->prev);

		pthread_mutex_destroy(&hd->mutex);
	}
	free(dpp->heads);
}

static int setup_tracer_devpaths(void)
{
	struct list_head *p;

	if (net_client_use_send())
		if (open_client_connections())
			return 1;

	__list_for_each(p, &devpaths) {
		int cpu;
		struct tracer_devpath_head *hd;
		struct devpath *dpp = list_entry(p, struct devpath, head);

		dpp->heads = calloc(ncpus, sizeof(struct tracer_devpath_head));
		for (cpu = 0, hd = dpp->heads; cpu < ncpus; cpu++, hd++) {
			INIT_LIST_HEAD(&hd->head);
			pthread_mutex_init(&hd->mutex, NULL);
			hd->prev = NULL;
		}
	}

	return 0;
}

static inline void add_trace_buf(struct devpath *dpp, int cpu,
				 struct trace_buf **tbpp)
{
	struct trace_buf *tbp = *tbpp;
	struct tracer_devpath_head *hd = &dpp->heads[cpu];

	tbp->dpp = dpp;

	pthread_mutex_lock(&hd->mutex);
	list_add_tail(&tbp->head, &hd->head);
	pthread_mutex_unlock(&hd->mutex);

	*tbpp = alloc_trace_buf(cpu, buf_size);
}

static inline void incr_entries(int entries_handled)
{
	pthread_mutex_lock(&dp_mutex);
	if (dp_entries == 0)
		pthread_cond_signal(&dp_cond);
	dp_entries += entries_handled;
	pthread_mutex_unlock(&dp_mutex);
}

static void decr_entries(int handled)
{
	pthread_mutex_lock(&dp_mutex);
	dp_entries -= handled;
	pthread_mutex_unlock(&dp_mutex);
}

static int wait_empty_entries(void)
{
	pthread_mutex_lock(&dp_mutex);
	while (!done && dp_entries == 0)
		t_pthread_cond_wait(&dp_cond, &dp_mutex);
	pthread_mutex_unlock(&dp_mutex);

	return !done;
}

static int add_devpath(char *path)
{
	int fd;
	struct devpath *dpp;

	/*
	 * Verify device is valid before going too far
	 */
	fd = my_open(path, O_RDONLY | O_NONBLOCK);
	if (fd < 0) {
		fprintf(stderr, "Invalid path %s specified: %d/%s\n",
			path, errno, strerror(errno));
		return 1;
	}

	dpp = malloc(sizeof(*dpp));
	memset(dpp, 0, sizeof(*dpp));
	dpp->path = strdup(path);
	dpp->fd = fd;
	dpp->idx = ndevs++;
	list_add_tail(&dpp->head, &devpaths);

	return 0;
}

static void rel_devpaths(void)
{
	struct list_head *p, *q;

	list_for_each_safe(p, q, &devpaths) {
		struct devpath *dpp = list_entry(p, struct devpath, head);

		list_del(&dpp->head);
		__stop_trace(dpp->fd);
		close(dpp->fd);

		if (dpp->heads)
			free_tracer_heads(dpp);

		dpp_free(dpp);
		ndevs--;
	}
}

static int flush_subbuf_net(struct trace_buf *tbp)
{
	int fd = cl_fds[tbp->cpu];
	struct devpath *dpp = tbp->dpp;

	if (net_send_header(fd, tbp->cpu, dpp->buts_name, tbp->len))
		return 1;
	else if (net_send_data(fd, tbp->buf, tbp->len) != tbp->len)
		return 1;

	return 0;
}

static int
handle_list_net(__attribute__((__unused__))struct tracer_devpath_head *hd,
		struct list_head *list)
{
	struct trace_buf *tbp;
	struct list_head *p, *q;
	int entries_handled = 0;

	list_for_each_safe(p, q, list) {
		tbp = list_entry(p, struct trace_buf, head);

		list_del(&tbp->head);
		entries_handled++;

		if (cl_fds[tbp->cpu] >= 0) {
			if (flush_subbuf_net(tbp)) {
				close(cl_fds[tbp->cpu]);
				cl_fds[tbp->cpu] = -1;
			}
		}

		free(tbp);
	}

	return entries_handled;
}

/*
 * Tack 'tbp's buf onto the tail of 'prev's buf
 */
static struct trace_buf *tb_combine(struct trace_buf *prev,
				    struct trace_buf *tbp)
{
	unsigned long tot_len;

	tot_len = prev->len + tbp->len;
	if (tot_len > buf_size) {
		/*
		 * tbp->head isn't connected (it was 'prev'
		 * so it had been taken off of the list
		 * before). Therefore, we can realloc
		 * the whole structure - realloc() must get
		 * the start of the allocation ('prev'), not
		 * the embedded buffer pointer - as the other
		 * fields are "static".
		 */
		prev = realloc(prev, sizeof(*prev) + tot_len);
		prev->buf = (void *)(prev + 1);
	}

	memcpy(prev->buf + prev->len, tbp->buf, tbp->len);
	prev->len = tot_len;

	free(tbp);
	return prev;
}

static int handle_list_file(struct tracer_devpath_head *hd,
			    struct list_head *list)
{
	int off, t_len, nevents;
	struct blk_io_trace *t;
	struct list_head *p, *q;
	int entries_handled = 0;
	struct trace_buf *tbp, *prev;

	prev = hd->prev;
	list_for_each_safe(p, q, list) {
		tbp = list_entry(p, struct trace_buf, head);
		list_del(&tbp->head);
		entries_handled++;

		/*
		 * If there was some leftover before, tack this new
		 * entry onto the tail of the previous one.
		 */
		if (prev)
			tbp = tb_combine(prev, tbp);

		/*
		 * See how many whole traces there are - send them
		 * all out in one go.
		 */
		off = 0;
		nevents = 0;
		while (off + (int)sizeof(*t) <= tbp->len) {
			t = (struct blk_io_trace *)(tbp->buf + off);
			t_len = sizeof(*t) + t->pdu_len;
			if (off + t_len > tbp->len)
				break;

			off += t_len;
			nevents++;
		}
		if (nevents)
			pdc_nev_update(tbp->dpp, tbp->cpu, nevents);

		/*
		 * Write any full set of traces, any remaining data is kept
		 * for the next pass.
		 */
		if (off) {
			if (write_data(tbp->buf, off) || off == tbp->len) {
				free(tbp);
				prev = NULL;
			} else {
				/*
				 * Move valid data to beginning of buffer
				 */
				tbp->len -= off;
				memmove(tbp->buf, tbp->buf + off, tbp->len);
				prev = tbp;
			}
		} else
			prev = tbp;
	}
	hd->prev = prev;

	return entries_handled;
}

static void __process_trace_bufs(void)
{
	int cpu;
	struct list_head *p;
	struct list_head list;
	int handled = 0;

	__list_for_each(p, &devpaths) {
		struct devpath *dpp = list_entry(p, struct devpath, head);
		struct tracer_devpath_head *hd = dpp->heads;

		for (cpu = 0; cpu < ncpus; cpu++, hd++) {
			pthread_mutex_lock(&hd->mutex);
			if (list_empty(&hd->head)) {
				pthread_mutex_unlock(&hd->mutex);
				continue;
			}

			list_replace_init(&hd->head, &list);
			pthread_mutex_unlock(&hd->mutex);

			handled += handle_list(hd, &list);
		}
	}

	if (handled)
		decr_entries(handled);
}

static void process_trace_bufs(void)
{
	while (wait_empty_entries())
		__process_trace_bufs();
}

static void clean_trace_bufs(void)
{
	/*
	 * No mutex needed here: we're only reading from the lists,
	 * tracers are done
	 */
	while (dp_entries)
		__process_trace_bufs();
}

static inline void read_err(int cpu, char *ifn)
{
	if (errno != EAGAIN)
		fprintf(stderr, "Thread %d failed read of %s: %d/%s\n",
			cpu, ifn, errno, strerror(errno));
}

static int net_sendfile(struct io_info *iop)
{
	int ret;

	ret = sendfile(iop->ofd, iop->ifd, NULL, iop->ready);
	if (ret < 0) {
		perror("sendfile");
		return 1;
	} else if (ret < (int)iop->ready) {
		fprintf(stderr, "short sendfile send (%d of %d)\n",
			ret, iop->ready);
		return 1;
	}

	return 0;
}

static inline int net_sendfile_data(struct tracer *tp, struct io_info *iop)
{
	struct devpath *dpp = iop->dpp;

	if (net_send_header(iop->ofd, tp->cpu, dpp->buts_name, iop->ready))
		return 1;
	return net_sendfile(iop);
}

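/*
 * Build the per-CPU output file name:
 * [<output_dir>/][<host>-<connect time>/]<name>.blktrace.<cpu>,
 * creating the destination directory if need be.
 */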
static int fill_ofname(struct io_info *iop, int cpu)
{
	int len;
	struct stat sb;
	char *dst = iop->ofn;

	if (output_dir)
		len = snprintf(iop->ofn, sizeof(iop->ofn), "%s/", output_dir);
	else
		len = snprintf(iop->ofn, sizeof(iop->ofn), "./");

	if (net_mode == Net_server) {
		struct cl_conn *nc = iop->nc;

		len += sprintf(dst + len, "%s-", nc->ch->hostname);
		len += strftime(dst + len, 64, "%F-%T/",
				gmtime(&iop->dpp->cl_connect_time));
	}

	if (stat(iop->ofn, &sb) < 0) {
		if (errno != ENOENT) {
			fprintf(stderr,
				"Destination dir %s stat failed: %d/%s\n",
				iop->ofn, errno, strerror(errno));
			return 1;
		}
		/*
		 * There is no synchronization between multiple threads
		 * trying to create the directory at once. It's harmless
		 * to let them try, so just detect the problem and move on.
		 */
		if (mkdir(iop->ofn, 0755) < 0 && errno != EEXIST) {
			fprintf(stderr,
				"Destination dir %s can't be made: %d/%s\n",
				iop->ofn, errno, strerror(errno));
			return 1;
		}
	}

	if (output_name)
		snprintf(iop->ofn + len, sizeof(iop->ofn) - len,
			 "%s.blktrace.%d", output_name, cpu);
	else
		snprintf(iop->ofn + len, sizeof(iop->ofn) - len,
			 "%s.blktrace.%d", iop->dpp->buts_name, cpu);

	return 0;
}

static int set_vbuf(struct io_info *iop, int mode, size_t size)
{
	iop->obuf = malloc(size);
	if (setvbuf(iop->ofp, iop->obuf, mode, size)) {
		fprintf(stderr, "setvbuf(%s, %d) failed: %d/%s\n",
			iop->dpp->path, (int)size, errno,
			strerror(errno));
		free(iop->obuf);
		return 1;
	}

	return 0;
}

static int iop_open(struct io_info *iop, int cpu)
{
	iop->ofd = -1;
	if (fill_ofname(iop, cpu))
		return 1;

	iop->ofp = my_fopen(iop->ofn, "w+");
	if (iop->ofp == NULL) {
		fprintf(stderr, "Open output file %s failed: %d/%s\n",
			iop->ofn, errno, strerror(errno));
		return 1;
	}

	if (set_vbuf(iop, _IOLBF, FILE_VBUF_SIZE)) {
		fprintf(stderr, "set_vbuf for file %s failed: %d/%s\n",
			iop->ofn, errno, strerror(errno));
		fclose(iop->ofp);
		return 1;
	}

	iop->ofd = fileno(iop->ofp);
	return 0;
}

static void close_iop(struct io_info *iop)
{
	struct mmap_info *mip = &iop->mmap_info;

	if (mip->fs_buf)
		munmap(mip->fs_buf, mip->fs_buf_len);

	if (!piped_output) {
		if (ftruncate(fileno(iop->ofp), mip->fs_size) < 0) {
			fprintf(stderr,
				"Ignoring err: ftruncate(%s): %d/%s\n",
				iop->ofn, errno, strerror(errno));
		}
	}

	if (iop->ofp)
		fclose(iop->ofp);
	if (iop->obuf)
		free(iop->obuf);
}

static void close_ios(struct tracer *tp)
{
	while (tp->nios > 0) {
		struct io_info *iop = &tp->ios[--tp->nios];

		iop->dpp->drops = get_drops(iop->dpp);
		if (iop->ifd >= 0)
			close(iop->ifd);

		if (iop->ofp)
			close_iop(iop);
		else if (iop->ofd >= 0) {
			struct devpath *dpp = iop->dpp;

			net_send_close(iop->ofd, dpp->buts_name, dpp->drops);
			net_close_connection(&iop->ofd);
		}
	}

	free(tp->ios);
	free(tp->pfds);
}

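/*
 * Open this tracer's per-device relay files
 * (<debugfs>/block/<buts_name>/trace<cpu>) and set up the matching
 * output: an output file, a sendfile() connection, or a shared
 * per-CPU network connection, depending on the mode.
 */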
static int open_ios(struct tracer *tp)
{
	struct pollfd *pfd;
	struct io_info *iop;
	struct list_head *p;

	tp->ios = calloc(ndevs, sizeof(struct io_info));
	memset(tp->ios, 0, ndevs * sizeof(struct io_info));

	tp->pfds = calloc(ndevs, sizeof(struct pollfd));
	memset(tp->pfds, 0, ndevs * sizeof(struct pollfd));

	tp->nios = 0;
	iop = tp->ios;
	pfd = tp->pfds;
	__list_for_each(p, &devpaths) {
		struct devpath *dpp = list_entry(p, struct devpath, head);

		iop->dpp = dpp;
		iop->ofd = -1;
		snprintf(iop->ifn, sizeof(iop->ifn), "%s/block/%s/trace%d",
			 debugfs_path, dpp->buts_name, tp->cpu);

		iop->ifd = my_open(iop->ifn, O_RDONLY | O_NONBLOCK);
		if (iop->ifd < 0) {
			fprintf(stderr, "Thread %d failed open %s: %d/%s\n",
				tp->cpu, iop->ifn, errno, strerror(errno));
			return 1;
		}

		init_mmap_info(&iop->mmap_info);

		pfd->fd = iop->ifd;
		pfd->events = POLLIN;

		if (piped_output)
			;
		else if (net_client_use_sendfile()) {
			iop->ofd = net_setup_client();
			if (iop->ofd < 0)
				goto err;
			net_send_open(iop->ofd, tp->cpu, dpp->buts_name);
		} else if (net_mode == Net_none) {
			if (iop_open(iop, tp->cpu))
				goto err;
		} else {
			/*
			 * This ensures that the server knows about all
			 * connections & devices before _any_ closes
			 */
			net_send_open(cl_fds[tp->cpu], tp->cpu, dpp->buts_name);
		}

		pfd++;
		iop++;
		tp->nios++;
	}

	return 0;

err:
	close(iop->ifd);	/* tp->nios _not_ bumped */
	close_ios(tp);
	return 1;
}

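/*
 * Local file mode: read new relay data for each ready device directly
 * into the mmap()ed output file window. Returns the number of devices
 * that produced data.
 */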
static int handle_pfds_file(struct tracer *tp, int nevs, int force_read)
{
	struct mmap_info *mip;
	int i, ret, nentries = 0;
	struct pollfd *pfd = tp->pfds;
	struct io_info *iop = tp->ios;

	for (i = 0; nevs > 0 && i < ndevs; i++, pfd++, iop++) {
		if (pfd->revents & POLLIN || force_read) {
			mip = &iop->mmap_info;

			ret = setup_mmap(iop->ofd, buf_size, mip);
			if (ret) {	/* setup_mmap() returns 0 or 1 */
				pfd->events = 0;
				break;
			}

			ret = read(iop->ifd, mip->fs_buf + mip->fs_off,
				   buf_size);
			if (ret > 0) {
				pdc_dr_update(iop->dpp, tp->cpu, ret);
				mip->fs_size += ret;
				mip->fs_off += ret;
				nentries++;
			} else if (ret == 0) {
				/*
				 * Short reads after we're done stop us
				 * from trying reads.
				 */
				if (tp->is_done)
					clear_events(pfd);
			} else {
				read_err(tp->cpu, iop->ifn);
				if (errno != EAGAIN || tp->is_done)
					clear_events(pfd);
			}
			nevs--;
		}
	}

	return nentries;
}

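/*
 * Network client w/ sendfile(): use fstat() on the relay file to see
 * how much new data is available, then sendfile() it to the server.
 */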
static int handle_pfds_netclient(struct tracer *tp, int nevs, int force_read)
{
	struct stat sb;
	int i, nentries = 0;
	struct pollfd *pfd = tp->pfds;
	struct io_info *iop = tp->ios;

	for (i = 0; i < ndevs; i++, pfd++, iop++) {
		if (pfd->revents & POLLIN || force_read) {
			if (fstat(iop->ifd, &sb) < 0) {
				perror(iop->ifn);
				pfd->events = 0;
			} else if (sb.st_size > (off_t)iop->data_queued) {
				iop->ready = sb.st_size - iop->data_queued;
				iop->data_queued = sb.st_size;

				if (!net_sendfile_data(tp, iop)) {
					pdc_dr_update(iop->dpp, tp->cpu,
						      iop->ready);
					nentries++;
				} else
					clear_events(pfd);
			}
			if (--nevs == 0)
				break;
		}
	}

	if (nentries)
		incr_entries(nentries);

	return nentries;
}

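/*
 * Piped output or network client w/ send(): read relay data into
 * trace_bufs and queue them for the main processing thread.
 */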
static int handle_pfds_entries(struct tracer *tp, int nevs, int force_read)
{
	int i, nentries = 0;
	struct trace_buf *tbp;
	struct pollfd *pfd = tp->pfds;
	struct io_info *iop = tp->ios;

	tbp = alloc_trace_buf(tp->cpu, buf_size);
	for (i = 0; i < ndevs; i++, pfd++, iop++) {
		if (pfd->revents & POLLIN || force_read) {
			tbp->len = read(iop->ifd, tbp->buf, buf_size);
			if (tbp->len > 0) {
				pdc_dr_update(iop->dpp, tp->cpu, tbp->len);
				add_trace_buf(iop->dpp, tp->cpu, &tbp);
				nentries++;
			} else if (tbp->len == 0) {
				/*
				 * Short reads after we're done stop us
				 * from trying reads.
				 */
				if (tp->is_done)
					clear_events(pfd);
			} else {
				read_err(tp->cpu, iop->ifn);
				if (errno != EAGAIN || tp->is_done)
					clear_events(pfd);
			}
			if (!piped_output && --nevs == 0)
				break;
		}
	}
	free(tbp);

	if (nentries)
		incr_entries(nentries);

	return nentries;
}

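/*
 * Per-CPU tracer thread: pin to its CPU, open its inputs/outputs, wait
 * for the main thread to unblock it, then poll the relay files until
 * told to stop, draining any remaining data before exiting.
 */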
static void *thread_main(void *arg)
{
	int ret, ndone, to_val;
	struct tracer *tp = arg;

	ret = lock_on_cpu(tp->cpu);
	if (ret)
		goto err;

	ret = open_ios(tp);
	if (ret)
		goto err;

	if (piped_output)
		to_val = 50;		/* Frequent partial handles */
	else
		to_val = 500;		/* 1/2 second intervals */

	tracer_signal_ready(tp, Th_running, 0);
	tracer_wait_unblock(tp);

	while (!tp->is_done) {
		ndone = poll(tp->pfds, ndevs, to_val);
		if (ndone || piped_output)
			(void)handle_pfds(tp, ndone, piped_output);
		else if (ndone < 0 && errno != EINTR)
			fprintf(stderr, "Thread %d poll failed: %d/%s\n",
				tp->cpu, errno, strerror(errno));
	}

	/*
	 * Trace is stopped, pull data until we get a short read
	 */
	while (handle_pfds(tp, ndevs, 1) > 0)
		;

	close_ios(tp);
	tracer_signal_ready(tp, Th_leaving, 0);
	return NULL;

err:
	tracer_signal_ready(tp, Th_error, ret);
	return NULL;
}

static int start_tracer(int cpu)
{
	int ret;
	struct tracer *tp;

	tp = malloc(sizeof(*tp));
	memset(tp, 0, sizeof(*tp));

	INIT_LIST_HEAD(&tp->head);
	tp->status = 0;
	tp->cpu = cpu;

	/*
	 * pthread_create() returns the error code directly; it does
	 * not set errno.
	 */
	ret = pthread_create(&tp->thread, NULL, thread_main, tp);
	if (ret) {
		fprintf(stderr, "FAILED to start thread on CPU %d: %d/%s\n",
			cpu, ret, strerror(ret));
		free(tp);
		return 1;
	}

	list_add_tail(&tp->head, &tracers);
	return 0;
}

static void start_tracers(void)
{
	int cpu;
	struct list_head *p;

	for (cpu = 0; cpu < ncpus; cpu++)
		if (start_tracer(cpu))
			break;

	wait_tracers_ready(cpu);

	__list_for_each(p, &tracers) {
		struct tracer *tp = list_entry(p, struct tracer, head);
		if (tp->status)
			fprintf(stderr,
				"FAILED to start thread on CPU %d: %d/%s\n",
				tp->cpu, tp->status, strerror(tp->status));
	}
}

static void stop_tracers(void)
{
	struct list_head *p;

	/*
	 * Stop the tracing - makes the tracer threads clean up quicker.
	 */
	__list_for_each(p, &devpaths) {
		struct devpath *dpp = list_entry(p, struct devpath, head);
		(void)ioctl(dpp->fd, BLKTRACESTOP);
	}

	/*
	 * Tell each tracer to quit
	 */
	__list_for_each(p, &tracers) {
		struct tracer *tp = list_entry(p, struct tracer, head);
		tp->is_done = 1;
	}
}

static void del_tracers(void)
{
	struct list_head *p, *q;

	list_for_each_safe(p, q, &tracers) {
		struct tracer *tp = list_entry(p, struct tracer, head);

		list_del(&tp->head);
		free(tp);
	}
}

static void wait_tracers(void)
{
	struct list_head *p;

	if (use_tracer_devpaths())
		process_trace_bufs();

	wait_tracers_leaving();

	__list_for_each(p, &tracers) {
		int ret;
		struct tracer *tp = list_entry(p, struct tracer, head);

		ret = pthread_join(tp->thread, NULL);
		if (ret)
			fprintf(stderr, "Thread join %d failed %d\n",
				tp->cpu, ret);
	}

	if (use_tracer_devpaths())
		clean_trace_bufs();

	get_all_drops();
}

static void exit_tracing(void)
{
	signal(SIGINT, SIG_IGN);
	signal(SIGHUP, SIG_IGN);
	signal(SIGTERM, SIG_IGN);
	signal(SIGALRM, SIG_IGN);

	stop_tracers();
	wait_tracers();
	del_tracers();
	rel_devpaths();
}

static void handle_sigint(__attribute__((__unused__)) int sig)
{
	done = 1;
	stop_tracers();
}

static void show_stats(struct list_head *devpaths)
{
	FILE *ofp;
	struct list_head *p;
	unsigned long long nevents, data_read;
	unsigned long long total_drops = 0;
	unsigned long long total_events = 0;

	if (piped_output)
		ofp = my_fopen("/dev/null", "w");
	else
		ofp = stdout;

	__list_for_each(p, devpaths) {
		int cpu;
		struct pdc_stats *sp;
		struct devpath *dpp = list_entry(p, struct devpath, head);

		if (net_mode == Net_server)
			printf("server: end of run for %s:%s\n",
				dpp->ch->hostname, dpp->buts_name);

		data_read = 0;
		nevents = 0;

		fprintf(ofp, "=== %s ===\n", dpp->buts_name);
		for (cpu = 0, sp = dpp->stats; cpu < dpp->ncpus; cpu++, sp++) {
			/*
			 * Estimate events if not known...
			 */
			if (sp->nevents == 0) {
				sp->nevents = sp->data_read /
						sizeof(struct blk_io_trace);
			}

			fprintf(ofp,
				"  CPU%3d: %20llu events, %8llu KiB data\n",
				cpu, sp->nevents, (sp->data_read + 1023) >> 10);

			data_read += sp->data_read;
			nevents += sp->nevents;
		}

		fprintf(ofp, "  Total:  %20llu events (dropped %llu),"
			     " %8llu KiB data\n", nevents,
			     dpp->drops, (data_read + 1023) >> 10);

		total_drops += dpp->drops;
		total_events += (nevents + dpp->drops);
	}

	fflush(ofp);
	if (piped_output)
		fclose(ofp);

	if (total_drops) {
		double drops_ratio = 1.0;

		if (total_events)
			drops_ratio = (double)total_drops/(double)total_events;

		fprintf(stderr, "\nYou have %llu (%5.1lf%%) dropped events\n"
				"Consider using a larger buffer size (-b) "
				"and/or more buffers (-n)\n",
			total_drops, 100.0 * drops_ratio);
	}
}

static int handle_args(int argc, char *argv[])
{
	int c, i;
	struct statfs st;
	int act_mask_tmp = 0;

	while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
		switch (c) {
		case 'a':
			i = find_mask_map(optarg);
			if (i < 0) {
				fprintf(stderr, "Invalid action mask %s\n",
					optarg);
				return 1;
			}
			act_mask_tmp |= i;
			break;

		case 'A':
			if ((sscanf(optarg, "%x", &i) != 1) ||
							!valid_act_opt(i)) {
				fprintf(stderr,
					"Invalid set action mask %s/0x%x\n",
					optarg, i);
				return 1;
			}
			act_mask_tmp = i;
			break;

		case 'd':
			if (add_devpath(optarg) != 0)
				return 1;
			break;

		case 'I': {
			char dev_line[256];
			FILE *ifp = my_fopen(optarg, "r");

			if (!ifp) {
				fprintf(stderr,
					"Invalid file for devices %s\n",
					optarg);
				return 1;
			}

			while (fscanf(ifp, "%255s\n", dev_line) == 1)
				if (add_devpath(dev_line) != 0)
					return 1;
			break;
		}

		case 'r':
			debugfs_path = optarg;
			break;

		case 'o':
			output_name = optarg;
			break;
		case 'k':
			kill_running_trace = 1;
			break;
		case 'w':
			stop_watch = atoi(optarg);
			if (stop_watch <= 0) {
				fprintf(stderr,
					"Invalid stopwatch value (%d secs)\n",
					stop_watch);
				return 1;
			}
			break;
		case 'V':
		case 'v':
			printf("%s version %s\n", argv[0], blktrace_version);
			exit(0);
			/*NOTREACHED*/
		case 'b':
			buf_size = strtoul(optarg, NULL, 10);
			if (buf_size <= 0 || buf_size > 16*1024) {
				fprintf(stderr, "Invalid buffer size (%lu)\n",
					buf_size);
				return 1;
			}
			buf_size <<= 10;
			break;
		case 'n':
			buf_nr = strtoul(optarg, NULL, 10);
			if (buf_nr <= 0) {
				fprintf(stderr,
					"Invalid buffer nr (%lu)\n", buf_nr);
				return 1;
			}
			break;
		case 'D':
			output_dir = optarg;
			break;
		case 'h':
			net_mode = Net_client;
			strncpy(hostname, optarg, sizeof(hostname) - 1);
			hostname[sizeof(hostname) - 1] = '\0';
			break;
		case 'l':
			net_mode = Net_server;
			break;
		case 'p':
			net_port = atoi(optarg);
			break;
		case 's':
			net_use_sendfile = 0;
			break;
		default:
			show_usage(argv[0]);
			exit(1);
			/*NOTREACHED*/
		}
	}

	while (optind < argc)
		if (add_devpath(argv[optind++]) != 0)
			return 1;

	if (net_mode != Net_server && ndevs == 0) {
		show_usage(argv[0]);
		return 1;
	}

	if (statfs(debugfs_path, &st) < 0 || st.f_type != (long)DEBUGFS_TYPE) {
		fprintf(stderr, "Invalid debug path %s: %d/%s\n",
			debugfs_path, errno, strerror(errno));
		return 1;
	}

	if (act_mask_tmp != 0)
		act_mask = act_mask_tmp;

	if (net_mode == Net_client && net_setup_addr())
		return 1;

	/*
	 * Set up for appropriate PFD handler based upon output name.
	 */
	if (net_client_use_sendfile())
		handle_pfds = handle_pfds_netclient;
	else if (net_client_use_send())
		handle_pfds = handle_pfds_entries;
	else if (output_name && (strcmp(output_name, "-") == 0)) {
		piped_output = 1;
		handle_pfds = handle_pfds_entries;
		pfp = stdout;
		setvbuf(pfp, NULL, _IONBF, 0);
	} else
		handle_pfds = handle_pfds_file;
	return 0;
}

ch_add_connection(struct net_server_s * ns,struct cl_host * ch,int fd)2165 static void ch_add_connection(struct net_server_s *ns, struct cl_host *ch,
2166 int fd)
2167 {
2168 struct cl_conn *nc;
2169
2170 nc = malloc(sizeof(*nc));
2171 memset(nc, 0, sizeof(*nc));
2172
2173 time(&nc->connect_time);
2174 nc->ch = ch;
2175 nc->fd = fd;
2176 nc->ncpus = -1;
2177
2178 list_add_tail(&nc->ch_head, &ch->conn_list);
2179 ch->connects++;
2180
2181 list_add_tail(&nc->ns_head, &ns->conn_list);
2182 ns->connects++;
2183 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
2184 }
2185
ch_rem_connection(struct net_server_s * ns,struct cl_host * ch,struct cl_conn * nc)2186 static void ch_rem_connection(struct net_server_s *ns, struct cl_host *ch,
2187 struct cl_conn *nc)
2188 {
2189 net_close_connection(&nc->fd);
2190
2191 list_del(&nc->ch_head);
2192 ch->connects--;
2193
2194 list_del(&nc->ns_head);
2195 ns->connects--;
2196 ns->pfds = realloc(ns->pfds, (ns->connects+1) * sizeof(struct pollfd));
2197
2198 free(nc);
2199 }
2200
static struct cl_host *net_find_client_host(struct net_server_s *ns,
					    struct in_addr cl_in_addr)
{
	struct list_head *p;

	__list_for_each(p, &ns->ch_list) {
		struct cl_host *ch = list_entry(p, struct cl_host, head);

		if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
			return ch;
	}

	return NULL;
}

static struct cl_host *net_add_client_host(struct net_server_s *ns,
					   struct sockaddr_in *addr)
{
	struct cl_host *ch;

	ch = malloc(sizeof(*ch));
	memset(ch, 0, sizeof(*ch));

	ch->ns = ns;
	ch->cl_in_addr = addr->sin_addr;
	list_add_tail(&ch->head, &ns->ch_list);
	ns->nchs++;

	ch->hostname = strdup(inet_ntoa(addr->sin_addr));
	printf("server: connection from %s\n", ch->hostname);

	INIT_LIST_HEAD(&ch->conn_list);
	INIT_LIST_HEAD(&ch->devpaths);

	return ch;
}

static void device_done(struct devpath *dpp, int ncpus)
{
	int cpu;
	struct io_info *iop;

	for (cpu = 0, iop = dpp->ios; cpu < ncpus; cpu++, iop++)
		close_iop(iop);

	list_del(&dpp->head);
	dpp_free(dpp);
}

static void net_ch_remove(struct cl_host *ch, int ncpus)
{
	struct list_head *p, *q;
	struct net_server_s *ns = ch->ns;

	list_for_each_safe(p, q, &ch->devpaths) {
		struct devpath *dpp = list_entry(p, struct devpath, head);
		device_done(dpp, ncpus);
	}

	list_for_each_safe(p, q, &ch->conn_list) {
		struct cl_conn *nc = list_entry(p, struct cl_conn, ch_head);

		ch_rem_connection(ns, ch, nc);
	}

	list_del(&ch->head);
	ns->nchs--;

	if (ch->hostname)
		free(ch->hostname);
	free(ch);
}

static void net_add_connection(struct net_server_s *ns)
{
	int fd;
	struct cl_host *ch;
	socklen_t socklen = sizeof(ns->addr);

	fd = my_accept(ns->listen_fd, (struct sockaddr *)&ns->addr, &socklen);
	if (fd < 0) {
		/*
		 * This is OK: we just won't accept this connection,
		 * nothing fatal.
		 */
		perror("accept");
	} else {
		ch = net_find_client_host(ns, ns->addr.sin_addr);
		if (!ch)
			ch = net_add_client_host(ns, &ns->addr);

		ch_add_connection(ns, ch, fd);
	}
}

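/*
 * Allocate the server-side representation of one traced device on the
 * client: per-CPU I/O state (dpp->ios) and per-CPU statistics
 * (dpp->stats), with iop_open() setting up the per-CPU output for
 * each. Both arrays are calloc'ed, so they start zero-filled.
 */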
static struct devpath *nc_add_dpp(struct cl_conn *nc,
				  struct blktrace_net_hdr *bnh,
				  time_t connect_time)
{
	int cpu;
	struct io_info *iop;
	struct devpath *dpp;

	dpp = malloc(sizeof(*dpp));
	memset(dpp, 0, sizeof(*dpp));

	dpp->buts_name = strdup(bnh->buts_name);
	dpp->path = strdup(bnh->buts_name);
	dpp->fd = -1;
	dpp->ch = nc->ch;
	dpp->cl_id = bnh->cl_id;
	dpp->cl_connect_time = connect_time;
	dpp->ncpus = nc->ncpus;
	dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));

	list_add_tail(&dpp->head, &nc->ch->devpaths);
	nc->ch->ndevs++;

	dpp->ios = calloc(nc->ncpus, sizeof(*iop));

	for (cpu = 0, iop = dpp->ios; cpu < nc->ncpus; cpu++, iop++) {
		iop->dpp = dpp;
		iop->nc = nc;
		init_mmap_info(&iop->mmap_info);

		if (iop_open(iop, cpu))
			goto err;
	}

	return dpp;

err:
	/*
	 * Need to unravel what's been done...
	 */
	while (cpu >= 0)
		close_iop(&dpp->ios[cpu--]);
	dpp_free(dpp);

	return NULL;
}

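/*
 * Find the devpath this header refers to, keyed by buts_name. If it
 * is not known yet, inherit the connect time from any existing
 * devpath of the same client run (matching cl_id) and create it.
 */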
static struct devpath *nc_find_dpp(struct cl_conn *nc,
				   struct blktrace_net_hdr *bnh)
{
	struct list_head *p;
	time_t connect_time = nc->connect_time;

	__list_for_each(p, &nc->ch->devpaths) {
		struct devpath *dpp = list_entry(p, struct devpath, head);

		if (!strcmp(dpp->buts_name, bnh->buts_name))
			return dpp;

		if (dpp->cl_id == bnh->cl_id)
			connect_time = dpp->cl_connect_time;
	}

	return nc_add_dpp(nc, bnh, connect_time);
}

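/*
 * Pull bnh->len bytes of trace payload off the wire into the mmap'ed
 * output window for the originating CPU, growing the mapping first
 * if needed.
 */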
static void net_client_read_data(struct cl_conn *nc, struct devpath *dpp,
				 struct blktrace_net_hdr *bnh)
{
	int ret;
	struct io_info *iop = &dpp->ios[bnh->cpu];
	struct mmap_info *mip = &iop->mmap_info;

	if (setup_mmap(iop->ofd, bnh->len, &iop->mmap_info)) {
		fprintf(stderr, "ncd(%s:%d): mmap failed\n",
			nc->ch->hostname, nc->fd);
		exit(1);
	}

	ret = net_recv_data(nc->fd, mip->fs_buf + mip->fs_off, bnh->len);
	if (ret > 0) {
		pdc_dr_update(dpp, bnh->cpu, ret);
		mip->fs_size += ret;
		mip->fs_off += ret;
	} else if (ret < 0)
		exit(1);
}

/*
 * Returns 1 if we closed a host - invalidates other polling information
 * that may be present.
 */
static int net_client_data(struct cl_conn *nc)
{
	int ret;
	struct devpath *dpp;
	struct blktrace_net_hdr bnh;

	ret = net_get_header(nc, &bnh);
	if (ret == 0)
		return 0;

	if (ret < 0) {
		fprintf(stderr, "ncd(%d): header read failed\n", nc->fd);
		exit(1);
	}

	if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
		fprintf(stderr, "ncd(%d): received data is bad\n", nc->fd);
		exit(1);
	}

	if (!data_is_native) {
		bnh.magic = be32_to_cpu(bnh.magic);
		bnh.cpu = be32_to_cpu(bnh.cpu);
		bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
		bnh.len = be32_to_cpu(bnh.len);
		bnh.cl_id = be32_to_cpu(bnh.cl_id);
		bnh.buf_size = be32_to_cpu(bnh.buf_size);
		bnh.buf_nr = be32_to_cpu(bnh.buf_nr);
		bnh.page_size = be32_to_cpu(bnh.page_size);
	}

	if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
		fprintf(stderr, "ncd(%s:%d): bad data magic\n",
			nc->ch->hostname, nc->fd);
		exit(1);
	}

	if (nc->ncpus == -1)
		nc->ncpus = bnh.max_cpus;

	/*
	 * len == 0 means the other end is sending us a new connection/dpp
	 * len == 1 means that the other end signalled end-of-run
	 */
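	/*
	 * Illustrative header sequence for one device (simplified,
	 * field values are examples only):
	 *
	 *	{ magic, len=0 }			open; ack'ed back
	 *	{ magic, cpu=2, len=8192 } + payload	trace data for CPU 2
	 *	...
	 *	{ magic, cpu=<drops>, len=1 }		close; the cpu field
	 *						carries the drop count
	 */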
	dpp = nc_find_dpp(nc, &bnh);
	if (bnh.len == 0) {
		/*
		 * Just adding in the dpp above is enough
		 */
		ack_open_close(nc->fd, dpp->buts_name);
		nc->ch->cl_opens++;
	} else if (bnh.len == 1) {
		/*
		 * overload cpu count with dropped events
		 */
		dpp->drops = bnh.cpu;

		ack_open_close(nc->fd, dpp->buts_name);
		if (--nc->ch->cl_opens == 0) {
			show_stats(&nc->ch->devpaths);
			net_ch_remove(nc->ch, nc->ncpus);
			return 1;
		}
	} else
		net_client_read_data(nc, dpp, &bnh);

	return 0;
}

static void handle_client_data(struct net_server_s *ns, int events)
{
	struct cl_conn *nc;
	struct pollfd *pfd;
	struct list_head *p, *q;

	pfd = &ns->pfds[1];
	list_for_each_safe(p, q, &ns->conn_list) {
		if (pfd->revents & POLLIN) {
			nc = list_entry(p, struct cl_conn, ns_head);

			if (net_client_data(nc) || --events == 0)
				break;
		}
		pfd++;
	}
}

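/*
 * (Re)build the pollfd array before each poll(): slot 0 is the
 * listening socket, followed by one entry per live connection, in
 * the same order that handle_client_data() walks ns->conn_list.
 */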
static void net_setup_pfds(struct net_server_s *ns)
{
	struct pollfd *pfd;
	struct list_head *p;

	ns->pfds[0].fd = ns->listen_fd;
	ns->pfds[0].events = POLLIN;

	pfd = &ns->pfds[1];
	__list_for_each(p, &ns->conn_list) {
		struct cl_conn *nc = list_entry(p, struct cl_conn, ns_head);

		pfd->fd = nc->fd;
		pfd->events = POLLIN;
		pfd++;
	}
}

static int net_server_handle_connections(struct net_server_s *ns)
{
	int events;

	printf("server: waiting for connections...\n");

	while (!done) {
		net_setup_pfds(ns);
		events = poll(ns->pfds, ns->connects + 1, -1);
		if (events < 0) {
			if (errno != EINTR) {
				perror("FATAL: poll error");
				return 1;
			}
		} else if (events > 0) {
			if (ns->pfds[0].revents & POLLIN) {
				net_add_connection(ns);
				events--;
			}

			if (events)
				handle_client_data(ns, events);
		}
	}

	return 0;
}

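/*
 * Server mode (-l): the usual socket/setsockopt/bind/listen sequence
 * on the port given by -p (default TRACE_NET_PORT), then poll for
 * new connections and trace data until interrupted.
 */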
static int net_server(void)
{
	int fd, opt;
	int ret = 1;
	struct net_server_s net_server;
	struct net_server_s *ns = &net_server;

	memset(ns, 0, sizeof(*ns));
	INIT_LIST_HEAD(&ns->ch_list);
	INIT_LIST_HEAD(&ns->conn_list);
	ns->pfds = malloc(sizeof(struct pollfd));

	fd = my_socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0) {
		perror("server: socket");
		goto out;
	}

	opt = 1;
	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
		perror("setsockopt");
		goto out;
	}

	memset(&ns->addr, 0, sizeof(ns->addr));
	ns->addr.sin_family = AF_INET;
	ns->addr.sin_addr.s_addr = htonl(INADDR_ANY);
	ns->addr.sin_port = htons(net_port);

	if (bind(fd, (struct sockaddr *) &ns->addr, sizeof(ns->addr)) < 0) {
		perror("bind");
		goto out;
	}

	if (listen(fd, 1) < 0) {
		perror("listen");
		goto out;
	}

	/*
	 * The actual server looping is done here:
	 */
	ns->listen_fd = fd;
	ret = net_server_handle_connections(ns);

	/*
	 * Clean up and return...
	 */
out:
	free(ns->pfds);
	return ret;
}

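/*
 * Normal tracing (and network client) path: set up kernel-side trace
 * buffering (setup_buts), spawn one tracer thread per CPU, and only
 * start the actual traces once every thread has come up; otherwise
 * tear everything back down.
 */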
static int run_tracers(void)
{
	atexit(exit_tracing);
	if (net_mode == Net_client)
		printf("blktrace: connecting to %s\n", hostname);

	setup_buts();

	if (use_tracer_devpaths()) {
		if (setup_tracer_devpaths())
			return 1;

		if (piped_output)
			handle_list = handle_list_file;
		else
			handle_list = handle_list_net;
	}

	start_tracers();
	if (nthreads_running == ncpus) {
		unblock_tracers();
		start_buts();
		if (net_mode == Net_client)
			printf("blktrace: connected!\n");
		if (stop_watch)
			alarm(stop_watch);
	} else
		stop_tracers();

	wait_tracers();
	if (nthreads_running == ncpus)
		show_stats(&devpaths);
	if (net_client_use_send())
		close_client_connections();
	del_tracers();

	return 0;
}

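/*
 * Startup order matters here: the CPU count and arguments are
 * validated before any signal handlers are installed, and SIGALRM
 * doubles as the -w stopwatch expiry (see the alarm() call in
 * run_tracers() above).
 */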
int main(int argc, char *argv[])
{
	int ret = 0;

	setlocale(LC_NUMERIC, "en_US");
	pagesize = getpagesize();
	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	if (ncpus < 0) {
		fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed %d/%s\n",
			errno, strerror(errno));
		ret = 1;
		goto out;
	} else if (handle_args(argc, argv)) {
		ret = 1;
		goto out;
	}

	signal(SIGINT, handle_sigint);
	signal(SIGHUP, handle_sigint);
	signal(SIGTERM, handle_sigint);
	signal(SIGALRM, handle_sigint);
	signal(SIGPIPE, SIG_IGN);

	if (kill_running_trace) {
		struct devpath *dpp;
		struct list_head *p;

		__list_for_each(p, &devpaths) {
			dpp = list_entry(p, struct devpath, head);
			if (__stop_trace(dpp->fd)) {
				fprintf(stderr,
					"BLKTRACETEARDOWN %s failed: %d/%s\n",
					dpp->path, errno, strerror(errno));
			}
		}
	} else if (net_mode == Net_server) {
		if (output_name) {
			fprintf(stderr, "-o ignored in server mode\n");
			output_name = NULL;
		}
		ret = net_server();
	} else
		ret = run_tracers();

out:
	if (pfp)
		fclose(pfp);
	rel_devpaths();
	return ret;
}