/*
 * builtin-record.c
 *
 * Builtin record command: Record the profile of a workload
 * (or a CPU, or a PID) into the perf.data output file - for
 * later analysis via perf report.
 */
#include "builtin.h"

#include "perf.h"

#include "util/build-id.h"
#include "util/util.h"
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/header.h"
#include "util/event.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/symbol.h"
#include "util/cpumap.h"
#include "util/thread_map.h"

#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>

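/*
 * Fallback for C libraries that lack the GNU on_exit() extension (for
 * example Android's Bionic): keep a small table of handlers and run them
 * from a single atexit() trampoline.  atexit() handlers don't receive the
 * exit status, so exit() is wrapped below to first capture it in
 * __exitcode.
 */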
#ifndef HAVE_ON_EXIT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
#endif
static int __on_exit_count = 0;
typedef void (*on_exit_func_t) (int, void *);
static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
static void *__on_exit_args[ATEXIT_MAX];
static int __exitcode = 0;
static void __handle_on_exit_funcs(void);
static int on_exit(on_exit_func_t function, void *arg);
#define exit(x) (exit)(__exitcode = (x))

static int on_exit(on_exit_func_t function, void *arg)
{
	if (__on_exit_count == ATEXIT_MAX)
		return -ENOMEM;
	else if (__on_exit_count == 0)
		atexit(__handle_on_exit_funcs);
	__on_exit_funcs[__on_exit_count] = function;
	__on_exit_args[__on_exit_count++] = arg;
	return 0;
}

static void __handle_on_exit_funcs(void)
{
	int i;
	for (i = 0; i < __on_exit_count; i++)
		__on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
}
#endif

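/*
 * Per-invocation state for 'perf record': the tool callbacks, user
 * options, the event list and session being written, plus bookkeeping
 * for the output file (bytes written, post-processing offset, etc.).
 */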
struct perf_record {
	struct perf_tool	tool;
	struct perf_record_opts	opts;
	u64			bytes_written;
	const char		*output_name;
	struct perf_evlist	*evlist;
	struct perf_session	*session;
	const char		*progname;
	int			output;
	unsigned int		page_size;
	int			realtime_prio;
	bool			no_buildid;
	bool			no_buildid_cache;
	long			samples;
	off_t			post_processing_offset;
};

static void advance_output(struct perf_record *rec, size_t size)
{
	rec->bytes_written += size;
}

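/*
 * Write the whole buffer to the output file, looping to cope with short
 * writes, and account the bytes so the data size can be patched into
 * the header on exit.
 */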
static int write_output(struct perf_record *rec, void *buf, size_t size)
{
	while (size) {
		int ret = write(rec->output, buf, size);

		if (ret < 0) {
			pr_err("failed to write\n");
			return -1;
		}

		size -= ret;
		buf += ret;

		rec->bytes_written += ret;
	}

	return 0;
}

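/*
 * perf_tool callback for events synthesized in userspace (mmaps, comms,
 * etc.): they bypass the kernel ring buffer and go straight to the
 * output file.
 */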
static int process_synthesized_event(struct perf_tool *tool,
				     union perf_event *event,
				     struct perf_sample *sample __maybe_unused,
				     struct machine *machine __maybe_unused)
{
	struct perf_record *rec = container_of(tool, struct perf_record, tool);
	if (write_output(rec, event, event->header.size) < 0)
		return -1;

	return 0;
}

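/*
 * Drain one ring buffer: copy everything between our last position
 * (md->prev) and the kernel's write head into the output file.  When
 * the new data wraps past the end of the mmap'd area this takes two
 * writes - first the tail end of the buffer, then the rest from the
 * start.  Finally the tail pointer is advanced so the kernel can reuse
 * the space.
 */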
static int perf_record__mmap_read(struct perf_record *rec,
				   struct perf_mmap *md)
{
	unsigned int head = perf_mmap__read_head(md);
	unsigned int old = md->prev;
	unsigned char *data = md->base + rec->page_size;
	unsigned long size;
	void *buf;
	int rc = 0;

	if (old == head)
		return 0;

	rec->samples++;

	size = head - old;

	if ((old & md->mask) + size != (head & md->mask)) {
		buf = &data[old & md->mask];
		size = md->mask + 1 - (old & md->mask);
		old += size;

		if (write_output(rec, buf, size) < 0) {
			rc = -1;
			goto out;
		}
	}

	buf = &data[old & md->mask];
	size = head - old;
	old += size;

	if (write_output(rec, buf, size) < 0) {
		rc = -1;
		goto out;
	}

	md->prev = old;
	perf_mmap__write_tail(md, old);

out:
	return rc;
}

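/*
 * Flags set from signal context and polled by the main loop; volatile
 * so the compiler does not cache them across iterations.
 */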
static volatile int done = 0;
static volatile int signr = -1;
static volatile int child_finished = 0;

static void sig_handler(int sig)
{
	if (sig == SIGCHLD)
		child_finished = 1;

	done = 1;
	signr = sig;
}

static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
{
	struct perf_record *rec = arg;
	int status;

	if (rec->evlist->workload.pid > 0) {
		if (!child_finished)
			kill(rec->evlist->workload.pid, SIGTERM);

		wait(&status);
		if (WIFSIGNALED(status))
			psignal(WTERMSIG(status), rec->progname);
	}

	if (signr == -1 || signr == SIGUSR1)
		return;

	signal(signr, SIG_DFL);
}

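/*
 * Open all events in the list, letting perf_evsel__fallback() retry
 * with a substitute (e.g. a software event when the requested hardware
 * event is unavailable), then apply event filters and mmap the ring
 * buffers.
 */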
static int perf_record__open(struct perf_record *rec)
{
	char msg[512];
	struct perf_evsel *pos;
	struct perf_evlist *evlist = rec->evlist;
	struct perf_session *session = rec->session;
	struct perf_record_opts *opts = &rec->opts;
	int rc = 0;

	perf_evlist__config(evlist, opts);

	list_for_each_entry(pos, &evlist->entries, node) {
try_again:
		if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
			if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
				if (verbose)
					ui__warning("%s\n", msg);
				goto try_again;
			}

			rc = -errno;
			perf_evsel__open_strerror(pos, &opts->target,
						  errno, msg, sizeof(msg));
			ui__error("%s\n", msg);
			goto out;
		}
	}

	if (perf_evlist__apply_filters(evlist)) {
		error("failed to set filter with %d (%s)\n", errno,
			strerror(errno));
		rc = -1;
		goto out;
	}

	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
		if (errno == EPERM) {
			pr_err("Permission error mapping pages.\n"
			       "Consider increasing "
			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
			       "or try again with a smaller value of -m/--mmap_pages.\n"
			       "(current value: %d)\n", opts->mmap_pages);
			rc = -errno;
		} else if (!is_power_of_2(opts->mmap_pages) &&
			   (opts->mmap_pages != UINT_MAX)) {
			pr_err("--mmap_pages/-m value must be a power of two.");
			rc = -EINVAL;
		} else {
			pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
			rc = -errno;
		}
		goto out;
	}

	session->evlist = evlist;
	perf_session__set_id_hdr_size(session);
out:
	return rc;
}

static int process_buildids(struct perf_record *rec)
{
	u64 size = lseek(rec->output, 0, SEEK_CUR);

	if (size == 0)
		return 0;

	rec->session->fd = rec->output;
	return __perf_session__process_events(rec->session, rec->post_processing_offset,
					      size - rec->post_processing_offset,
					      size, &build_id__mark_dso_hit_ops);
}

static void perf_record__exit(int status, void *arg)
{
	struct perf_record *rec = arg;

	if (status != 0)
		return;

	if (!rec->opts.pipe_output) {
		rec->session->header.data_size += rec->bytes_written;

		if (!rec->no_buildid)
			process_buildids(rec);
		perf_session__write_header(rec->session, rec->evlist,
					   rec->output, true);
		perf_session__delete(rec->session);
		perf_evlist__delete(rec->evlist);
		symbol__exit();
	}
}

static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
	int err;
	struct perf_tool *tool = data;
	/*
	 * As for guest kernels: when processing the record & report
	 * subcommands we arrange the module mmaps prior to the guest
	 * kernel mmap and trigger a preload of the dso, because by
	 * default guest module symbols are loaded from guest kallsyms
	 * instead of /lib/modules/XXX/XXX.  This avoids missing symbols
	 * when the first address falls in a module instead of in the
	 * guest kernel.
	 */
	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);

	/*
	 * We use _stext for guest kernels because a guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record guest kernel [%d]'s reference"
		       " relocation symbol.\n", machine->pid);
}

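/*
 * PERF_RECORD_FINISHED_ROUND marks a flush point in the stream: events
 * written before it are known to be older than any event written after
 * it, which lets 'perf report' sort and process the file in bounded
 * memory.
 */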
static struct perf_event_header finished_round_event = {
	.size = sizeof(struct perf_event_header),
	.type = PERF_RECORD_FINISHED_ROUND,
};

static int perf_record__mmap_read_all(struct perf_record *rec)
{
	int i;
	int rc = 0;

	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
		if (rec->evlist->mmap[i].base) {
			if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
				rc = -1;
				goto out;
			}
		}
	}

	if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
		rc = write_output(rec, &finished_round_event,
				  sizeof(finished_round_event));

out:
	return rc;
}

static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
	struct stat st;
	int flags;
	int err, output, feat;
	unsigned long waking = 0;
	const bool forks = argc > 0;
	struct machine *machine;
	struct perf_tool *tool = &rec->tool;
	struct perf_record_opts *opts = &rec->opts;
	struct perf_evlist *evsel_list = rec->evlist;
	const char *output_name = rec->output_name;
	struct perf_session *session;
	bool disabled = false;

	rec->progname = argv[0];

	rec->page_size = sysconf(_SC_PAGE_SIZE);

	on_exit(perf_record__sig_exit, rec);
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);
	signal(SIGUSR1, sig_handler);
	signal(SIGTERM, sig_handler);

	if (!output_name) {
		if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
			opts->pipe_output = true;
		else
			rec->output_name = output_name = "perf.data";
	}
	if (output_name) {
		if (!strcmp(output_name, "-"))
			opts->pipe_output = true;
		else if (!stat(output_name, &st) && st.st_size) {
			char oldname[PATH_MAX];
			snprintf(oldname, sizeof(oldname), "%s.old",
				 output_name);
			unlink(oldname);
			rename(output_name, oldname);
		}
	}

	flags = O_CREAT|O_RDWR|O_TRUNC;

	if (opts->pipe_output)
		output = STDOUT_FILENO;
	else
		output = open(output_name, flags, S_IRUSR | S_IWUSR);
	if (output < 0) {
		perror("failed to create output file");
		return -1;
	}

	rec->output = output;

	session = perf_session__new(output_name, O_WRONLY,
				    true, false, NULL);
	if (session == NULL) {
		pr_err("Not enough memory for reading perf file header\n");
		return -1;
	}

	rec->session = session;

	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
		perf_header__set_feat(&session->header, feat);

	if (rec->no_buildid)
		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);

	if (!have_tracepoints(&evsel_list->entries))
		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);

	if (!rec->opts.branch_stack)
		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);

	if (forks) {
		err = perf_evlist__prepare_workload(evsel_list, &opts->target,
						    argv, opts->pipe_output,
						    true);
		if (err < 0) {
			pr_err("Couldn't run the workload!\n");
			goto out_delete_session;
		}
	}

	if (perf_record__open(rec) != 0) {
		err = -1;
		goto out_delete_session;
	}

	if (!evsel_list->nr_groups)
		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);

	/*
	 * perf_session__delete(session) will be called at perf_record__exit()
	 */
	on_exit(perf_record__exit, rec);

	if (opts->pipe_output) {
		err = perf_header__write_pipe(output);
		if (err < 0)
			goto out_delete_session;
	} else {
		err = perf_session__write_header(session, evsel_list,
						 output, false);
		if (err < 0)
			goto out_delete_session;
	}

	if (!rec->no_buildid
	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
		pr_err("Couldn't generate buildids. "
		       "Use --no-buildid to profile anyway.\n");
		err = -1;
		goto out_delete_session;
	}

	rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

	machine = &session->machines.host;

	if (opts->pipe_output) {
		err = perf_event__synthesize_attrs(tool, session,
						   process_synthesized_event);
		if (err < 0) {
			pr_err("Couldn't synthesize attrs.\n");
			goto out_delete_session;
		}

		if (have_tracepoints(&evsel_list->entries)) {
			/*
			 * FIXME: err <= 0 here actually means that
			 * there were no tracepoints, so it's not really
			 * an error, just that we don't need to
			 * synthesize anything.  We really have to
			 * return this more properly and also
			 * propagate the errors that now are calling die().
			 */
			err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
								  process_synthesized_event);
			if (err <= 0) {
				pr_err("Couldn't record tracing data.\n");
				goto out_delete_session;
			}
			advance_output(rec, err);
		}
	}

	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
						 machine, "_text");
	if (err < 0)
		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
							 machine, "_stext");
	if (err < 0)
		pr_err("Couldn't record kernel reference relocation symbol\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/kallsyms permission or run as root.\n");

	err = perf_event__synthesize_modules(tool, process_synthesized_event,
					     machine);
	if (err < 0)
		pr_err("Couldn't record kernel module information.\n"
		       "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
		       "Check /proc/modules permission or run as root.\n");

	if (perf_guest) {
		machines__process_guests(&session->machines,
					 perf_event__synthesize_guest_os, tool);
	}

	if (perf_target__has_task(&opts->target))
		err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
						  process_synthesized_event,
						  machine);
	else if (perf_target__has_cpu(&opts->target))
		err = perf_event__synthesize_threads(tool, process_synthesized_event,
					       machine);
	else /* command specified */
		err = 0;

	if (err != 0)
		goto out_delete_session;

	if (rec->realtime_prio) {
		struct sched_param param;

		param.sched_priority = rec->realtime_prio;
		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
			pr_err("Could not set realtime priority.\n");
			err = -1;
			goto out_delete_session;
		}
	}

	/*
	 * When perf is starting the traced process, all the events
	 * (apart from group members) have enable_on_exec=1 set,
	 * so don't spoil it by prematurely enabling them.
	 */
	if (!perf_target__none(&opts->target))
		perf_evlist__enable(evsel_list);

	/*
	 * Let the child rip
	 */
	if (forks)
		perf_evlist__start_workload(evsel_list);

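	/*
	 * Main capture loop: drain all ring buffers, then sleep in poll()
	 * until new data arrives or a signal sets 'done'.  If a pass found
	 * no new samples (hits == rec->samples) and we were told to stop,
	 * exit the loop.
	 */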
	for (;;) {
		int hits = rec->samples;

		if (perf_record__mmap_read_all(rec) < 0) {
			err = -1;
			goto out_delete_session;
		}

		if (hits == rec->samples) {
			if (done)
				break;
			err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
			waking++;
		}

		/*
		 * When perf is starting the traced process, at the end events
		 * die with the process and we wait for that. Thus no need to
		 * disable events in this case.
		 */
		if (done && !disabled && !perf_target__none(&opts->target)) {
			perf_evlist__disable(evsel_list);
			disabled = true;
		}
	}

	if (quiet || signr == SIGUSR1)
		return 0;

	fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

	/*
	 * Approximate RIP event size: 24 bytes.
	 */
	fprintf(stderr,
		"[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
		(double)rec->bytes_written / 1024.0 / 1024.0,
		output_name,
		rec->bytes_written / 24);

	return 0;

out_delete_session:
	perf_session__delete(session);
	return err;
}

#define BRANCH_OPT(n, m) \
	{ .name = n, .mode = (m) }

#define BRANCH_END { .name = NULL }

struct branch_mode {
	const char *name;
	int mode;
};

static const struct branch_mode branch_modes[] = {
	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
	BRANCH_END
};

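/*
 * Parse the -b/-j argument: a comma-separated list of the mode names
 * above, OR'ed together into the branch_stack mask, e.g. '-j any_call,u'.
 * If only privilege-level bits (u/k/hv) were given, the branch type
 * defaults to 'any'.
 */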
static int
parse_branch_stack(const struct option *opt, const char *str, int unset)
{
#define ONLY_PLM \
	(PERF_SAMPLE_BRANCH_USER	|\
	 PERF_SAMPLE_BRANCH_KERNEL	|\
	 PERF_SAMPLE_BRANCH_HV)

	uint64_t *mode = (uint64_t *)opt->value;
	const struct branch_mode *br;
	char *s, *os = NULL, *p;
	int ret = -1;

	if (unset)
		return 0;

	/*
	 * cannot set it twice, -b + --branch-filter for instance
	 */
	if (*mode)
		return -1;

	/* str may be NULL in case no arg is passed to -b */
	if (str) {
		/* because str is read-only */
		s = os = strdup(str);
		if (!s)
			return -1;

		for (;;) {
			p = strchr(s, ',');
			if (p)
				*p = '\0';

			for (br = branch_modes; br->name; br++) {
				if (!strcasecmp(s, br->name))
					break;
			}
			if (!br->name) {
				ui__warning("unknown branch filter %s,"
					    " check man page\n", s);
				goto error;
			}

			*mode |= br->mode;

			if (!p)
				break;

			s = p + 1;
		}
	}
	ret = 0;

	/* default to any branch */
	if ((*mode & ~ONLY_PLM) == 0) {
		*mode = PERF_SAMPLE_BRANCH_ANY;
	}
error:
	free(os);
	return ret;
}

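/*
 * Validate the user-supplied stack dump size for dwarf unwinding: it
 * must parse fully as a number and is rounded up to a multiple of
 * sizeof(u64), capped at USHRT_MAX rounded down to that multiple.
 */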
#ifdef LIBUNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
	char *endptr;
	unsigned long size;
	unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

	size = strtoul(str, &endptr, 0);

	do {
		if (*endptr)
			break;

		size = round_up(size, sizeof(u64));
		if (!size || size > max_size)
			break;

		*_size = size;
		return 0;

	} while (0);

	pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
	       max_size, str);
	return -1;
}
#endif /* LIBUNWIND_SUPPORT */

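/*
 * Parse the --call-graph mode argument: "fp" selects frame-pointer
 * based unwinding and takes no further arguments; "dwarf[,size]" (only
 * when built with libunwind) selects dwarf unwinding with an optional
 * user stack dump size, e.g. '--call-graph dwarf,8192'.
 */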
int record_parse_callchain(const char *arg, struct perf_record_opts *opts)
{
	char *tok, *name, *saveptr = NULL;
	char *buf;
	int ret = -1;

	/* We need a buffer that we know we can write to. */
	buf = malloc(strlen(arg) + 1);
	if (!buf)
		return -ENOMEM;

	strcpy(buf, arg);

	tok = strtok_r((char *)buf, ",", &saveptr);
	name = tok ? : (char *)buf;

	do {
		/* Framepointer style */
		if (!strncmp(name, "fp", sizeof("fp"))) {
			if (!strtok_r(NULL, ",", &saveptr)) {
				opts->call_graph = CALLCHAIN_FP;
				ret = 0;
			} else
				pr_err("callchain: No more arguments "
				       "needed for -g fp\n");
			break;

#ifdef LIBUNWIND_SUPPORT
		/* Dwarf style */
		} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
			const unsigned long default_stack_dump_size = 8192;

			ret = 0;
			opts->call_graph = CALLCHAIN_DWARF;
			opts->stack_dump_size = default_stack_dump_size;

			tok = strtok_r(NULL, ",", &saveptr);
			if (tok) {
				unsigned long size = 0;

				ret = get_stack_size(tok, &size);
				opts->stack_dump_size = size;
			}
#endif /* LIBUNWIND_SUPPORT */
		} else {
			pr_err("callchain: Unknown --call-graph option "
			       "value: %s\n", arg);
			break;
		}

	} while (0);

	free(buf);
	return ret;
}

static void callchain_debug(struct perf_record_opts *opts)
{
	pr_debug("callchain: type %d\n", opts->call_graph);

	if (opts->call_graph == CALLCHAIN_DWARF)
		pr_debug("callchain: stack dump size %d\n",
			 opts->stack_dump_size);
}

int record_parse_callchain_opt(const struct option *opt,
			       const char *arg,
			       int unset)
{
	struct perf_record_opts *opts = opt->value;
	int ret;

	/* --no-call-graph */
	if (unset) {
		opts->call_graph = CALLCHAIN_NONE;
		pr_debug("callchain: disabled\n");
		return 0;
	}

	ret = record_parse_callchain(arg, opts);
	if (!ret)
		callchain_debug(opts);

	return ret;
}

int record_callchain_opt(const struct option *opt,
			 const char *arg __maybe_unused,
			 int unset __maybe_unused)
{
	struct perf_record_opts *opts = opt->value;

	if (opts->call_graph == CALLCHAIN_NONE)
		opts->call_graph = CALLCHAIN_FP;

	callchain_debug(opts);
	return 0;
}

static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};
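
/*
 * A few illustrative invocations:
 *
 *   perf record ./my_workload             # profile a child command
 *   perf record -g -p 1234 sleep 10       # existing pid, with call graphs
 *   perf record -a -e cycles -- make -j8  # system-wide, explicit event
 */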

/*
 * XXX Ideally this would be local to cmd_record() and passed to a
 * perf_record__new, because we need access to it in perf_record__exit(),
 * which is called after cmd_record() exits; but since record_options
 * needs to be accessible to builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
static struct perf_record record = {
	.opts = {
		.mmap_pages	     = UINT_MAX,
		.user_freq	     = UINT_MAX,
		.user_interval	     = ULLONG_MAX,
		.freq		     = 4000,
		.target		     = {
			.uses_mmap   = true,
		},
	},
};

#define CALLCHAIN_HELP "set up and enable call-graph (stack chain/backtrace) recording: "

#ifdef LIBUNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
#endif

/*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
 * from builtin-record.c, i.e. use perf_record_opts,
 * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
 * using pipes, etc.
 */
const struct option record_options[] = {
	OPT_CALLBACK('e', "event", &record.evlist, "event",
		     "event selector. use 'perf list' to list available events",
		     parse_events_option),
	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
		     "event filter", parse_filter),
	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
		    "record events on existing process id"),
	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
		    "record events on existing thread id"),
	OPT_INTEGER('r', "realtime", &record.realtime_prio,
		    "collect data with this RT SCHED_FIFO priority"),
	OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
		    "collect data without buffering"),
	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
		    "collect raw sample records from all opened counters"),
	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
			    "system-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
		    "list of cpus to monitor"),
	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
	OPT_STRING('o', "output", &record.output_name, "file",
		    "output file name"),
	OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
		    "child tasks do not inherit counters"),
	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
	OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
		     "number of mmap data pages"),
	OPT_BOOLEAN(0, "group", &record.opts.group,
		    "put the counters into a counter group"),
	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
			   NULL, "enables call-graph recording",
			   &record_callchain_opt),
	OPT_CALLBACK(0, "call-graph", &record.opts,
		     "mode[,dump_size]", record_callchain_help,
		     &record_parse_callchain_opt),
	OPT_INCR('v', "verbose", &verbose,
		    "be more verbose (show counter open errors, etc)"),
	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
		    "per thread counts"),
	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
		    "Sample addresses"),
	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
		    "don't sample"),
	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
		    "do not update the buildid cache"),
	OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
		    "do not collect buildids in perf.data"),
	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
		     "monitor event in cgroup name only",
		     parse_cgroups),
	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
		   "user to profile"),

	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
		     "branch any", "sample any taken branches",
		     parse_branch_stack),

	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
		     "branch filter mask", "branch stack filter modes",
		     parse_branch_stack),
	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
		    "sample by weight (on special events only)"),
	OPT_END()
};

int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
	int err = -ENOMEM;
	struct perf_evlist *evsel_list;
	struct perf_record *rec = &record;
	char errbuf[BUFSIZ];

	evsel_list = perf_evlist__new();
	if (evsel_list == NULL)
		return -ENOMEM;

	rec->evlist = evsel_list;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc && perf_target__none(&rec->opts.target))
		usage_with_options(record_usage, record_options);

	if (nr_cgroups && !rec->opts.target.system_wide) {
		ui__error("cgroup monitoring only available in"
			  " system-wide mode\n");
		usage_with_options(record_usage, record_options);
	}

	symbol__init();

	if (symbol_conf.kptr_restrict)
		pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

	if (rec->no_buildid_cache || rec->no_buildid)
		disable_buildid_cache();

	if (evsel_list->nr_entries == 0 &&
	    perf_evlist__add_default(evsel_list) < 0) {
		pr_err("Not enough memory for event selector list\n");
		goto out_symbol_exit;
	}

	err = perf_target__validate(&rec->opts.target);
	if (err) {
		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s", errbuf);
	}

	err = perf_target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out_symbol_exit;
	}

	err = -ENOMEM;
	if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
		usage_with_options(record_usage, record_options);

	if (rec->opts.user_interval != ULLONG_MAX)
		rec->opts.default_interval = rec->opts.user_interval;
	if (rec->opts.user_freq != UINT_MAX)
		rec->opts.freq = rec->opts.user_freq;

	/*
	 * User specified count overrides default frequency.
	 */
	if (rec->opts.default_interval)
		rec->opts.freq = 0;
	else if (rec->opts.freq) {
		rec->opts.default_interval = rec->opts.freq;
	} else {
		ui__error("frequency and count are zero, aborting\n");
		err = -EINVAL;
		goto out_free_fd;
	}

	err = __cmd_record(&record, argc, argv);

	perf_evlist__munmap(evsel_list);
	perf_evlist__close(evsel_list);
out_free_fd:
	perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
	symbol__exit();
	return err;
}