1 /*
2  * ipvrf.c	"ip vrf"
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	David Ahern <dsa@cumulusnetworks.com>
10  *
11  */
12 
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 #include <sys/socket.h>
16 #include <sys/mount.h>
17 #include <linux/bpf.h>
18 #include <linux/if.h>
19 #include <fcntl.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <string.h>
24 #include <dirent.h>
25 #include <errno.h>
26 #include <limits.h>
27 
28 #include "rt_names.h"
29 #include "utils.h"
30 #include "ip_common.h"
31 #include "bpf_util.h"
32 
/* file name, including the leading '/', that is appended to a cgroup
 * directory path to reach the list of pids belonging to that cgroup
 */
#define CGRP_PROC_FILE  "/cgroup.procs"

/* filter state for link dumps: ipvrf_show() sets .kind, ipvrf_filter_req()
 * forwards .kind in the request, and ipvrf_print() honours .ifindex and
 * .master when they are non-zero
 */
static struct link_filter vrf_filter;
36 
/* Print the "ip vrf" command synopsis to stderr and terminate the process. */
static void usage(void)
{
	fprintf(stderr,
		"Usage: ip vrf show [NAME] ...\n"
		"       ip vrf exec [NAME] cmd ...\n"
		"       ip vrf identify [PID]\n"
		"       ip vrf pids [NAME]\n");

	exit(-1);
}
46 
47 /*
48  * parse process based cgroup file looking for PATH/vrf/NAME where
49  * NAME is the name of the vrf the process is associated with
50  */
vrf_identify(pid_t pid,char * name,size_t len)51 static int vrf_identify(pid_t pid, char *name, size_t len)
52 {
53 	char path[PATH_MAX];
54 	char buf[4096];
55 	char *vrf, *end;
56 	FILE *fp;
57 
58 	snprintf(path, sizeof(path), "/proc/%d/cgroup", pid);
59 	fp = fopen(path, "r");
60 	if (!fp)
61 		return -1;
62 
63 	memset(name, 0, len);
64 
65 	while (fgets(buf, sizeof(buf), fp)) {
66 		/* want the controller-less cgroup */
67 		if (strstr(buf, "::/") == NULL)
68 			continue;
69 
70 		vrf = strstr(buf, "/vrf/");
71 		if (vrf) {
72 			vrf += 5;  /* skip past "/vrf/" */
73 			end = strchr(vrf, '\n');
74 			if (end)
75 				*end = '\0';
76 
77 			strlcpy(name, vrf, len);
78 			break;
79 		}
80 	}
81 
82 	fclose(fp);
83 
84 	return 0;
85 }
86 
/*
 * "ip vrf identify [PID]": print the vrf a process is associated with.
 *
 * With no argument the current process is examined; otherwise argv[0] is
 * parsed as a decimal pid. Nothing is printed when the lookup succeeds but
 * yields an empty name.
 *
 * Returns the result of vrf_identify().
 */
static int ipvrf_identify(int argc, char **argv)
{
	char vrf_name[32];
	unsigned int pid;
	int err;

	if (argc > 1)
		invarg("Extra arguments specified\n", argv[1]);
	else if (argc < 1)
		pid = getpid();
	else if (get_unsigned(&pid, argv[0], 10))
		invarg("Invalid pid\n", argv[0]);

	err = vrf_identify(pid, vrf_name, sizeof(vrf_name));
	if (err) {
		fprintf(stderr, "Failed to lookup vrf association: %s\n",
			strerror(errno));
		return err;
	}

	if (vrf_name[0] != '\0')
		printf("%s\n", vrf_name);

	return err;
}
111 
112 /* read PATH/vrf/NAME/cgroup.procs file */
read_cgroup_pids(const char * base_path,char * name)113 static void read_cgroup_pids(const char *base_path, char *name)
114 {
115 	char path[PATH_MAX];
116 	char buf[4096];
117 	FILE *fp;
118 
119 	if (snprintf(path, sizeof(path), "%s/vrf/%s%s",
120 		     base_path, name, CGRP_PROC_FILE) >= sizeof(path))
121 		return;
122 
123 	fp = fopen(path, "r");
124 	if (!fp)
125 		return; /* no cgroup file, nothing to show */
126 
127 	/* dump contents (pids) of cgroup.procs */
128 	while (fgets(buf, sizeof(buf), fp)) {
129 		char *nl, comm[32];
130 
131 		nl = strchr(buf, '\n');
132 		if (nl)
133 			*nl = '\0';
134 
135 		if (get_command_name(buf, comm, sizeof(comm)))
136 			strcpy(comm, "<terminated?>");
137 
138 		printf("%5s  %s\n", buf, comm);
139 	}
140 
141 	fclose(fp);
142 }
143 
144 /* recurse path looking for PATH[/NETNS]/vrf/NAME */
recurse_dir(char * base_path,char * name,const char * netns)145 static int recurse_dir(char *base_path, char *name, const char *netns)
146 {
147 	char path[PATH_MAX];
148 	struct dirent *de;
149 	struct stat fstat;
150 	int rc;
151 	DIR *d;
152 
153 	d = opendir(base_path);
154 	if (!d)
155 		return -1;
156 
157 	while ((de = readdir(d)) != NULL) {
158 		if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
159 			continue;
160 
161 		if (!strcmp(de->d_name, "vrf")) {
162 			const char *pdir = strrchr(base_path, '/');
163 
164 			/* found a 'vrf' directory. if it is for the given
165 			 * namespace then dump the cgroup pids
166 			 */
167 			if (*netns == '\0' ||
168 			    (pdir && !strcmp(pdir+1, netns)))
169 				read_cgroup_pids(base_path, name);
170 
171 			continue;
172 		}
173 
174 		/* is this a subdir that needs to be walked */
175 		if (snprintf(path, sizeof(path), "%s/%s",
176 			     base_path, de->d_name) >= sizeof(path))
177 			continue;
178 
179 		if (lstat(path, &fstat) < 0)
180 			continue;
181 
182 		if (S_ISDIR(fstat.st_mode)) {
183 			rc = recurse_dir(path, name, netns);
184 			if (rc != 0)
185 				goto out;
186 		}
187 	}
188 
189 	rc = 0;
190 out:
191 	closedir(d);
192 
193 	return rc;
194 }
195 
/*
 * Fill netns with the name netns_identify_pid() reports for "self",
 * followed by "-ns". The suffix is only appended to a non-empty name, and
 * three bytes of the len-byte buffer are held back for it.
 *
 * Returns 0 on success, -1 (after printing an error) if the lookup fails.
 */
static int ipvrf_get_netns(char *netns, int len)
{
	int failed = netns_identify_pid("self", netns, len - 3);

	if (failed) {
		fprintf(stderr, "Failed to get name of network namespace: %s\n",
			strerror(errno));
		return -1;
	}

	if (netns[0] == '\0')
		return 0;

	strcat(netns, "-ns");

	return 0;
}
209 
/*
 * "ip vrf pids NAME": list the processes associated with vrf NAME by
 * walking the cgroup2 mount, restricted to the namespace name returned by
 * ipvrf_get_netns() when that name is non-empty.
 *
 * Returns -1 on bad arguments or a failed lookup, otherwise the result of
 * recurse_dir().
 */
static int ipvrf_pids(int argc, char **argv)
{
	char netns[256];
	char *cgrp_mnt;
	char *vrf_name;
	int rc;

	if (argc != 1) {
		fprintf(stderr, "Invalid arguments\n");
		return -1;
	}

	vrf_name = argv[0];
	if (!name_is_vrf(vrf_name)) {
		fprintf(stderr, "Invalid VRF name\n");
		return -1;
	}

	cgrp_mnt = find_cgroup2_mount();
	if (cgrp_mnt == NULL)
		return -1;

	if (ipvrf_get_netns(netns, sizeof(netns)) < 0)
		rc = -1;
	else
		rc = recurse_dir(cgrp_mnt, vrf_name, netns);

	/* the mount path string is released here on every path */
	free(cgrp_mnt);

	return rc;
}
241 
/* load BPF program to set sk_bound_dev_if for sockets */

/* buffer handed to bpf_prog_load() below; presumably receives the kernel
 * verifier log -- confirm against bpf_prog_load() in bpf_util
 */
static char bpf_log_buf[256*1024];

/*
 * Build and load a BPF_PROG_TYPE_CGROUP_SOCK program, licensed "GPL", that
 * stores idx into the bound_dev_if field of struct bpf_sock and exits
 * with 1 in r0.
 *
 * Returns whatever bpf_prog_load() returns; the caller in this file treats
 * a negative value as failure and any other value as a program fd.
 */
static int prog_load(int idx)
{
	struct bpf_insn prog[] = {
		/* NOTE(review): r6 and r2 are written here but not read by
		 * any later instruction of this program
		 */
		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
		BPF_MOV64_IMM(BPF_REG_3, idx),
		BPF_MOV64_IMM(BPF_REG_2,
			      offsetof(struct bpf_sock, bound_dev_if)),
		/* 32-bit store of r3 (idx) at r1 + offset of bound_dev_if;
		 * r1 is presumably the struct bpf_sock context pointer
		 */
		BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
			    offsetof(struct bpf_sock, bound_dev_if)),
		BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
		BPF_EXIT_INSN(),
	};

	return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog),
			     "GPL", bpf_log_buf, sizeof(bpf_log_buf));
}
261 
vrf_configure_cgroup(const char * path,int ifindex)262 static int vrf_configure_cgroup(const char *path, int ifindex)
263 {
264 	int rc = -1, cg_fd, prog_fd = -1;
265 
266 	cg_fd = open(path, O_DIRECTORY | O_RDONLY);
267 	if (cg_fd < 0) {
268 		fprintf(stderr,
269 			"Failed to open cgroup path: '%s'\n",
270 			strerror(errno));
271 		goto out;
272 	}
273 
274 	/*
275 	 * Load bpf program into kernel and attach to cgroup to affect
276 	 * socket creates
277 	 */
278 	prog_fd = prog_load(ifindex);
279 	if (prog_fd < 0) {
280 		fprintf(stderr, "Failed to load BPF prog: '%s'\n",
281 			strerror(errno));
282 
283 		if (errno != EPERM) {
284 			fprintf(stderr,
285 				"Kernel compiled with CGROUP_BPF enabled?\n");
286 		}
287 		goto out;
288 	}
289 
290 	if (bpf_prog_attach_fd(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE)) {
291 		fprintf(stderr, "Failed to attach prog to cgroup: '%s'\n",
292 			strerror(errno));
293 		goto out;
294 	}
295 
296 	rc = 0;
297 out:
298 	close(cg_fd);
299 	close(prog_fd);
300 
301 	return rc;
302 }
303 
304 /* get base path for controller-less cgroup for a process.
305  * path returned does not include /vrf/NAME if it exists
306  */
vrf_path(char * vpath,size_t len)307 static int vrf_path(char *vpath, size_t len)
308 {
309 	char path[PATH_MAX];
310 	char buf[4096];
311 	char *vrf;
312 	FILE *fp;
313 
314 	snprintf(path, sizeof(path), "/proc/%d/cgroup", getpid());
315 	fp = fopen(path, "r");
316 	if (!fp)
317 		return -1;
318 
319 	vpath[0] = '\0';
320 
321 	while (fgets(buf, sizeof(buf), fp)) {
322 		char *start, *nl;
323 
324 		start = strstr(buf, "::/");
325 		if (!start)
326 			continue;
327 
328 		/* advance past '::' */
329 		start += 2;
330 
331 		nl = strchr(start, '\n');
332 		if (nl)
333 			*nl = '\0';
334 
335 		vrf = strstr(start, "/vrf");
336 		if (vrf)
337 			*vrf = '\0';
338 
339 		strlcpy(vpath, start, len);
340 
341 		/* if vrf path is just / then return nothing */
342 		if (!strcmp(vpath, "/"))
343 			vpath[0] = '\0';
344 
345 		break;
346 	}
347 
348 	fclose(fp);
349 
350 	return 0;
351 }
352 
vrf_switch(const char * name)353 static int vrf_switch(const char *name)
354 {
355 	char path[PATH_MAX], *mnt, pid[16];
356 	char vpath[PATH_MAX], netns[256];
357 	int ifindex = 0;
358 	int rc = -1, len, fd = -1;
359 
360 	if (strcmp(name, "default")) {
361 		ifindex = name_is_vrf(name);
362 		if (!ifindex) {
363 			fprintf(stderr, "Invalid VRF name\n");
364 			return -1;
365 		}
366 	}
367 
368 	mnt = find_cgroup2_mount();
369 	if (!mnt)
370 		return -1;
371 
372 	/* -1 on length to add '/' to the end */
373 	if (ipvrf_get_netns(netns, sizeof(netns) - 1) < 0)
374 		goto out;
375 
376 	if (vrf_path(vpath, sizeof(vpath)) < 0) {
377 		fprintf(stderr, "Failed to get base cgroup path: %s\n",
378 			strerror(errno));
379 		goto out;
380 	}
381 
382 	/* if path already ends in netns then don't add it again */
383 	if (*netns != '\0') {
384 		char *pdir = strrchr(vpath, '/');
385 
386 		if (!pdir)
387 			pdir = vpath;
388 		else
389 			pdir++;
390 
391 		if (strcmp(pdir, netns) == 0)
392 			*pdir = '\0';
393 
394 		strcat(netns, "/");
395 	}
396 
397 	/* path to cgroup; make sure buffer has room to cat "/cgroup.procs"
398 	 * to the end of the path
399 	 */
400 	len = snprintf(path, sizeof(path) - sizeof(CGRP_PROC_FILE),
401 		       "%s%s/%svrf/%s",
402 		       mnt, vpath, netns, ifindex ? name : "");
403 	if (len > sizeof(path) - sizeof(CGRP_PROC_FILE)) {
404 		fprintf(stderr, "Invalid path to cgroup2 mount\n");
405 		goto out;
406 	}
407 
408 	if (make_path(path, 0755)) {
409 		fprintf(stderr, "Failed to setup vrf cgroup2 directory\n");
410 		goto out;
411 	}
412 
413 	if (ifindex && vrf_configure_cgroup(path, ifindex))
414 		goto out;
415 
416 	/*
417 	 * write pid to cgroup.procs making process part of cgroup
418 	 */
419 	strcat(path, CGRP_PROC_FILE);
420 	fd = open(path, O_RDWR | O_APPEND);
421 	if (fd < 0) {
422 		fprintf(stderr, "Failed to open cgroups.procs file: %s.\n",
423 			strerror(errno));
424 		goto out;
425 	}
426 
427 	snprintf(pid, sizeof(pid), "%d", getpid());
428 	if (write(fd, pid, strlen(pid)) < 0) {
429 		fprintf(stderr, "Failed to join cgroup\n");
430 		goto out2;
431 	}
432 
433 	rc = 0;
434 out2:
435 	close(fd);
436 out:
437 	free(mnt);
438 
439 	return rc;
440 }
441 
ipvrf_exec(int argc,char ** argv)442 static int ipvrf_exec(int argc, char **argv)
443 {
444 	if (argc < 1) {
445 		fprintf(stderr, "No VRF name specified\n");
446 		return -1;
447 	}
448 	if (argc < 2) {
449 		fprintf(stderr, "No command specified\n");
450 		return -1;
451 	}
452 
453 	if (vrf_switch(argv[0]))
454 		return -1;
455 
456 	return -cmd_exec(argv[1], argv + 1, !!batch_mode);
457 }
458 
/*
 * Reset the VRF association of the current process to the default VRF;
 * used by netns_exec. Nothing is done when the current association cannot
 * be determined or the process has none.
 */
void vrf_reset(void)
{
	char cur_vrf[32];

	if (vrf_identify(getpid(), cur_vrf, sizeof(cur_vrf)) != 0)
		return;

	if (cur_vrf[0] != '\0')
		vrf_switch("default");
}
472 
ipvrf_filter_req(struct nlmsghdr * nlh,int reqlen)473 static int ipvrf_filter_req(struct nlmsghdr *nlh, int reqlen)
474 {
475 	struct rtattr *linkinfo;
476 	int err;
477 
478 	if (vrf_filter.kind) {
479 		linkinfo = addattr_nest(nlh, reqlen, IFLA_LINKINFO);
480 
481 		err = addattr_l(nlh, reqlen, IFLA_INFO_KIND, vrf_filter.kind,
482 				strlen(vrf_filter.kind));
483 		if (err)
484 			return err;
485 
486 		addattr_nest_end(nlh, linkinfo);
487 	}
488 
489 	return 0;
490 }
491 
492 /* input arg is linkinfo */
vrf_table_linkinfo(struct rtattr * li[])493 static __u32 vrf_table_linkinfo(struct rtattr *li[])
494 {
495 	struct rtattr *attr[IFLA_VRF_MAX + 1];
496 
497 	if (li[IFLA_INFO_DATA]) {
498 		parse_rtattr_nested(attr, IFLA_VRF_MAX, li[IFLA_INFO_DATA]);
499 
500 		if (attr[IFLA_VRF_TABLE])
501 			return rta_getattr_u32(attr[IFLA_VRF_TABLE]);
502 	}
503 
504 	return 0;
505 }
506 
ipvrf_print(struct nlmsghdr * n)507 static int ipvrf_print(struct nlmsghdr *n)
508 {
509 	struct ifinfomsg *ifi = NLMSG_DATA(n);
510 	struct rtattr *tb[IFLA_MAX+1];
511 	struct rtattr *li[IFLA_INFO_MAX+1];
512 	int len = n->nlmsg_len;
513 	const char *name;
514 	__u32 tb_id;
515 
516 	len -= NLMSG_LENGTH(sizeof(*ifi));
517 	if (len < 0)
518 		return 0;
519 
520 	if (vrf_filter.ifindex && vrf_filter.ifindex != ifi->ifi_index)
521 		return 0;
522 
523 	parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
524 
525 	/* kernel does not support filter by master device */
526 	if (tb[IFLA_MASTER]) {
527 		int master = *(int *)RTA_DATA(tb[IFLA_MASTER]);
528 
529 		if (vrf_filter.master && master != vrf_filter.master)
530 			return 0;
531 	}
532 
533 	if (!tb[IFLA_IFNAME]) {
534 		fprintf(stderr,
535 			"BUG: device with ifindex %d has nil ifname\n",
536 			ifi->ifi_index);
537 		return 0;
538 	}
539 	name = rta_getattr_str(tb[IFLA_IFNAME]);
540 
541 	/* missing LINKINFO means not VRF. e.g., kernel does not
542 	 * support filtering on kind, so userspace needs to handle
543 	 */
544 	if (!tb[IFLA_LINKINFO])
545 		return 0;
546 
547 	parse_rtattr_nested(li, IFLA_INFO_MAX, tb[IFLA_LINKINFO]);
548 
549 	if (!li[IFLA_INFO_KIND])
550 		return 0;
551 
552 	if (strcmp(RTA_DATA(li[IFLA_INFO_KIND]), "vrf"))
553 		return 0;
554 
555 	tb_id = vrf_table_linkinfo(li);
556 	if (!tb_id) {
557 		fprintf(stderr,
558 			"BUG: VRF %s is missing table id\n", name);
559 		return 0;
560 	}
561 
562 	printf("%-16s %5u", name, tb_id);
563 
564 	printf("\n");
565 	return 1;
566 }
567 
ipvrf_show(int argc,char ** argv)568 static int ipvrf_show(int argc, char **argv)
569 {
570 	struct nlmsg_chain linfo = { NULL, NULL};
571 	int rc = 0;
572 
573 	vrf_filter.kind = "vrf";
574 
575 	if (argc > 1)
576 		usage();
577 
578 	if (argc == 1) {
579 		__u32 tb_id;
580 
581 		tb_id = ipvrf_get_table(argv[0]);
582 		if (!tb_id) {
583 			fprintf(stderr, "Invalid VRF\n");
584 			return 1;
585 		}
586 		printf("%s %u\n", argv[0], tb_id);
587 		return 0;
588 	}
589 
590 	if (ip_linkaddr_list(0, ipvrf_filter_req, &linfo, NULL) == 0) {
591 		struct nlmsg_list *l;
592 		unsigned nvrf = 0;
593 		int n;
594 
595 		n = printf("%-16s  %5s\n", "Name", "Table");
596 		printf("%.*s\n", n-1, "-----------------------");
597 		for (l = linfo.head; l; l = l->next)
598 			nvrf += ipvrf_print(&l->h);
599 
600 		if (!nvrf)
601 			printf("No VRF has been configured\n");
602 	} else
603 		rc = 1;
604 
605 	free_nlmsg_chain(&linfo);
606 
607 	return rc;
608 }
609 
do_ipvrf(int argc,char ** argv)610 int do_ipvrf(int argc, char **argv)
611 {
612 	if (argc == 0)
613 		return ipvrf_show(0, NULL);
614 
615 	if (matches(*argv, "identify") == 0)
616 		return ipvrf_identify(argc-1, argv+1);
617 
618 	if (matches(*argv, "pids") == 0)
619 		return ipvrf_pids(argc-1, argv+1);
620 
621 	if (matches(*argv, "exec") == 0)
622 		return ipvrf_exec(argc-1, argv+1);
623 
624 	if (matches(*argv, "show") == 0 ||
625 	    matches(*argv, "lst") == 0 ||
626 	    matches(*argv, "list") == 0)
627 		return ipvrf_show(argc-1, argv+1);
628 
629 	if (matches(*argv, "help") == 0)
630 		usage();
631 
632 	fprintf(stderr, "Command \"%s\" is unknown, try \"ip vrf help\".\n",
633 		*argv);
634 
635 	exit(-1);
636 }
637