1 /* Copyright 2017 The Chromium OS Authors. All rights reserved.
2  * Use of this source code is governed by a BSD-style license that can be
3  * found in the LICENSE file.
4  */
5 
6 #include "system.h"
7 
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <grp.h>
11 #include <net/if.h>
12 #include <pwd.h>
13 #include <stdbool.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <sys/ioctl.h>
17 #include <sys/prctl.h>
18 #include <sys/socket.h>
19 #include <sys/stat.h>
20 #include <sys/statvfs.h>
21 #include <unistd.h>
22 
23 #include <linux/securebits.h>
24 
25 #include "util.h"
26 
/*
 * Kernel 4.3 introduced SECBIT_NO_CAP_AMBIENT_RAISE together with its lock
 * bit. Securebits headers that predate it lack both names, so provide
 * fallback definitions (bit positions 6 and 7) when they are missing.
 */
#if !defined(SECBIT_NO_CAP_AMBIENT_RAISE)
#define SECBIT_NO_CAP_AMBIENT_RAISE (issecure_mask(6))
#endif

#if !defined(SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED)
#define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED (issecure_mask(7))
#endif

/*
 * Pin the value of SECURE_ALL_BITS at compile time.
 * Android devices are currently compiled against 4.4 kernel headers, and
 * kernel 4.3 added a new securebit.
 * Whenever the headers gain another securebit, SECURE_ALL_BITS grows, and
 * trying to set the larger mask on a kernel that does not know the new bit
 * fails with EPERM. This assert turns that runtime failure into a build
 * failure so the mismatch is noticed when the headers change.
 */
#ifdef __ANDROID__
_Static_assert(SECURE_ALL_BITS == 0x55, "SECURE_ALL_BITS == 0x55.");
#endif
50 
/*
 * Reports whether |mask| contains both SECBIT_NOROOT and
 * SECBIT_NOROOT_LOCKED.
 * Returns 1 when both bits are set, 0 when either one is missing.
 */
int secure_noroot_set_and_locked(uint64_t mask)
{
	const uint64_t required = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;

	return (mask & required) == required;
}
56 
/*
 * Sets (and locks) the securebits of the calling process.
 *
 * |skip_mask| lists bits that must be left untouched. When
 * |require_keep_caps| is false, SECBIT_KEEP_CAPS is allowed to stay cleared
 * if it is already locked in that state.
 *
 * Returns 0 on success (including the case where nothing is left to set)
 * and -1 if either prctl(2) call fails.
 */
int lock_securebits(uint64_t skip_mask, bool require_keep_caps)
{
	/* Begin with every securebit and every lock bit, then carve out. */
	unsigned long wanted = SECURE_ALL_BITS | SECURE_ALL_LOCKS;

	/*
	 * SECBIT_KEEP_CAPS is cleared automatically on execve(2). A process
	 * that already has it locked (nested minijail usage, for instance)
	 * therefore cannot set it again, and asking for it would make the
	 * prctl(2) below fail. Unless the caller insists on the bit, leave it
	 * off when it is locked and currently clear.
	 */
	if (!require_keep_caps) {
		int current = prctl(PR_GET_SECUREBITS);
		if (current < 0) {
			pwarn("prctl(PR_GET_SECUREBITS) failed");
			return -1;
		}

		bool keep_caps_locked =
		    (current & SECBIT_KEEP_CAPS_LOCKED) != 0;
		bool keep_caps_set = (current & SECBIT_KEEP_CAPS) != 0;
		if (keep_caps_locked && !keep_caps_set)
			wanted &= ~SECBIT_KEEP_CAPS;
	}

	/*
	 * An ambient capability can only be raised when it is already in
	 * both the permitted and the inheritable set. Those sets are
	 * configured separately, so there is no need to set or lock the
	 * NO_CAP_AMBIENT_RAISE securebit here.
	 */
	wanted &= ~SECBIT_NO_CAP_AMBIENT_RAISE;
	wanted &= ~SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED;

	/* Honor the caller's request to leave certain bits alone. */
	wanted &= ~skip_mask;

	if (wanted == 0) {
		warn("not locking any securebits");
		return 0;
	}

	if (prctl(PR_SET_SECUREBITS, wanted) < 0) {
		pwarn("prctl(PR_SET_SECUREBITS) failed");
		return -1;
	}

	return 0;
}
106 
write_proc_file(pid_t pid,const char * content,const char * basename)107 int write_proc_file(pid_t pid, const char *content, const char *basename)
108 {
109 	int fd, ret;
110 	size_t sz, len;
111 	ssize_t written;
112 	char filename[32];
113 
114 	sz = sizeof(filename);
115 	ret = snprintf(filename, sz, "/proc/%d/%s", pid, basename);
116 	if (ret < 0 || (size_t)ret >= sz) {
117 		warn("failed to generate %s filename", basename);
118 		return -1;
119 	}
120 
121 	fd = open(filename, O_WRONLY | O_CLOEXEC);
122 	if (fd < 0) {
123 		pwarn("failed to open '%s'", filename);
124 		return -errno;
125 	}
126 
127 	len = strlen(content);
128 	written = write(fd, content, len);
129 	if (written < 0) {
130 		pwarn("failed to write '%s'", filename);
131 		return -errno;
132 	}
133 
134 	if ((size_t)written < len) {
135 		warn("failed to write %zu bytes to '%s'", len, filename);
136 		return -1;
137 	}
138 	close(fd);
139 	return 0;
140 }
141 
/*
 * Returns the number of the highest capability the running kernel supports.
 *
 * We specifically do not use cap_valid() as that only tells us the last
 * valid cap we were *compiled* against (i.e. what the version of kernel
 * headers says). If we run on a different kernel version, then it's not
 * uncommon for that to be less (if an older kernel) or more (if a newer
 * kernel).
 * Normally, we read the answer from /proc. On Android, not all processes are
 * guaranteed to be able to access '/proc/sys/kernel/cap_last_cap', so we
 * find the value programmatically by calling prctl(PR_CAPBSET_READ).
 */
unsigned int get_last_valid_cap(void)
{
	unsigned int last_valid_cap = 0;
	if (is_android()) {
		/* Probe upwards until the kernel rejects a capability. */
		for (; prctl(PR_CAPBSET_READ, last_valid_cap, 0, 0, 0) >= 0;
		     ++last_valid_cap)
			;

		/* |last_valid_cap| will be the first failing value. */
		if (last_valid_cap > 0) {
			last_valid_cap--;
		}
	} else {
		const char cap_file[] = "/proc/sys/kernel/cap_last_cap";
		FILE *fp = fopen(cap_file, "re");
		/*
		 * Handing a NULL stream to fscanf(3) is undefined, so treat
		 * an unreadable file the same way as an unparsable one.
		 */
		if (!fp)
			pdie("fopen(%s)", cap_file);
		if (fscanf(fp, "%u", &last_valid_cap) != 1)
			pdie("fscanf(%s)", cap_file);
		fclose(fp);
	}
	return last_valid_cap;
}
173 
/*
 * Probes for ambient capability support by issuing a
 * PR_CAP_AMBIENT_IS_SET query for CAP_CHOWN.
 * Returns 1 when the prctl(2) call does not fail and 0 when it does.
 */
int cap_ambient_supported(void)
{
	int probe =
	    prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0);

	return probe >= 0;
}
179 
config_net_loopback(void)180 int config_net_loopback(void)
181 {
182 	const char ifname[] = "lo";
183 	int sock;
184 	struct ifreq ifr;
185 
186 	/* Make sure people don't try to add really long names. */
187 	_Static_assert(sizeof(ifname) <= IFNAMSIZ, "interface name too long");
188 
189 	sock = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0);
190 	if (sock < 0) {
191 		pwarn("socket(AF_LOCAL) failed");
192 		return -1;
193 	}
194 
195 	/*
196 	 * Do the equiv of `ip link set up lo`.  The kernel will assign
197 	 * IPv4 (127.0.0.1) & IPv6 (::1) addresses automatically!
198 	 */
199 	strcpy(ifr.ifr_name, ifname);
200 	if (ioctl(sock, SIOCGIFFLAGS, &ifr) < 0) {
201 		pwarn("ioctl(SIOCGIFFLAGS) failed");
202 		return -1;
203 	}
204 
205 	/* The kernel preserves ifr.ifr_name for use. */
206 	ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
207 	if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0) {
208 		pwarn("ioctl(SIOCSIFFLAGS) failed");
209 		return -1;
210 	}
211 
212 	close(sock);
213 	return 0;
214 }
215 
/*
 * Keeps one descriptor of the pair |fds| and closes the other.
 * |index| (0 or 1) selects the element of |fds| to keep.
 * Returns the kept descriptor, or -1 without closing anything when |index|
 * is out of range.
 */
int setup_pipe_end(int fds[2], size_t index)
{
	if (index != 0 && index != 1)
		return -1;

	const size_t unused = index ^ 1;

	close(fds[unused]);
	return fds[index];
}
224 
/*
 * Closes the descriptor of |fds| that is not selected by |index| (0 or 1)
 * and duplicates the selected one onto |fd| with dup2(2).
 * Returns the result of dup2(2), or -1 without closing anything when
 * |index| is out of range.
 */
int setup_and_dupe_pipe_end(int fds[2], size_t index, int fd)
{
	if (index != 0 && index != 1)
		return -1;

	const size_t unused = index ^ 1;

	close(fds[unused]);
	/* Make |fd| refer to the end of the pipe that was kept. */
	return dup2(fds[index], fd);
}
234 
/*
 * Writes |pid| in decimal, followed by a newline, to the file at |path|.
 *
 * Returns 0 on success, -errno if the file cannot be opened or closed, and
 * -1 if the formatted write fails. The stream is closed on every path.
 */
int write_pid_to_path(pid_t pid, const char *path)
{
	int rc;
	FILE *fp = fopen(path, "we");

	if (!fp) {
		/* Capture errno first in case logging modifies it. */
		rc = errno;
		pwarn("failed to open '%s'", path);
		return -rc;
	}
	if (fprintf(fp, "%d\n", (int)pid) < 0) {
		/* errno is not consulted here; report a generic failure. */
		warn("fprintf(%s) failed", path);
		fclose(fp);
		return -1;
	}
	/* fclose(3) flushes the stream, so its failure means a lost write. */
	if (fclose(fp)) {
		rc = errno;
		pwarn("fclose(%s) failed", path);
		return -rc;
	}

	return 0;
}
255 
/*
 * Create the |path| directory and its parents (if need be) with |mode|.
 * If not |isdir|, then |path| is actually a file, so the last component
 * will not be created.
 *
 * Components that already exist are not an error. Returns 0 on success and
 * -errno if strdup(3) or mkdir(2) fails.
 */
int mkdir_p(const char *path, mode_t mode, bool isdir)
{
	int rc;
	char *dir = strdup(path);
	if (!dir) {
		rc = errno;
		pwarn("strdup(%s) failed", path);
		return -rc;
	}

	/*
	 * Starting from the root, work our way out to the end.  The search
	 * starts at the second character so a leading '/' is not treated as
	 * a component boundary.  An empty |path| has no second character, so
	 * skip the search rather than read past the end of the copy.
	 */
	char *p = dir[0] != '\0' ? strchr(dir + 1, '/') : NULL;
	while (p) {
		/* Temporarily cut the string to the current prefix. */
		*p = '\0';
		if (mkdir(dir, mode) && errno != EEXIST) {
			rc = errno;
			pwarn("mkdir(%s, 0%o) failed", dir, mode);
			free(dir);
			return -rc;
		}
		*p = '/';
		p = strchr(p + 1, '/');
	}

	/*
	 * Create the last directory.  We still check EEXIST here in case
	 * of trailing slashes.
	 */
	free(dir);
	if (isdir && mkdir(path, mode) && errno != EEXIST) {
		rc = errno;
		pwarn("mkdir(%s, 0%o) failed", path, mode);
		return -rc;
	}
	return 0;
}
297 
298 /*
299  * setup_mount_destination: Ensures the mount target exists.
300  * Creates it if needed and possible.
301  */
setup_mount_destination(const char * source,const char * dest,uid_t uid,uid_t gid,bool bind,unsigned long * mnt_flags)302 int setup_mount_destination(const char *source, const char *dest, uid_t uid,
303 			    uid_t gid, bool bind, unsigned long *mnt_flags)
304 {
305 	int rc;
306 	struct stat st_buf;
307 	bool domkdir;
308 
309 	rc = stat(dest, &st_buf);
310 	if (rc == 0) /* destination exists */
311 		return 0;
312 
313 	/*
314 	 * Try to create the destination.
315 	 * Either make a directory or touch a file depending on the source type.
316 	 *
317 	 * If the source isn't an absolute path, assume it is a filesystem type
318 	 * such as "tmpfs" and create a directory to mount it on.  The dest will
319 	 * be something like "none" or "proc" which we shouldn't be checking.
320 	 */
321 	if (source[0] == '/') {
322 		/* The source is an absolute path -- it better exist! */
323 		rc = stat(source, &st_buf);
324 		if (rc) {
325 			rc = errno;
326 			pwarn("stat(%s) failed", source);
327 			return -rc;
328 		}
329 
330 		/*
331 		 * If bind mounting, we only create a directory if the source
332 		 * is a directory, else we always bind mount it as a file to
333 		 * support device nodes, sockets, etc...
334 		 *
335 		 * For all other mounts, we assume a block/char source is
336 		 * going to want a directory to mount to.  If the source is
337 		 * something else (e.g. a fifo or socket), this probably will
338 		 * not do the right thing, but we'll fail later on when we try
339 		 * to mount(), so shouldn't be a big deal.
340 		 */
341 		domkdir = S_ISDIR(st_buf.st_mode) ||
342 			  (!bind && (S_ISBLK(st_buf.st_mode) ||
343 				     S_ISCHR(st_buf.st_mode)));
344 
345 		/* If bind mounting, also grab the mount flags of the source. */
346 		if (bind && mnt_flags) {
347 			struct statvfs stvfs_buf;
348 			rc = statvfs(source, &stvfs_buf);
349 			if (rc) {
350 				rc = errno;
351 				pwarn(
352 				    "failed to look up mount flags: source=%s",
353 				    source);
354 				return -rc;
355 			}
356 			*mnt_flags = stvfs_buf.f_flag;
357 		}
358 	} else {
359 		/* The source is a relative path -- assume it's a pseudo fs. */
360 
361 		/* Disallow relative bind mounts. */
362 		if (bind) {
363 			warn("relative bind-mounts are not allowed: source=%s",
364 			     source);
365 			return -EINVAL;
366 		}
367 
368 		domkdir = true;
369 	}
370 
371 	/*
372 	 * Now that we know what we want to do, do it!
373 	 * We always create the intermediate dirs and the final path with 0755
374 	 * perms and root/root ownership.  This shouldn't be a problem because
375 	 * the actual mount will set those perms/ownership on the mount point
376 	 * which is all people should need to access it.
377 	 */
378 	rc = mkdir_p(dest, 0755, domkdir);
379 	if (rc)
380 		return rc;
381 	if (!domkdir) {
382 		int fd = open(dest, O_RDWR | O_CREAT | O_CLOEXEC, 0700);
383 		if (fd < 0) {
384 			rc = errno;
385 			pwarn("open(%s) failed", dest);
386 			return -rc;
387 		}
388 		close(fd);
389 	}
390 	if (chown(dest, uid, gid)) {
391 		rc = errno;
392 		pwarn("chown(%s, %u, %u) failed", dest, uid, gid);
393 		return -rc;
394 	}
395 	return 0;
396 }
397 
/*
 * lookup_user: Gets the uid/gid for the given username.
 *
 * On success stores the user's uid in |uid| and gid in |gid| and returns 0.
 * Returns -ENOMEM if the scratch buffer cannot be allocated and -1 if the
 * user is not found or the lookup fails for any other reason.
 */
int lookup_user(const char *user, uid_t *uid, gid_t *gid)
{
	/* Upper bound for the scratch buffer so the retry loop terminates. */
	const size_t max_buf_sz = 1024 * 1024;
	struct passwd pw;
	struct passwd *ppw = NULL;
	int err;

	/*
	 * sysconf(_SC_GETPW_R_SIZE_MAX) only yields a suggested initial size
	 * for the buffer (or -1), not a guaranteed maximum, so use it as a
	 * starting point and grow the buffer when getpwnam_r(3) asks for it.
	 */
	long hint = sysconf(_SC_GETPW_R_SIZE_MAX);
	size_t sz = hint > 0 ? (size_t)hint : 65536;

	for (;;) {
		char *buf = malloc(sz);
		if (!buf)
			return -ENOMEM;
		err = getpwnam_r(user, &pw, buf, sz, &ppw);
		/*
		 * We're safe to free the buffer here. The strings inside |pw|
		 * point inside |buf|, but we don't use any of them; this
		 * leaves the pointers dangling but it's safe. |ppw| points at
		 * |pw| if getpwnam_r(3) succeeded.
		 */
		free(buf);
		/* ERANGE means |buf| was too small; anything else is final. */
		if (err != ERANGE || sz >= max_buf_sz)
			break;
		sz *= 2;
	}

	/* A failed call and a missing entry both leave nothing to return. */
	if (err != 0 || !ppw)
		return -1;

	*uid = ppw->pw_uid;
	*gid = ppw->pw_gid;
	return 0;
}
433 
/*
 * lookup_group: Gets the gid for the given group name.
 *
 * On success stores the group's gid in |gid| and returns 0. Returns -ENOMEM
 * if the scratch buffer cannot be allocated and -1 if the group is not
 * found or the lookup fails for any other reason.
 */
int lookup_group(const char *group, gid_t *gid)
{
	/* Upper bound for the scratch buffer so the retry loop terminates. */
	const size_t max_buf_sz = 1024 * 1024;
	struct group gr;
	struct group *pgr = NULL;
	int err;

	/*
	 * sysconf(_SC_GETGR_R_SIZE_MAX) only yields a suggested initial size
	 * for the buffer (or -1), not a guaranteed maximum, so use it as a
	 * starting point and grow the buffer when getgrnam_r(3) asks for it.
	 */
	long hint = sysconf(_SC_GETGR_R_SIZE_MAX);
	size_t sz = hint > 0 ? (size_t)hint : 65536;

	for (;;) {
		char *buf = malloc(sz);
		if (!buf)
			return -ENOMEM;
		err = getgrnam_r(group, &gr, buf, sz, &pgr);
		/*
		 * We're safe to free the buffer here. The strings inside |gr|
		 * point inside |buf|, but we don't use any of them; this
		 * leaves the pointers dangling but it's safe. |pgr| points at
		 * |gr| if getgrnam_r(3) succeeded.
		 */
		free(buf);
		/* ERANGE means |buf| was too small; anything else is final. */
		if (err != ERANGE || sz >= max_buf_sz)
			break;
		sz *= 2;
	}

	/* A failed call and a missing entry both leave nothing to return. */
	if (err != 0 || !pgr)
		return -1;

	*gid = pgr->gr_gid;
	return 0;
}
467