1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "./execute.h"
18 
19 #include <linux/securebits.h>
20 #include <linux/uio.h>
21 #include <seccomp_policy.h>
22 #include <sys/capability.h>
23 #include <sys/personality.h>
24 #include <sys/prctl.h>
25 #include <sys/ptrace.h>
26 #include <sys/wait.h>
27 #include <unistd.h>
28 
29 #include <iostream>
30 #include <memory>
31 
32 #include "./elf-utils.h"
33 #include "./registers.h"
34 #include "./shell-code.h"
35 
36 namespace shell_as {
37 
38 namespace {
39 
40 // Capabilities are implemented as a 64-bit bit-vector. Therefore the maximum
41 // number of capabilities supported by a kernel is 64.
42 constexpr cap_value_t kMaxCapabilities = 64;
43 
DropPreExecPrivileges(const shell_as::SecurityContext * context)44 bool DropPreExecPrivileges(const shell_as::SecurityContext* context) {
45   // The ordering here is important:
46   //   (1) The platform's seccomp filters disallow setresgiud, so it must come
47   //       before the seccomp drop.
48   //   (2) Adding seccomp filters must happen before setresuid because setresuid
49   //       drops some capabilities which are required for seccomp.
50   if (context->group_id.has_value() &&
51       setresgid(context->group_id.value(), context->group_id.value(),
52                 context->group_id.value()) != 0) {
53     std::cerr << "Unable to set group id: " << context->group_id.value()
54               << std::endl;
55     return false;
56   }
57   if (context->supplementary_group_ids.has_value() &&
58       setgroups(context->supplementary_group_ids.value().size(),
59                 context->supplementary_group_ids.value().data()) != 0) {
60     std::cerr << "Unable to set supplementary groups." << std::endl;
61     return false;
62   }
63 
64   if (context->seccomp_filter.has_value()) {
65     switch (context->seccomp_filter.value()) {
66       case shell_as::kAppFilter:
67         set_app_seccomp_filter();
68         break;
69       case shell_as::kAppZygoteFilter:
70         set_app_zygote_seccomp_filter();
71         break;
72       case shell_as::kSystemFilter:
73         set_system_seccomp_filter();
74         break;
75     }
76   }
77 
78   // This must be set prior to setresuid, otherwise that call will drop the
79   // permitted set of capabilities.
80   if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0) {
81     std::cerr << "Unable to set keep capabilities." << std::endl;
82     return false;
83   }
84 
85   if (context->user_id.has_value() &&
86       setresuid(context->user_id.value(), context->user_id.value(),
87                 context->user_id.value()) != 0) {
88     std::cerr << "Unable to set user id: " << context->user_id.value()
89               << std::endl;
90     return false;
91   }
92 
93   // Capabilities must be reacquired after setresuid since it still modifies
94   // capabilities, but it leaves the permitted set intact.
95   if (context->capabilities.has_value()) {
96     // The first step is to raise all the capabilities possible in all sets
97     // including the inheritable set. This defines the superset of possible
98     // capabilities that can be passed on after calling execve.
99     //
100     // The reason that all capabilities are raised in the inheritable set is due
101     // to a limitation of libcap. libcap may not contain a capability definition
102     // for all capabilities supported by the kernel. If this occurs, it will
103     // silently ignore requests to raise unknown capabilities via cap_set_flag.
104     //
105     // However, when parsing a cap_t from a text value, libcap will treat "all"
106     // as all possible 64 capability bits as set.
107     cap_t all_capabilities = cap_from_text("all+pie");
108     if (cap_set_proc(all_capabilities) != 0) {
109       std::cerr << "Unable to raise inheritable capability set." << std::endl;
110       cap_free(all_capabilities);
111       return false;
112     }
113     cap_free(all_capabilities);
114 
115     // The second step is to raise the /desired/ capability subset in the
116     // ambient capability set. These are the capabilities that will actually be
117     // passed to the process after execve.
118     if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0) != 0) {
119       std::cerr << "Unable to clear ambient capabilities." << std::endl;
120       return false;
121     }
122     cap_t desired_capabilities = context->capabilities.value();
123     for (cap_value_t cap = 0; cap < kMaxCapabilities; cap++) {
124       // Skip capability values not supported by the kernel.
125       if (!CAP_IS_SUPPORTED(cap)) {
126         continue;
127       }
128       cap_flag_value_t value = CAP_CLEAR;
129       if (cap_get_flag(desired_capabilities, cap, CAP_PERMITTED, &value) == 0 &&
130           value == CAP_SET) {
131         if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0) != 0) {
132           std::cerr << "Unable to raise capability " << cap
133                     << " in the ambient set." << std::endl;
134           return false;
135         }
136       }
137     }
138 
139     // The final step is to raise the SECBIT_NOROOT flag. The kernel has special
140     // case logic that treats root calling execve differently than other users.
141     //
142     // By default all bits in the permitted set prior to calling execve will be
143     // raised after calling execve. This would ignore the work above and result
144     // in the process to have all capabilities.
145     //
146     // Setting the SECBIT_NOROOT disables this special casing for root and
147     // causes the kernel to treat it as any other UID.
148     int64_t secure_bits = prctl(PR_GET_SECUREBITS, 0, 0, 0, 0);
149     if (secure_bits < 0 ||
150         prctl(PR_SET_SECUREBITS, secure_bits | SECBIT_NOROOT, 0, 0, 0) != 0) {
151       std::cerr << "Unable to raise SECBIT_NOROOT." << std::endl;
152       return false;
153     }
154   }
155   return true;
156 }
157 
// Reads one byte from |address| in the traced |process|.
//
// A whole word is fetched with PTRACE_PEEKDATA and the byte that sits first in
// memory within that word is returned. The ptrace result is used as data
// directly, so a failed read is not reported to the caller.
uint8_t ReadChildByte(const pid_t process, const uintptr_t address) {
  uintptr_t word = ptrace(PTRACE_PEEKDATA, process, address, nullptr);
  const uint8_t* word_bytes = reinterpret_cast<const uint8_t*>(&word);
  return *word_bytes;
}
162 
// Writes |value| to the single byte at |address| in the traced |process|.
//
// The word at |address| is fetched, its first byte in memory is replaced and
// the word is written back. Going byte-by-byte is slower than writing whole
// words, but it avoids having to deal with differing word sizes and with
// buffers whose length is not a multiple of the native word size.
void WriteChildByte(const pid_t process, const uintptr_t address,
                    const uint8_t value) {
  uintptr_t word = ptrace(PTRACE_PEEKDATA, process, address, nullptr);
  uint8_t* word_bytes = reinterpret_cast<uint8_t*>(&word);
  *word_bytes = value;
  ptrace(PTRACE_POKEDATA, process, address, word);
}
172 
ReadChildMemory(const pid_t process,uintptr_t process_address,uint8_t * bytes,size_t byte_count)173 void ReadChildMemory(const pid_t process, uintptr_t process_address,
174                      uint8_t* bytes, size_t byte_count) {
175   for (; byte_count != 0; byte_count--, bytes++, process_address++) {
176     *bytes = ReadChildByte(process, process_address);
177   }
178 }
179 
WriteChildMemory(const pid_t process,uintptr_t process_address,uint8_t const * bytes,size_t byte_count)180 void WriteChildMemory(const pid_t process, uintptr_t process_address,
181                       uint8_t const* bytes, size_t byte_count) {
182   for (; byte_count != 0; byte_count--, bytes++, process_address++) {
183     WriteChildByte(process, process_address, *bytes);
184   }
185 }
186 
187 // Executes shell code in a target process.
188 //
189 // The following assumptions are made:
190 //  * The process is currently being ptraced and that the process has already
191 //    stopped.
192 //  * The shell code will raise SIGSTOP when it has finished as signal that
193 //    control flow should be handed back to the original code.
194 //  * The shell code only alters registers and pushes values onto the stack.
195 //
196 // Execution is performed by overwriting the memory under the current
197 // instruction pointer with the shell code. After the shell code signals
198 // completion the original register state and memory are restored.
199 //
200 // If the above assumptions are met, then this function will leave the process
201 // in a stopped state that is equivalent to the original state.
ExecuteShellCode(const pid_t process,const uint8_t * shell_code,const size_t shell_code_size)202 bool ExecuteShellCode(const pid_t process, const uint8_t* shell_code,
203                       const size_t shell_code_size) {
204   REGISTER_STRUCT registers;
205   struct iovec registers_iovec;
206   registers_iovec.iov_base = &registers;
207   registers_iovec.iov_len = sizeof(REGISTER_STRUCT);
208   ptrace(PTRACE_GETREGSET, process, 1, &registers_iovec);
209 
210   std::unique_ptr<uint8_t[]> memory_backup(new uint8_t[shell_code_size]);
211   ReadChildMemory(process, PROGRAM_COUNTER(registers), memory_backup.get(),
212                   shell_code_size);
213   WriteChildMemory(process, PROGRAM_COUNTER(registers), shell_code,
214                    shell_code_size);
215 
216   // Execute the shell code and wait for the signal that it has finished.
217   ptrace(PTRACE_CONT, process, NULL, NULL);
218   int status;
219   waitpid(process, &status, 0);
220   if (status >> 8 != SIGSTOP) {
221     std::cerr << "Failed to execute SELinux shellcode." << std::endl;
222     return false;
223   }
224 
225   ptrace(PTRACE_SETREGSET, process, 1, &registers_iovec);
226   WriteChildMemory(process, PROGRAM_COUNTER(registers), memory_backup.get(),
227                    shell_code_size);
228   return true;
229 }
230 
SetProgramCounter(const pid_t process_id,uint64_t program_counter)231 bool SetProgramCounter(const pid_t process_id, uint64_t program_counter) {
232   REGISTER_STRUCT registers;
233   struct iovec registers_iovec;
234   registers_iovec.iov_base = &registers;
235   registers_iovec.iov_len = sizeof(REGISTER_STRUCT);
236   if (ptrace(PTRACE_GETREGSET, process_id, 1, &registers_iovec) != 0) {
237     return false;
238   }
239   PROGRAM_COUNTER(registers) = program_counter;
240   if ((ptrace(PTRACE_SETREGSET, process_id, 1, &registers_iovec)) != 0) {
241     return false;
242   }
243   return true;
244 }
245 
StepToEntryPoint(const pid_t process_id)246 bool StepToEntryPoint(const pid_t process_id) {
247   bool is_arm_mode;
248   uint64_t entry_address;
249   if (!GetElfEntryPoint(process_id, &entry_address, &is_arm_mode)) {
250     std::cerr << "Not able to determine Elf entry point." << std::endl;
251     return false;
252   }
253   if (is_arm_mode) {
254     // TODO(willcoster): If there is a need to handle ARM mode instructions in
255     // addition to thumb instructions update this with ARM mode shell code.
256     std::cerr << "Attempting to run an ARM-mode binary. "
257               << "shell-as currently only supports thumb-mode. "
258               << "Bug willcoster@ if you run into this error." << std::endl;
259     return false;
260   }
261 
262   int expected_signal = 0;
263   size_t trap_code_size = 0;
264   std::unique_ptr<uint8_t[]> trap_code =
265       GetTrapShellCode(&expected_signal, &trap_code_size);
266   std::unique_ptr<uint8_t[]> backup(new uint8_t[trap_code_size]);
267 
268   // Set a break point at the entry point declared by the Elf file. When a
269   // statically linked binary is executed this will be the first instruction
270   // executed.
271   //
272   // When a dynamically linked binary is executed, the dynamic linker is
273   // executed first. This brings .so files into memory and resolves shared
274   // symbols. Once this process is finished, it jumps to the entry point
275   // declared in the Elf file.
276   ReadChildMemory(process_id, entry_address, backup.get(), trap_code_size);
277   WriteChildMemory(process_id, entry_address, trap_code.get(), trap_code_size);
278   ptrace(PTRACE_CONT, process_id, NULL, NULL);
279   int status;
280   waitpid(process_id, &status, 0);
281   if (status >> 8 != expected_signal) {
282     std::cerr << "Program exited unexpectedly while stepping to entry point."
283               << std::endl;
284     std::cerr << "Expected status " << expected_signal << " but encountered "
285               << (status >> 8) << std::endl;
286     return false;
287   }
288 
289   if (!SetProgramCounter(process_id, entry_address)) {
290     return false;
291   }
292   WriteChildMemory(process_id, entry_address, backup.get(), trap_code_size);
293   return true;
294 }
295 
296 }  // namespace
297 
ExecuteInContext(char * const executable_and_args[],const shell_as::SecurityContext * context)298 bool ExecuteInContext(char* const executable_and_args[],
299                       const shell_as::SecurityContext* context) {
300   // Getting an executable running in a lower privileged context is tricky with
301   // SELinux. The recommended approach in the documentation is to use setexeccon
302   // which sets the context on the next execve call.
303   //
304   // However, this doesn't work for unprivileged processes like untrusted apps
305   // in Android because they are not allowed to execute most binaries.
306   //
307   // To work around this, ptrace is used to inject shell code into the new
308   // process just after it has executed an execve syscall. This shell code then
309   // sets the desired SELinux context.
310   pid_t child = fork();
311   if (child == 0) {
312     // Disabling ASLR makes it easier to determine the entry point of the target
313     // executable.
314     personality(ADDR_NO_RANDOMIZE);
315 
316     // Drop the privileges that can be dropped before executing the new binary
317     // and exit early if there is an issue.
318     if (!DropPreExecPrivileges(context)) {
319       exit(1);
320     }
321 
322     ptrace(PTRACE_TRACEME, 0, NULL, NULL);
323     raise(SIGSTOP);  // Wait for the parent process to attach.
324     execv(executable_and_args[0], executable_and_args);
325   } else {
326     // Wait for the child to reach the SIGSTOP line above.
327     int status;
328     waitpid(child, &status, 0);
329     if ((status >> 8) != SIGSTOP) {
330       // If the first status is not SIGSTOP, then the child aborted early
331       // because it was not able to set the user and group IDs.
332       return false;
333     }
334 
335     // Break inside the child's execv call.
336     ptrace(PTRACE_SETOPTIONS, child, NULL,
337            PTRACE_O_TRACEEXEC | PTRACE_O_EXITKILL);
338     ptrace(PTRACE_CONT, child, NULL, NULL);
339     waitpid(child, &status, 0);
340     if (status >> 8 != (SIGTRAP | PTRACE_EVENT_EXEC << 8)) {
341       std::cerr << "Failed to execute " << executable_and_args[0] << std::endl;
342       return false;
343     }
344 
345     // Allow the dynamic linker to run before dropping to a lower SELinux
346     // context. This is required for executing in some very constrained domains
347     // like mediacodec.
348     //
349     // If the context was dropped before the dynamic linker runs, then when the
350     // linker attempts to read /proc/self/exe to determine dynamic symbol
351     // information, SELinux will kill the binary if the domain is not allowed to
352     // read the binary's executable file.
353     //
354     // This happens for example, when attempting to run any toybox binary (id,
355     // sh, etc) as mediacodec.
356     if (!StepToEntryPoint(child)) {
357       std::cerr << "Something bad happened stepping to the entry point."
358                 << std::endl;
359       return false;
360     }
361 
362     // Run the SELinux shellcode in the child process before the child can
363     // execute any instructions in the newly loaded executable.
364     if (context->selinux_context.has_value()) {
365       size_t shell_code_size;
366       std::unique_ptr<uint8_t[]> shell_code = GetSELinuxShellCode(
367           context->selinux_context.value(), &shell_code_size);
368       bool success = ExecuteShellCode(child, shell_code.get(), shell_code_size);
369       if (!success) {
370         return false;
371       }
372     }
373 
374     // Resume and detach from the child now that the SELinux context has been
375     // updated.
376     ptrace(PTRACE_DETACH, child, NULL, NULL);
377     waitpid(child, nullptr, 0);
378   }
379   return true;
380 }
381 
382 }  // namespace shell_as
383