1 /*
2 * Copyright 2016, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <array>
#include <deque>
#include <limits>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
29
30 #include <event2/event.h>
31 #include <event2/listener.h>
32 #include <event2/thread.h>
33
34 #include <android-base/logging.h>
35 #include <android-base/properties.h>
36 #include <android-base/stringprintf.h>
37 #include <android-base/unique_fd.h>
38 #include <cutils/sockets.h>
39
40 #include "debuggerd/handler.h"
41 #include "dump_type.h"
42 #include "protocol.h"
43 #include "util.h"
44
45 #include "intercept_manager.h"
46
47 using android::base::GetIntProperty;
48 using android::base::StringPrintf;
49 using android::base::unique_fd;
50
// Created in main(); consulted in perform_request() to see whether another
// process has registered to intercept dumps for a given pid/type.
static InterceptManager* intercept_manager;

// NOTE(review): CrashStatus appears unused in this translation unit — confirm
// against the rest of the project before removing.
enum CrashStatus {
  kCrashStatusRunning,
  kCrashStatusQueued,
};
57
// Ownership of Crash is a bit messy.
// It's either owned by an active event that must have a timeout, or owned by
// queued_requests, in the case that multiple crashes come in at the same time.
struct Crash {
  // Free the libevent event; the unique_fd members close their descriptors
  // via their own destructors.
  ~Crash() { event_free(crash_event); }

  // Path of the fallback temporary file; non-empty only when the O_TMPFILE
  // open failed in CrashQueue::get_output() (see crash_completed_cb cleanup).
  std::string crash_tombstone_path;
  // Duplicate of the output fd handed to the crashing process, kept so the
  // finished dump can be linked into place in crash_completed_cb.
  unique_fd crash_tombstone_fd;
  unique_fd crash_socket_fd;
  pid_t crash_pid;
  // The currently-armed libevent event (request read, then completion wait).
  event* crash_event = nullptr;

  DebuggerdDumpType crash_type;
};
72
73 class CrashQueue {
74 public:
CrashQueue(const std::string & dir_path,const std::string & file_name_prefix,size_t max_artifacts,size_t max_concurrent_dumps)75 CrashQueue(const std::string& dir_path, const std::string& file_name_prefix, size_t max_artifacts,
76 size_t max_concurrent_dumps)
77 : file_name_prefix_(file_name_prefix),
78 dir_path_(dir_path),
79 dir_fd_(open(dir_path.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC)),
80 max_artifacts_(max_artifacts),
81 next_artifact_(0),
82 max_concurrent_dumps_(max_concurrent_dumps),
83 num_concurrent_dumps_(0) {
84 if (dir_fd_ == -1) {
85 PLOG(FATAL) << "failed to open directory: " << dir_path;
86 }
87
88 // NOTE: If max_artifacts_ <= max_concurrent_dumps_, then theoretically the
89 // same filename could be handed out to multiple processes.
90 CHECK(max_artifacts_ > max_concurrent_dumps_);
91
92 find_oldest_artifact();
93 }
94
for_crash(const Crash * crash)95 static CrashQueue* for_crash(const Crash* crash) {
96 return (crash->crash_type == kDebuggerdJavaBacktrace) ? for_anrs() : for_tombstones();
97 }
98
for_tombstones()99 static CrashQueue* for_tombstones() {
100 static CrashQueue queue("/data/tombstones", "tombstone_" /* file_name_prefix */,
101 GetIntProperty("tombstoned.max_tombstone_count", 10),
102 1 /* max_concurrent_dumps */);
103 return &queue;
104 }
105
for_anrs()106 static CrashQueue* for_anrs() {
107 static CrashQueue queue("/data/anr", "trace_" /* file_name_prefix */,
108 GetIntProperty("tombstoned.max_anr_count", 64),
109 4 /* max_concurrent_dumps */);
110 return &queue;
111 }
112
get_output()113 std::pair<std::string, unique_fd> get_output() {
114 std::string path;
115 unique_fd result(openat(dir_fd_, ".", O_WRONLY | O_APPEND | O_TMPFILE | O_CLOEXEC, 0640));
116 if (result == -1) {
117 // We might not have O_TMPFILE. Try creating with an arbitrary filename instead.
118 static size_t counter = 0;
119 std::string tmp_filename = StringPrintf(".temporary%zu", counter++);
120 result.reset(openat(dir_fd_, tmp_filename.c_str(),
121 O_WRONLY | O_APPEND | O_CREAT | O_TRUNC | O_CLOEXEC, 0640));
122 if (result == -1) {
123 PLOG(FATAL) << "failed to create temporary tombstone in " << dir_path_;
124 }
125
126 path = StringPrintf("%s/%s", dir_path_.c_str(), tmp_filename.c_str());
127 }
128 return std::make_pair(std::move(path), std::move(result));
129 }
130
get_next_artifact_path()131 std::string get_next_artifact_path() {
132 std::string file_name =
133 StringPrintf("%s/%s%02d", dir_path_.c_str(), file_name_prefix_.c_str(), next_artifact_);
134 next_artifact_ = (next_artifact_ + 1) % max_artifacts_;
135 return file_name;
136 }
137
maybe_enqueue_crash(Crash * crash)138 bool maybe_enqueue_crash(Crash* crash) {
139 if (num_concurrent_dumps_ == max_concurrent_dumps_) {
140 queued_requests_.push_back(crash);
141 return true;
142 }
143
144 return false;
145 }
146
maybe_dequeue_crashes(void (* handler)(Crash * crash))147 void maybe_dequeue_crashes(void (*handler)(Crash* crash)) {
148 while (!queued_requests_.empty() && num_concurrent_dumps_ < max_concurrent_dumps_) {
149 Crash* next_crash = queued_requests_.front();
150 queued_requests_.pop_front();
151 handler(next_crash);
152 }
153 }
154
on_crash_started()155 void on_crash_started() { ++num_concurrent_dumps_; }
156
on_crash_completed()157 void on_crash_completed() { --num_concurrent_dumps_; }
158
159 private:
find_oldest_artifact()160 void find_oldest_artifact() {
161 size_t oldest_tombstone = 0;
162 time_t oldest_time = std::numeric_limits<time_t>::max();
163
164 for (size_t i = 0; i < max_artifacts_; ++i) {
165 std::string path = StringPrintf("%s/%s%02zu", dir_path_.c_str(), file_name_prefix_.c_str(), i);
166 struct stat st;
167 if (stat(path.c_str(), &st) != 0) {
168 if (errno == ENOENT) {
169 oldest_tombstone = i;
170 break;
171 } else {
172 PLOG(ERROR) << "failed to stat " << path;
173 continue;
174 }
175 }
176
177 if (st.st_mtime < oldest_time) {
178 oldest_tombstone = i;
179 oldest_time = st.st_mtime;
180 }
181 }
182
183 next_artifact_ = oldest_tombstone;
184 }
185
186 const std::string file_name_prefix_;
187
188 const std::string dir_path_;
189 const int dir_fd_;
190
191 const size_t max_artifacts_;
192 int next_artifact_;
193
194 const size_t max_concurrent_dumps_;
195 size_t num_concurrent_dumps_;
196
197 std::deque<Crash*> queued_requests_;
198
199 DISALLOW_COPY_AND_ASSIGN(CrashQueue);
200 };
201
// Whether java trace dumps are produced via tombstoned.
static constexpr bool kJavaTraceDumpsEnabled = true;

// Forward declare the callbacks so they can be placed in a sensible order.
// Lifecycle: accept -> request (read + validate) -> completed (link + cleanup).
static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*);
static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);
209
perform_request(Crash * crash)210 static void perform_request(Crash* crash) {
211 unique_fd output_fd;
212 bool intercepted =
213 intercept_manager->GetIntercept(crash->crash_pid, crash->crash_type, &output_fd);
214 if (!intercepted) {
215 std::tie(crash->crash_tombstone_path, output_fd) = CrashQueue::for_crash(crash)->get_output();
216 crash->crash_tombstone_fd.reset(dup(output_fd.get()));
217 }
218
219 TombstonedCrashPacket response = {
220 .packet_type = CrashPacketType::kPerformDump
221 };
222 ssize_t rc = send_fd(crash->crash_socket_fd, &response, sizeof(response), std::move(output_fd));
223 if (rc == -1) {
224 PLOG(WARNING) << "failed to send response to CrashRequest";
225 goto fail;
226 } else if (rc != sizeof(response)) {
227 PLOG(WARNING) << "crash socket write returned short";
228 goto fail;
229 } else {
230 // TODO: Make this configurable by the interceptor?
231 struct timeval timeout = { 10, 0 };
232
233 event_base* base = event_get_base(crash->crash_event);
234 event_assign(crash->crash_event, base, crash->crash_socket_fd, EV_TIMEOUT | EV_READ,
235 crash_completed_cb, crash);
236 event_add(crash->crash_event, &timeout);
237 }
238
239 CrashQueue::for_crash(crash)->on_crash_started();
240 return;
241
242 fail:
243 delete crash;
244 }
245
crash_accept_cb(evconnlistener * listener,evutil_socket_t sockfd,sockaddr *,int,void *)246 static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
247 void*) {
248 event_base* base = evconnlistener_get_base(listener);
249 Crash* crash = new Crash();
250
251 // TODO: Make sure that only java crashes come in on the java socket
252 // and only native crashes on the native socket.
253 struct timeval timeout = { 1, 0 };
254 event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash);
255 crash->crash_socket_fd.reset(sockfd);
256 crash->crash_event = crash_event;
257 event_add(crash_event, &timeout);
258 }
259
// First callback for a crash connection: reads and validates the CrashRequest
// packet, determines the pid and dump type, then either enqueues the crash or
// starts the dump immediately. Fires with EV_TIMEOUT if the client stalls.
// On any failure the Crash (and with it the socket fd and event) is destroyed.
static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
  ssize_t rc;
  Crash* crash = static_cast<Crash*>(arg);

  TombstonedCrashPacket request = {};

  if ((ev & EV_TIMEOUT) != 0) {
    LOG(WARNING) << "crash request timed out";
    goto fail;
  } else if ((ev & EV_READ) == 0) {
    LOG(WARNING) << "tombstoned received unexpected event from crash socket";
    goto fail;
  }

  rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
  if (rc == -1) {
    PLOG(WARNING) << "failed to read from crash socket";
    goto fail;
  } else if (rc != sizeof(request)) {
    LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
                 << sizeof(request) << ")";
    goto fail;
  }

  if (request.packet_type != CrashPacketType::kDumpRequest) {
    LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received "
                 << StringPrintf("%#2hhX", request.packet_type);
    goto fail;
  }

  // Reject dump types outside the known range before using them for routing.
  crash->crash_type = request.packet.dump_request.dump_type;
  if (crash->crash_type < 0 || crash->crash_type > kDebuggerdAnyIntercept) {
    LOG(WARNING) << "unexpected crash dump type: " << crash->crash_type;
    goto fail;
  }

  if (crash->crash_type != kDebuggerdJavaBacktrace) {
    crash->crash_pid = request.packet.dump_request.pid;
  } else {
    // Requests for java traces are sent from untrusted processes, so we
    // must not trust the PID sent down with the request. Instead, we ask the
    // kernel.
    ucred cr = {};
    socklen_t len = sizeof(cr);
    int ret = getsockopt(sockfd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
    if (ret != 0) {
      PLOG(ERROR) << "Failed to getsockopt(..SO_PEERCRED)";
      goto fail;
    }

    crash->crash_pid = cr.pid;
  }

  LOG(INFO) << "received crash request for pid " << crash->crash_pid;

  if (CrashQueue::for_crash(crash)->maybe_enqueue_crash(crash)) {
    // Too many dumps in flight for this queue; perform_request will run from
    // crash_completed_cb's maybe_dequeue_crashes once a slot frees up.
    LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid;
  } else {
    perform_request(crash);
  }

  return;

fail:
  delete crash;
}
326
// Fires when the dumping process reports completion (or the dump timeout
// expires). Releases the queue slot, links the finished tombstone into its
// final round-robin slot, cleans up any non-O_TMPFILE temporary, destroys the
// Crash, and kicks off any queued crash requests.
static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
  ssize_t rc;
  Crash* crash = static_cast<Crash*>(arg);
  TombstonedCrashPacket request = {};

  // Release the dump slot unconditionally — even on timeout/error paths.
  CrashQueue::for_crash(crash)->on_crash_completed();

  if ((ev & EV_READ) == 0) {
    goto fail;
  }

  rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
  if (rc == -1) {
    PLOG(WARNING) << "failed to read from crash socket";
    goto fail;
  } else if (rc != sizeof(request)) {
    LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
                 << sizeof(request) << ")";
    goto fail;
  }

  if (request.packet_type != CrashPacketType::kCompletedDump) {
    LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received "
                 << uint32_t(request.packet_type);
    goto fail;
  }

  if (crash->crash_tombstone_fd != -1) {
    // Link the output file into its final name via the /proc/self/fd symlink
    // to the still-open descriptor (this is how an anonymous O_TMPFILE file
    // gets a name at all).
    std::string fd_path = StringPrintf("/proc/self/fd/%d", crash->crash_tombstone_fd.get());
    std::string tombstone_path = CrashQueue::for_crash(crash)->get_next_artifact_path();

    // linkat doesn't let us replace a file, so we need to unlink first.
    int rc = unlink(tombstone_path.c_str());  // NOTE: intentionally shadows the outer ssize_t rc.
    if (rc != 0 && errno != ENOENT) {
      PLOG(ERROR) << "failed to unlink tombstone at " << tombstone_path;
      goto fail;
    }

    rc = linkat(AT_FDCWD, fd_path.c_str(), AT_FDCWD, tombstone_path.c_str(), AT_SYMLINK_FOLLOW);
    if (rc != 0) {
      PLOG(ERROR) << "failed to link tombstone";
    } else {
      if (crash->crash_type == kDebuggerdJavaBacktrace) {
        LOG(ERROR) << "Traces for pid " << crash->crash_pid << " written to: " << tombstone_path;
      } else {
        // NOTE: Several tools parse this log message to figure out where the
        // tombstone associated with a given native crash was written. Any changes
        // to this message must be carefully considered.
        LOG(ERROR) << "Tombstone written to: " << tombstone_path;
      }
    }

    // If we don't have O_TMPFILE, we need to clean up after ourselves.
    if (!crash->crash_tombstone_path.empty()) {
      rc = unlink(crash->crash_tombstone_path.c_str());
      if (rc != 0) {
        PLOG(ERROR) << "failed to unlink temporary tombstone at " << crash->crash_tombstone_path;
      }
    }
  }

fail:
  // Shared exit: the success path deliberately falls through to here, since
  // the Crash must be destroyed and the queue drained in every case.
  CrashQueue* queue = CrashQueue::for_crash(crash);
  delete crash;

  // If there's something queued up, let them proceed.
  queue->maybe_dequeue_crashes(perform_request);
}
395
// tombstoned entry point: installs fatal-signal handlers, takes ownership of
// the init-provided control sockets, wires up libevent listeners, and runs
// the dispatch loop (event_base_dispatch normally never returns).
int main(int, char* []) {
  // Restrict all files we create (tombstones, traces) to at most mode 0640.
  umask(0137);

  // Don't try to connect to ourselves if we crash.
  struct sigaction action = {};
  action.sa_handler = [](int signal) {
    LOG(ERROR) << "received fatal signal " << signal;
    _exit(1);
  };
  debuggerd_register_handlers(&action);

  // These sockets are created by init and passed in; -1 means init didn't
  // provide them, which is fatal.
  int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName);
  int crash_socket = android_get_control_socket(kTombstonedCrashSocketName);

  if (intercept_socket == -1 || crash_socket == -1) {
    PLOG(FATAL) << "failed to get socket from init";
  }

  evutil_make_socket_nonblocking(intercept_socket);
  evutil_make_socket_nonblocking(crash_socket);

  event_base* base = event_base_new();
  if (!base) {
    LOG(FATAL) << "failed to create event_base";
  }

  // Never freed: lives for the life of the daemon.
  intercept_manager = new InterceptManager(base, intercept_socket);

  evconnlistener* tombstone_listener =
      evconnlistener_new(base, crash_accept_cb, CrashQueue::for_tombstones(), LEV_OPT_CLOSE_ON_FREE,
                         -1 /* backlog */, crash_socket);
  if (!tombstone_listener) {
    LOG(FATAL) << "failed to create evconnlistener for tombstones.";
  }

  if (kJavaTraceDumpsEnabled) {
    const int java_trace_socket = android_get_control_socket(kTombstonedJavaTraceSocketName);
    if (java_trace_socket == -1) {
      PLOG(FATAL) << "failed to get socket from init";
    }

    evutil_make_socket_nonblocking(java_trace_socket);
    evconnlistener* java_trace_listener =
        evconnlistener_new(base, crash_accept_cb, CrashQueue::for_anrs(), LEV_OPT_CLOSE_ON_FREE,
                           -1 /* backlog */, java_trace_socket);
    if (!java_trace_listener) {
      LOG(FATAL) << "failed to create evconnlistener for java traces.";
    }
  }

  LOG(INFO) << "tombstoned successfully initialized";
  event_base_dispatch(base);
}
449