1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "carbugreportd"
18
19 #include <android-base/errors.h>
20 #include <android-base/file.h>
21 #include <android-base/macros.h>
22 #include <android-base/properties.h>
23 #include <android-base/stringprintf.h>
24 #include <android-base/strings.h>
25 #include <android-base/unique_fd.h>
26 #include <cutils/sockets.h>
27 #include <gui/SurfaceComposerClient.h>
28 #include <log/log_main.h>
29 #include <private/android_filesystem_config.h>
30 #include <utils/SystemClock.h>
31 #include <ziparchive/zip_writer.h>
32
33 #include <errno.h>
34 #include <fcntl.h>
35 #include <ftw.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <sys/prctl.h>
39 #include <sys/stat.h>
40 #include <sys/types.h>
41 #include <sys/wait.h>
42 #include <time.h>
43 #include <unistd.h>
44
45 #include <string>
46 #include <vector>
47
48 namespace {
// Scratch directory that holds temporary files while the bugreport is collected.
constexpr const char* kTempDirectory = "/data/user_de/0/com.android.shell/temp_bugreport_files";
// Name of the socket the progress information is written to.
constexpr const char* kCarBrProgressSocket = "car_br_progress_socket";
// Name of the socket the zipped bugreport file is written to.
constexpr const char* kCarBrOutputSocket = "car_br_output_socket";
// Name of the socket the extra bugreport zip file is written to. That zip file carries data
// that is not part of the bugreport file generated by dumpstate.
constexpr const char* kCarBrExtraOutputSocket = "car_br_extra_output_socket";
// Prefix the bugreportz protocol uses to signal that the bugreport finished successfully.
constexpr const char* kOkPrefix = "OK:";
// How many times connecting to the dumpstate socket is attempted.
constexpr int kMaxDumpstateConnectAttempts = 20;
// Pause between two connect attempts.
constexpr int kWaitTimeBetweenConnectAttemptsInSec = 1;
// Read timeout for dumpstate: if nothing is read for 10 minutes, reading stops and we quit.
// No timeout in dumpstate is longer than 60 seconds, so this leaves lots of leeway in case of
// unforeseen time outs.
constexpr int kDumpstateTimeoutInSec = 10 * 60;
// Prefix of the screenshot file names inside the generated zip file.
constexpr const char* kScreenshotPrefix = "/screenshot";
70
71 using android::OK;
72 using android::PhysicalDisplayId;
73 using android::status_t;
74 using android::SurfaceComposerClient;
75
76 // Returns a valid socket descriptor or -1 on failure.
openSocket(const char * service)77 int openSocket(const char* service) {
78 int s = android_get_control_socket(service);
79 if (s < 0) {
80 ALOGE("android_get_control_socket(%s): %s", service, strerror(errno));
81 return -1;
82 }
83 fcntl(s, F_SETFD, FD_CLOEXEC);
84 if (listen(s, 4) < 0) {
85 ALOGE("listen(control socket): %s", strerror(errno));
86 return -1;
87 }
88
89 struct sockaddr addr;
90 socklen_t alen = sizeof(addr);
91 int fd = accept(s, &addr, &alen);
92 if (fd < 0) {
93 ALOGE("accept(control socket): %s", strerror(errno));
94 return -1;
95 }
96 return fd;
97 }
98
99 // Processes the given dumpstate progress protocol |line| and updates
100 // |out_last_nonempty_line| when |line| is non-empty, and |out_zip_path| when
101 // the bugreport is finished.
processLine(const std::string & line,std::string * out_zip_path,std::string * out_last_nonempty_line)102 void processLine(const std::string& line, std::string* out_zip_path,
103 std::string* out_last_nonempty_line) {
104 // The protocol is documented in frameworks/native/cmds/bugreportz/readme.md
105 if (line.empty()) {
106 return;
107 }
108 *out_last_nonempty_line = line;
109 if (line.find(kOkPrefix) != 0) {
110 return;
111 }
112 *out_zip_path = line.substr(strlen(kOkPrefix));
113 return;
114 }
115
116 // Sends the contents of the zip fileto |outfd|.
117 // Returns true if success
zipFilesToFd(const std::vector<std::string> & extra_files,int outfd)118 void zipFilesToFd(const std::vector<std::string>& extra_files, int outfd) {
119 // pass fclose as Deleter to close the file when unique_ptr is destroyed.
120 std::unique_ptr<FILE, decltype(fclose)*> outfile = {fdopen(outfd, "wb"), fclose};
121 if (outfile == nullptr) {
122 ALOGE("Failed to open output descriptor");
123 return;
124 }
125 auto writer = std::make_unique<ZipWriter>(outfile.get());
126
127 int error = 0;
128 for (const auto& filepath : extra_files) {
129 const auto name = android::base::Basename(filepath);
130
131 error = writer->StartEntry(name.c_str(), 0);
132 if (error) {
133 ALOGE("Failed to start entry: [%d] %s", error, writer->ErrorCodeString(error));
134 return;
135 }
136 android::base::unique_fd fd(
137 TEMP_FAILURE_RETRY(open(filepath.c_str(), O_RDONLY | O_NOFOLLOW)));
138 if (fd == -1) {
139 return;
140 }
141 while (1) {
142 char buffer[65536];
143
144 ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd, buffer, sizeof(buffer)));
145 if (bytes_read == 0) {
146 break;
147 }
148 if (bytes_read == -1) {
149 if (errno == EAGAIN) {
150 ALOGE("timed out while reading %s", name.c_str());
151 } else {
152 ALOGE("read terminated abnormally (%s)", strerror(errno));
153 }
154 // fail immediately
155 return;
156 }
157 error = writer->WriteBytes(buffer, bytes_read);
158 if (error) {
159 ALOGE("WriteBytes() failed: [%d] %s", error, ZipWriter::ErrorCodeString(error));
160 // fail immediately
161 return;
162 }
163 }
164
165 error = writer->FinishEntry();
166 if (error) {
167 ALOGW("failed to finish entry: [%d] %s", error, writer->ErrorCodeString(error));
168 continue;
169 }
170 }
171 error = writer->Finish();
172 if (error) {
173 ALOGW("Failed to finish zip writer to: [%d] %s", error, writer->ErrorCodeString(error));
174 }
175 }
176
copyTo(int fd_in,int fd_out,void * buffer,size_t buffer_len)177 int copyTo(int fd_in, int fd_out, void* buffer, size_t buffer_len) {
178 ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd_in, buffer, buffer_len));
179 if (bytes_read == 0) {
180 return 0;
181 }
182 if (bytes_read == -1) {
183 // EAGAIN really means time out, so make that clear.
184 if (errno == EAGAIN) {
185 ALOGE("read timed out");
186 } else {
187 ALOGE("read terminated abnormally (%s)", strerror(errno));
188 }
189 return -1;
190 }
191 // copy all bytes to the output socket
192 if (!android::base::WriteFully(fd_out, buffer, bytes_read)) {
193 ALOGE("write failed");
194 return -1;
195 }
196 return bytes_read;
197 }
198
copyFile(const std::string & zip_path,int output_socket)199 bool copyFile(const std::string& zip_path, int output_socket) {
200 android::base::unique_fd fd(TEMP_FAILURE_RETRY(open(zip_path.c_str(), O_RDONLY | O_NOFOLLOW)));
201 if (fd == -1) {
202 ALOGE("Failed to open zip file %s.", zip_path.c_str());
203 return false;
204 }
205 while (1) {
206 char buffer[65536];
207 int bytes_copied = copyTo(fd, output_socket, buffer, sizeof(buffer));
208 if (bytes_copied == 0) {
209 break;
210 }
211 if (bytes_copied == -1) {
212 ALOGE("Failed to copy zip file %s to the output_socket.", zip_path.c_str());
213 return false;
214 }
215 }
216 return true;
217 }
218
219 // Triggers a bugreport and waits until it is all collected.
220 // returns false if error, true if success
doBugreport(int progress_socket,size_t * out_bytes_written,std::string * zip_path)221 bool doBugreport(int progress_socket, size_t* out_bytes_written, std::string* zip_path) {
222 // Socket will not be available until service starts.
223 android::base::unique_fd s;
224 for (int i = 0; i < kMaxDumpstateConnectAttempts; i++) {
225 s.reset(socket_local_client("dumpstate", ANDROID_SOCKET_NAMESPACE_RESERVED, SOCK_STREAM));
226 if (s != -1) break;
227 sleep(kWaitTimeBetweenConnectAttemptsInSec);
228 }
229
230 if (s == -1) {
231 ALOGE("failed to connect to dumpstatez service");
232 return false;
233 }
234
235 // Set a timeout so that if nothing is read by the timeout, stop reading and quit
236 struct timeval tv = {
237 .tv_sec = kDumpstateTimeoutInSec,
238 .tv_usec = 0,
239 };
240 if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) != 0) {
241 ALOGW("Cannot set socket timeout (%s)", strerror(errno));
242 }
243
244 std::string line;
245 std::string last_nonempty_line;
246 char buffer[65536];
247 while (true) {
248 ssize_t bytes_read = copyTo(s, progress_socket, buffer, sizeof(buffer));
249 if (bytes_read == 0) {
250 break;
251 }
252 if (bytes_read == -1) {
253 ALOGE("Failed to copy progress to the progress_socket.");
254 return false;
255 }
256 // Process the buffer line by line. this is needed for the filename.
257 for (int i = 0; i < bytes_read; i++) {
258 char c = buffer[i];
259 if (c == '\n') {
260 processLine(line, zip_path, &last_nonempty_line);
261 line.clear();
262 } else {
263 line.append(1, c);
264 }
265 }
266 *out_bytes_written += bytes_read;
267 }
268 s.reset();
269 // Process final line, in case it didn't finish with newline.
270 processLine(line, zip_path, &last_nonempty_line);
271 // if doBugReport finished successfully, zip path should be set.
272 if (zip_path->empty()) {
273 ALOGE("no zip file path was found in bugreportz progress data");
274 return false;
275 }
276 return true;
277 }
278
waitpid_with_timeout(pid_t pid,int timeout_secs,int * status)279 bool waitpid_with_timeout(pid_t pid, int timeout_secs, int* status) {
280 sigset_t child_mask, old_mask;
281 sigemptyset(&child_mask);
282 sigaddset(&child_mask, SIGCHLD);
283
284 if (sigprocmask(SIG_BLOCK, &child_mask, &old_mask) == -1) {
285 ALOGE("*** sigprocmask failed: %s\n", strerror(errno));
286 return false;
287 }
288
289 timespec ts = {.tv_sec = timeout_secs, .tv_nsec = 0};
290 int ret = TEMP_FAILURE_RETRY(sigtimedwait(&child_mask, nullptr, &ts));
291 int saved_errno = errno;
292
293 // Set the signals back the way they were.
294 if (sigprocmask(SIG_SETMASK, &old_mask, nullptr) == -1) {
295 ALOGE("*** sigprocmask failed: %s\n", strerror(errno));
296 if (ret == 0) {
297 return false;
298 }
299 }
300 if (ret == -1) {
301 errno = saved_errno;
302 if (errno == EAGAIN) {
303 errno = ETIMEDOUT;
304 } else {
305 ALOGE("*** sigtimedwait failed: %s\n", strerror(errno));
306 }
307 return false;
308 }
309
310 pid_t child_pid = waitpid(pid, status, WNOHANG);
311 if (child_pid != pid) {
312 if (child_pid != -1) {
313 ALOGE("*** Waiting for pid %d, got pid %d instead\n", pid, child_pid);
314 } else {
315 ALOGE("*** waitpid failed: %s\n", strerror(errno));
316 }
317 return false;
318 }
319 return true;
320 }
321
322 // Runs the given command. Kills the command if it does not finish by timeout.
runCommand(int timeout_secs,const char * file,std::vector<const char * > args)323 int runCommand(int timeout_secs, const char* file, std::vector<const char*> args) {
324 pid_t pid = fork();
325
326 // handle error case
327 if (pid < 0) {
328 ALOGE("fork failed %s", strerror(errno));
329 return pid;
330 }
331
332 // handle child case
333 if (pid == 0) {
334 /* make sure the child dies when parent dies */
335 prctl(PR_SET_PDEATHSIG, SIGKILL);
336
337 /* just ignore SIGPIPE, will go down with parent's */
338 struct sigaction sigact;
339 memset(&sigact, 0, sizeof(sigact));
340 sigact.sa_handler = SIG_IGN;
341 sigaction(SIGPIPE, &sigact, nullptr);
342
343 execvp(file, const_cast<char* const*>(args.data()));
344 // execvp's result will be handled after waitpid_with_timeout() below, but
345 // if it failed, it's safer to exit dumpstate.
346 ALOGE("execvp on command %s failed (error: %s)", file, strerror(errno));
347 _exit(EXIT_FAILURE);
348 }
349
350 // handle parent case
351 int status;
352 bool ret = waitpid_with_timeout(pid, timeout_secs, &status);
353
354 if (!ret) {
355 if (errno == ETIMEDOUT) {
356 ALOGE("command %s timed out (killing pid %d)", file, pid);
357 } else {
358 ALOGE("command %s: Error (killing pid %d)\n", file, pid);
359 }
360 kill(pid, SIGTERM);
361 if (!waitpid_with_timeout(pid, 5, nullptr)) {
362 kill(pid, SIGKILL);
363 if (!waitpid_with_timeout(pid, 5, nullptr)) {
364 ALOGE("could not kill command '%s' (pid %d) even with SIGKILL.\n", file, pid);
365 }
366 }
367 return -1;
368 }
369
370 if (WIFSIGNALED(status)) {
371 ALOGE("command '%s' failed: killed by signal %d\n", file, WTERMSIG(status));
372 } else if (WIFEXITED(status) && WEXITSTATUS(status) > 0) {
373 status = WEXITSTATUS(status);
374 ALOGE("command '%s' failed: exit code %d\n", file, status);
375 }
376
377 return status;
378 }
379
takeScreenshotForDisplayId(PhysicalDisplayId id,const char * tmp_dir,std::vector<std::string> * extra_files)380 void takeScreenshotForDisplayId(PhysicalDisplayId id, const char* tmp_dir,
381 std::vector<std::string>* extra_files) {
382 std::string id_as_string = to_string(id);
383 std::string filename = std::string(tmp_dir) + kScreenshotPrefix + id_as_string + ".png";
384 std::vector<const char*> args{"-p", "-d", id_as_string.c_str(), filename.c_str(), nullptr};
385 ALOGI("capturing screen for display (%s) as %s", id_as_string.c_str(), filename.c_str());
386 int status = runCommand(10, "/system/bin/screencap", args);
387 if (status == 0) {
388 ALOGI("Screenshot saved for display: %s", id_as_string.c_str());
389 } else {
390 ALOGW("Failed to take screenshot for display: %s", id_as_string.c_str());
391 }
392 // add the file regardless of the exit status of the screencap util.
393 extra_files->push_back(filename);
394 }
395
takeScreenshot(const char * tmp_dir,std::vector<std::string> * extra_files)396 void takeScreenshot(const char* tmp_dir, std::vector<std::string>* extra_files) {
397 // Now send the screencaptures
398 std::vector<PhysicalDisplayId> ids = SurfaceComposerClient::getPhysicalDisplayIds();
399
400 for (PhysicalDisplayId display_id : ids) {
401 takeScreenshotForDisplayId(display_id, tmp_dir, extra_files);
402 }
403 }
404
recursiveRemoveDir(const std::string & path)405 bool recursiveRemoveDir(const std::string& path) {
406 auto callback = [](const char* child, const struct stat*, int file_type, struct FTW*) -> int {
407 if (file_type == FTW_DP) {
408 if (rmdir(child) == -1) {
409 ALOGE("rmdir(%s): %s", child, strerror(errno));
410 return -1;
411 }
412 } else if (file_type == FTW_F) {
413 if (unlink(child) == -1) {
414 ALOGE("unlink(%s): %s", child, strerror(errno));
415 return -1;
416 }
417 }
418 return 0;
419 };
420 // do a file tree walk with a sufficiently large depth.
421 return nftw(path.c_str(), callback, 128, FTW_DEPTH) == 0;
422 }
423
createTempDir(const char * dir)424 status_t createTempDir(const char* dir) {
425 struct stat sb;
426 if (TEMP_FAILURE_RETRY(stat(dir, &sb)) == 0) {
427 if (!recursiveRemoveDir(dir)) {
428 return -errno;
429 }
430 } else if (errno != ENOENT) {
431 ALOGE("Failed to stat %s ", dir);
432 return -errno;
433 }
434 if (TEMP_FAILURE_RETRY(mkdir(dir, 0700)) == -1) {
435 ALOGE("Failed to mkdir %s", dir);
436 return -errno;
437 }
438 return OK;
439 }
440
441 // Removes bugreport
cleanupBugreportFile(const std::string & zip_path)442 void cleanupBugreportFile(const std::string& zip_path) {
443 if (unlink(zip_path.c_str()) != 0) {
444 ALOGE("Could not unlink %s (%s)", zip_path.c_str(), strerror(errno));
445 }
446 }
447
448 } // namespace
449
main(void)450 int main(void) {
451 ALOGI("Starting bugreport collecting service");
452
453 auto started_at_millis = android::elapsedRealtime();
454
455 std::vector<std::string> extra_files;
456 if (createTempDir(kTempDirectory) == OK) {
457 // take screenshots of the physical displays as early as possible
458 takeScreenshot(kTempDirectory, &extra_files);
459 }
460
461 // Start the dumpstatez service.
462 android::base::SetProperty("ctl.start", "cardumpstatez");
463
464 size_t bytes_written = 0;
465
466 std::string zip_path;
467 int progress_socket = openSocket(kCarBrProgressSocket);
468 if (progress_socket < 0) {
469 // early out. in this case we will not print the final message, but that is ok.
470 android::base::SetProperty("ctl.stop", "cardumpstatez");
471 return EXIT_FAILURE;
472 }
473 bool is_success = doBugreport(progress_socket, &bytes_written, &zip_path);
474 close(progress_socket);
475
476 if (is_success) {
477 int output_socket = openSocket(kCarBrOutputSocket);
478 if (output_socket != -1) {
479 is_success = copyFile(zip_path, output_socket);
480 close(output_socket);
481 }
482 }
483
484 int extra_output_socket = openSocket(kCarBrExtraOutputSocket);
485 if (extra_output_socket != -1 && is_success) {
486 zipFilesToFd(extra_files, extra_output_socket);
487 }
488 if (extra_output_socket != -1) {
489 close(extra_output_socket);
490 }
491
492 auto delta_sec = (android::elapsedRealtime() - started_at_millis) / 1000.0;
493 std::string result = is_success ? "success" : "failed";
494 ALOGI("bugreport %s in %.02fs, %zu bytes written", result.c_str(), delta_sec, bytes_written);
495 cleanupBugreportFile(zip_path);
496
497 recursiveRemoveDir(kTempDirectory);
498
499 // No matter how doBugreport() finished, let's try to explicitly stop
500 // cardumpstatez in case it stalled.
501 android::base::SetProperty("ctl.stop", "cardumpstatez");
502
503 return is_success ? EXIT_SUCCESS : EXIT_FAILURE;
504 }
505