1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "kernel_collector.h"
18
19 #include <map>
20 #include <sys/stat.h>
21
22 #include <base/files/file_util.h>
23 #include <base/logging.h>
24 #include <base/strings/string_util.h>
25 #include <base/strings/stringprintf.h>
26
27 using base::FilePath;
28 using base::StringPrintf;
29
30 namespace {
31
32 const char kDefaultKernelStackSignature[] = "kernel-UnspecifiedStackSignature";
33 const char kDumpParentPath[] = "/sys/fs";
34 const char kDumpPath[] = "/sys/fs/pstore";
35 const char kDumpFormat[] = "dmesg-ramoops-%zu";
36 const char kKernelExecName[] = "kernel";
37 // Maximum number of records to examine in the kDumpPath.
38 const size_t kMaxDumpRecords = 100;
39 const pid_t kKernelPid = 0;
40 const char kKernelSignatureKey[] = "sig";
41 // Byte length of maximum human readable portion of a kernel crash signature.
42 const int kMaxHumanStringLength = 40;
43 const uid_t kRootUid = 0;
44 // Time in seconds from the final kernel log message for a call stack
45 // to count towards the signature of the kcrash.
46 const int kSignatureTimestampWindow = 2;
47 // Kernel log timestamp regular expression.
48 const char kTimestampRegex[] = "^<.*>\\[\\s*(\\d+\\.\\d+)\\]";
49
50 //
51 // These regular expressions enable to us capture the PC in a backtrace.
52 // The backtrace is obtained through dmesg or the kernel's preserved/kcrashmem
53 // feature.
54 //
55 // For ARM we see:
56 // "<5>[ 39.458982] PC is at write_breakme+0xd0/0x1b4"
57 // For MIPS we see:
58 // "<5>[ 3378.552000] epc : 804010f0 lkdtm_do_action+0x68/0x3f8"
59 // For x86:
60 // "<0>[ 37.474699] EIP: [<790ed488>] write_breakme+0x80/0x108
61 // SS:ESP 0068:e9dd3efc"
62 //
63 const char* const kPCRegex[] = {
64 0,
65 " PC is at ([^\\+ ]+).*",
66 " epc\\s+:\\s+\\S+\\s+([^\\+ ]+).*", // MIPS has an exception program counter
67 " EIP: \\[<.*>\\] ([^\\+ ]+).*", // X86 uses EIP for the program counter
68 " RIP \\[<.*>\\] ([^\\+ ]+).*", // X86_64 uses RIP for the program counter
69 };
70
71 static_assert(arraysize(kPCRegex) == KernelCollector::kArchCount,
72 "Missing Arch PC regexp");
73
74 } // namespace
75
KernelCollector()76 KernelCollector::KernelCollector()
77 : is_enabled_(false),
78 ramoops_dump_path_(kDumpPath),
79 records_(0),
80 // We expect crash dumps in the format of architecture we are built for.
81 arch_(GetCompilerArch()) {
82 }
83
~KernelCollector()84 KernelCollector::~KernelCollector() {
85 }
86
OverridePreservedDumpPath(const FilePath & file_path)87 void KernelCollector::OverridePreservedDumpPath(const FilePath &file_path) {
88 ramoops_dump_path_ = file_path;
89 }
90
ReadRecordToString(std::string * contents,size_t current_record,bool * record_found)91 bool KernelCollector::ReadRecordToString(std::string *contents,
92 size_t current_record,
93 bool *record_found) {
94 // A record is a ramoops dump. It has an associated size of "record_size".
95 std::string record;
96 std::string captured;
97
98 // Ramoops appends a header to a crash which contains ==== followed by a
99 // timestamp. Ignore the header.
100 pcrecpp::RE record_re(
101 "====\\d+\\.\\d+\n(.*)",
102 pcrecpp::RE_Options().set_multiline(true).set_dotall(true));
103
104 pcrecpp::RE sanity_check_re("\n<\\d+>\\[\\s*(\\d+\\.\\d+)\\]");
105
106 FilePath ramoops_record;
107 GetRamoopsRecordPath(&ramoops_record, current_record);
108 if (!base::ReadFileToString(ramoops_record, &record)) {
109 LOG(ERROR) << "Unable to open " << ramoops_record.value();
110 return false;
111 }
112
113 *record_found = false;
114 if (record_re.FullMatch(record, &captured)) {
115 // Found a ramoops header, so strip the header and append the rest.
116 contents->append(captured);
117 *record_found = true;
118 } else if (sanity_check_re.PartialMatch(record.substr(0, 1024))) {
119 // pstore compression has been added since kernel 3.12. In order to
120 // decompress dmesg correctly, ramoops driver has to strip the header
121 // before handing over the record to the pstore driver, so we don't
122 // need to do it here anymore. However, the sanity check is needed because
123 // sometimes a pstore record is just a chunk of uninitialized memory which
124 // is not the result of a kernel crash. See crbug.com/443764
125 contents->append(record);
126 *record_found = true;
127 } else {
128 LOG(WARNING) << "Found invalid record at " << ramoops_record.value();
129 }
130
131 // Remove the record from pstore after it's found.
132 if (*record_found)
133 base::DeleteFile(ramoops_record, false);
134
135 return true;
136 }
137
GetRamoopsRecordPath(FilePath * path,size_t record)138 void KernelCollector::GetRamoopsRecordPath(FilePath *path,
139 size_t record) {
140 // Disable error "format not a string literal, argument types not checked"
141 // because this is valid, but GNU apparently doesn't bother checking a const
142 // format string.
143 #pragma GCC diagnostic push
144 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
145 *path = ramoops_dump_path_.Append(StringPrintf(kDumpFormat, record));
146 #pragma GCC diagnostic pop
147 }
148
LoadParameters()149 bool KernelCollector::LoadParameters() {
150 // Discover how many ramoops records are being exported by the driver.
151 size_t count;
152
153 for (count = 0; count < kMaxDumpRecords; ++count) {
154 FilePath ramoops_record;
155 GetRamoopsRecordPath(&ramoops_record, count);
156
157 if (!base::PathExists(ramoops_record))
158 break;
159 }
160
161 records_ = count;
162 return (records_ > 0);
163 }
164
LoadPreservedDump(std::string * contents)165 bool KernelCollector::LoadPreservedDump(std::string *contents) {
166 // Load dumps from the preserved memory and save them in contents.
167 // Since the system is set to restart on oops we won't actually ever have
168 // multiple records (only 0 or 1), but check in case we don't restart on
169 // oops in the future.
170 bool any_records_found = false;
171 bool record_found = false;
172 // clear contents since ReadFileToString actually appends to the string.
173 contents->clear();
174
175 for (size_t i = 0; i < records_; ++i) {
176 if (!ReadRecordToString(contents, i, &record_found)) {
177 break;
178 }
179 if (record_found) {
180 any_records_found = true;
181 }
182 }
183
184 if (!any_records_found) {
185 LOG(ERROR) << "No valid records found in " << ramoops_dump_path_.value();
186 return false;
187 }
188
189 return true;
190 }
191
StripSensitiveData(std::string * kernel_dump)192 void KernelCollector::StripSensitiveData(std::string *kernel_dump) {
193 // Strip any data that the user might not want sent up to the crash servers.
194 // We'll read in from kernel_dump and also place our output there.
195 //
196 // At the moment, the only sensitive data we strip is MAC addresses.
197
198 // Get rid of things that look like MAC addresses, since they could possibly
199 // give information about where someone has been. This is strings that look
200 // like this: 11:22:33:44:55:66
201 // Complications:
202 // - Within a given kernel_dump, want to be able to tell when the same MAC
203 // was used more than once. Thus, we'll consistently replace the first
204 // MAC found with 00:00:00:00:00:01, the second with ...:02, etc.
205 // - ACPI commands look like MAC addresses. We'll specifically avoid getting
206 // rid of those.
207 std::ostringstream result;
208 std::string pre_mac_str;
209 std::string mac_str;
210 std::map<std::string, std::string> mac_map;
211 pcrecpp::StringPiece input(*kernel_dump);
212
213 // This RE will find the next MAC address and can return us the data preceding
214 // the MAC and the MAC itself.
215 pcrecpp::RE mac_re("(.*?)("
216 "[0-9a-fA-F][0-9a-fA-F]:"
217 "[0-9a-fA-F][0-9a-fA-F]:"
218 "[0-9a-fA-F][0-9a-fA-F]:"
219 "[0-9a-fA-F][0-9a-fA-F]:"
220 "[0-9a-fA-F][0-9a-fA-F]:"
221 "[0-9a-fA-F][0-9a-fA-F])",
222 pcrecpp::RE_Options()
223 .set_multiline(true)
224 .set_dotall(true));
225
226 // This RE will identify when the 'pre_mac_str' shows that the MAC address
227 // was really an ACPI cmd. The full string looks like this:
228 // ata1.00: ACPI cmd ef/10:03:00:00:00:a0 (SET FEATURES) filtered out
229 pcrecpp::RE acpi_re("ACPI cmd ef/$",
230 pcrecpp::RE_Options()
231 .set_multiline(true)
232 .set_dotall(true));
233
234 // Keep consuming, building up a result string as we go.
235 while (mac_re.Consume(&input, &pre_mac_str, &mac_str)) {
236 if (acpi_re.PartialMatch(pre_mac_str)) {
237 // We really saw an ACPI command; add to result w/ no stripping.
238 result << pre_mac_str << mac_str;
239 } else {
240 // Found a MAC address; look up in our hash for the mapping.
241 std::string replacement_mac = mac_map[mac_str];
242 if (replacement_mac == "") {
243 // It wasn't present, so build up a replacement string.
244 int mac_id = mac_map.size();
245
246 // Handle up to 2^32 unique MAC address; overkill, but doesn't hurt.
247 replacement_mac = StringPrintf("00:00:%02x:%02x:%02x:%02x",
248 (mac_id & 0xff000000) >> 24,
249 (mac_id & 0x00ff0000) >> 16,
250 (mac_id & 0x0000ff00) >> 8,
251 (mac_id & 0x000000ff));
252 mac_map[mac_str] = replacement_mac;
253 }
254
255 // Dump the string before the MAC and the fake MAC address into result.
256 result << pre_mac_str << replacement_mac;
257 }
258 }
259
260 // One last bit of data might still be in the input.
261 result << input;
262
263 // We'll just assign right back to kernel_dump.
264 *kernel_dump = result.str();
265 }
266
DumpDirMounted()267 bool KernelCollector::DumpDirMounted() {
268 struct stat st_parent;
269 if (stat(kDumpParentPath, &st_parent)) {
270 PLOG(WARNING) << "Could not stat " << kDumpParentPath;
271 return false;
272 }
273
274 struct stat st_dump;
275 if (stat(kDumpPath, &st_dump)) {
276 PLOG(WARNING) << "Could not stat " << kDumpPath;
277 return false;
278 }
279
280 if (st_parent.st_dev == st_dump.st_dev) {
281 LOG(WARNING) << "Dump dir " << kDumpPath << " not mounted";
282 return false;
283 }
284
285 return true;
286 }
287
Enable()288 bool KernelCollector::Enable() {
289 if (arch_ == kArchUnknown || arch_ >= kArchCount ||
290 kPCRegex[arch_] == nullptr) {
291 LOG(WARNING) << "KernelCollector does not understand this architecture";
292 return false;
293 }
294
295 if (!DumpDirMounted()) {
296 LOG(WARNING) << "Kernel does not support crash dumping";
297 return false;
298 }
299
300 // To enable crashes, we will eventually need to set
301 // the chnv bit in BIOS, but it does not yet work.
302 LOG(INFO) << "Enabling kernel crash handling";
303 is_enabled_ = true;
304 return true;
305 }
306
// Maps a string to an unsigned number with a multiplicative hash: starting
// from 0, each character is folded in as hash * 16127 + character. We keep
// our own function so the result does not depend on a C++ library that might
// change. The approach is basically that of tr1/functional_hash.h, but with
// a larger prime multiplier (16127 vs 131).
static unsigned HashString(const std::string &input) {
  unsigned accumulated = 0;
  for (const char ch : input) {
    accumulated = accumulated * 16127 + ch;
  }
  return accumulated;
}
317
ProcessStackTrace(pcrecpp::StringPiece kernel_dump,bool print_diagnostics,unsigned * hash,float * last_stack_timestamp,bool * is_watchdog_crash)318 void KernelCollector::ProcessStackTrace(
319 pcrecpp::StringPiece kernel_dump,
320 bool print_diagnostics,
321 unsigned *hash,
322 float *last_stack_timestamp,
323 bool *is_watchdog_crash) {
324 pcrecpp::RE line_re("(.+)", pcrecpp::MULTILINE());
325 pcrecpp::RE stack_trace_start_re(std::string(kTimestampRegex) +
326 " (Call Trace|Backtrace):$");
327
328 // Match lines such as the following and grab out "function_name".
329 // The ? may or may not be present.
330 //
331 // For ARM:
332 // <4>[ 3498.731164] [<c0057220>] ? (function_name+0x20/0x2c) from
333 // [<c018062c>] (foo_bar+0xdc/0x1bc)
334 //
335 // For MIPS:
336 // <5>[ 3378.656000] [<804010f0>] lkdtm_do_action+0x68/0x3f8
337 //
338 // For X86:
339 // <4>[ 6066.849504] [<7937bcee>] ? function_name+0x66/0x6c
340 //
341 pcrecpp::RE stack_entry_re(std::string(kTimestampRegex) +
342 "\\s+\\[<[[:xdigit:]]+>\\]" // Matches " [<7937bcee>]"
343 "([\\s\\?(]+)" // Matches " ? (" (ARM) or " ? " (X86)
344 "([^\\+ )]+)"); // Matches until delimiter reached
345 std::string line;
346 std::string hashable;
347 std::string previous_hashable;
348 bool is_watchdog = false;
349
350 *hash = 0;
351 *last_stack_timestamp = 0;
352
353 // Find the last and second-to-last stack traces. The latter is used when
354 // the panic is from a watchdog timeout.
355 while (line_re.FindAndConsume(&kernel_dump, &line)) {
356 std::string certainty;
357 std::string function_name;
358 if (stack_trace_start_re.PartialMatch(line, last_stack_timestamp)) {
359 if (print_diagnostics) {
360 printf("Stack trace starting.%s\n",
361 hashable.empty() ? "" : " Saving prior trace.");
362 }
363 previous_hashable = hashable;
364 hashable.clear();
365 is_watchdog = false;
366 } else if (stack_entry_re.PartialMatch(line,
367 last_stack_timestamp,
368 &certainty,
369 &function_name)) {
370 bool is_certain = certainty.find('?') == std::string::npos;
371 if (print_diagnostics) {
372 printf("@%f: stack entry for %s (%s)\n",
373 *last_stack_timestamp,
374 function_name.c_str(),
375 is_certain ? "certain" : "uncertain");
376 }
377 // Do not include any uncertain (prefixed by '?') frames in our hash.
378 if (!is_certain)
379 continue;
380 if (!hashable.empty())
381 hashable.append("|");
382 if (function_name == "watchdog_timer_fn" ||
383 function_name == "watchdog") {
384 is_watchdog = true;
385 }
386 hashable.append(function_name);
387 }
388 }
389
390 // If the last stack trace contains a watchdog function we assume the panic
391 // is from the watchdog timer, and we hash the previous stack trace rather
392 // than the last one, assuming that the previous stack is that of the hung
393 // thread.
394 //
395 // In addition, if the hashable is empty (meaning all frames are uncertain,
396 // for whatever reason) also use the previous frame, as it cannot be any
397 // worse.
398 if (is_watchdog || hashable.empty()) {
399 hashable = previous_hashable;
400 }
401
402 *hash = HashString(hashable);
403 *is_watchdog_crash = is_watchdog;
404
405 if (print_diagnostics) {
406 printf("Hash based on stack trace: \"%s\" at %f.\n",
407 hashable.c_str(), *last_stack_timestamp);
408 }
409 }
410
411 // static
GetCompilerArch()412 KernelCollector::ArchKind KernelCollector::GetCompilerArch() {
413 #if defined(COMPILER_GCC) && defined(ARCH_CPU_ARM_FAMILY)
414 return kArchArm;
415 #elif defined(COMPILER_GCC) && defined(ARCH_CPU_MIPS_FAMILY)
416 return kArchMips;
417 #elif defined(COMPILER_GCC) && defined(ARCH_CPU_X86_64)
418 return kArchX86_64;
419 #elif defined(COMPILER_GCC) && defined(ARCH_CPU_X86_FAMILY)
420 return kArchX86;
421 #else
422 return kArchUnknown;
423 #endif
424 }
425
FindCrashingFunction(pcrecpp::StringPiece kernel_dump,bool print_diagnostics,float stack_trace_timestamp,std::string * crashing_function)426 bool KernelCollector::FindCrashingFunction(
427 pcrecpp::StringPiece kernel_dump,
428 bool print_diagnostics,
429 float stack_trace_timestamp,
430 std::string *crashing_function) {
431 float timestamp = 0;
432
433 // Use the correct regex for this architecture.
434 pcrecpp::RE eip_re(std::string(kTimestampRegex) + kPCRegex[arch_],
435 pcrecpp::MULTILINE());
436
437 while (eip_re.FindAndConsume(&kernel_dump, ×tamp, crashing_function)) {
438 if (print_diagnostics) {
439 printf("@%f: found crashing function %s\n",
440 timestamp,
441 crashing_function->c_str());
442 }
443 }
444 if (timestamp == 0) {
445 if (print_diagnostics) {
446 printf("Found no crashing function.\n");
447 }
448 return false;
449 }
450 if (stack_trace_timestamp != 0 &&
451 abs(static_cast<int>(stack_trace_timestamp - timestamp))
452 > kSignatureTimestampWindow) {
453 if (print_diagnostics) {
454 printf("Found crashing function but not within window.\n");
455 }
456 return false;
457 }
458 if (print_diagnostics) {
459 printf("Found crashing function %s\n", crashing_function->c_str());
460 }
461 return true;
462 }
463
FindPanicMessage(pcrecpp::StringPiece kernel_dump,bool print_diagnostics,std::string * panic_message)464 bool KernelCollector::FindPanicMessage(pcrecpp::StringPiece kernel_dump,
465 bool print_diagnostics,
466 std::string *panic_message) {
467 // Match lines such as the following and grab out "Fatal exception"
468 // <0>[ 342.841135] Kernel panic - not syncing: Fatal exception
469 pcrecpp::RE kernel_panic_re(std::string(kTimestampRegex) +
470 " Kernel panic[^\\:]*\\:\\s*(.*)",
471 pcrecpp::MULTILINE());
472 float timestamp = 0;
473 while (kernel_panic_re.FindAndConsume(&kernel_dump,
474 ×tamp,
475 panic_message)) {
476 if (print_diagnostics) {
477 printf("@%f: panic message %s\n",
478 timestamp,
479 panic_message->c_str());
480 }
481 }
482 if (timestamp == 0) {
483 if (print_diagnostics) {
484 printf("Found no panic message.\n");
485 }
486 return false;
487 }
488 return true;
489 }
490
ComputeKernelStackSignature(const std::string & kernel_dump,std::string * kernel_signature,bool print_diagnostics)491 bool KernelCollector::ComputeKernelStackSignature(
492 const std::string &kernel_dump,
493 std::string *kernel_signature,
494 bool print_diagnostics) {
495 unsigned stack_hash = 0;
496 float last_stack_timestamp = 0;
497 std::string human_string;
498 bool is_watchdog_crash;
499
500 ProcessStackTrace(kernel_dump,
501 print_diagnostics,
502 &stack_hash,
503 &last_stack_timestamp,
504 &is_watchdog_crash);
505
506 if (!FindCrashingFunction(kernel_dump,
507 print_diagnostics,
508 last_stack_timestamp,
509 &human_string)) {
510 if (!FindPanicMessage(kernel_dump, print_diagnostics, &human_string)) {
511 if (print_diagnostics) {
512 printf("Found no human readable string, using empty string.\n");
513 }
514 human_string.clear();
515 }
516 }
517
518 if (human_string.empty() && stack_hash == 0) {
519 if (print_diagnostics) {
520 printf("Found neither a stack nor a human readable string, failing.\n");
521 }
522 return false;
523 }
524
525 human_string = human_string.substr(0, kMaxHumanStringLength);
526 *kernel_signature = StringPrintf("%s-%s%s-%08X",
527 kKernelExecName,
528 (is_watchdog_crash ? "(HANG)-" : ""),
529 human_string.c_str(),
530 stack_hash);
531 return true;
532 }
533
Collect()534 bool KernelCollector::Collect() {
535 std::string kernel_dump;
536 FilePath root_crash_directory;
537
538 if (!LoadParameters()) {
539 return false;
540 }
541 if (!LoadPreservedDump(&kernel_dump)) {
542 return false;
543 }
544 StripSensitiveData(&kernel_dump);
545 if (kernel_dump.empty()) {
546 return false;
547 }
548 std::string signature;
549 if (!ComputeKernelStackSignature(kernel_dump, &signature, false)) {
550 signature = kDefaultKernelStackSignature;
551 }
552
553 std::string reason = "handling";
554 bool feedback = true;
555 if (IsDeveloperImage()) {
556 reason = "developer build - always dumping";
557 feedback = true;
558 } else if (!is_feedback_allowed_function_()) {
559 reason = "ignoring - no consent";
560 feedback = false;
561 }
562
563 LOG(INFO) << "Received prior crash notification from "
564 << "kernel (signature " << signature << ") (" << reason << ")";
565
566 if (feedback) {
567 count_crash_function_();
568
569 if (!GetCreatedCrashDirectoryByEuid(kRootUid,
570 &root_crash_directory,
571 nullptr)) {
572 return true;
573 }
574
575 std::string dump_basename =
576 FormatDumpBasename(kKernelExecName, time(nullptr), kKernelPid);
577 FilePath kernel_crash_path = root_crash_directory.Append(
578 StringPrintf("%s.kcrash", dump_basename.c_str()));
579
580 // We must use WriteNewFile instead of base::WriteFile as we
581 // do not want to write with root access to a symlink that an attacker
582 // might have created.
583 if (WriteNewFile(kernel_crash_path,
584 kernel_dump.data(),
585 kernel_dump.length()) !=
586 static_cast<int>(kernel_dump.length())) {
587 LOG(INFO) << "Failed to write kernel dump to "
588 << kernel_crash_path.value().c_str();
589 return true;
590 }
591
592 AddCrashMetaData(kKernelSignatureKey, signature);
593 WriteCrashMetaData(
594 root_crash_directory.Append(
595 StringPrintf("%s.meta", dump_basename.c_str())),
596 kKernelExecName,
597 kernel_crash_path.value());
598
599 LOG(INFO) << "Stored kcrash to " << kernel_crash_path.value();
600 }
601
602 return true;
603 }
604