1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "kernel_collector.h"
18 
19 #include <map>
20 #include <sys/stat.h>
21 
22 #include <base/files/file_util.h>
23 #include <base/logging.h>
24 #include <base/strings/string_util.h>
25 #include <base/strings/stringprintf.h>
26 
27 using base::FilePath;
28 using base::StringPrintf;
29 
30 namespace {
31 
32 const char kDefaultKernelStackSignature[] = "kernel-UnspecifiedStackSignature";
33 const char kDumpParentPath[] = "/sys/fs";
34 const char kDumpPath[] = "/sys/fs/pstore";
35 const char kDumpFormat[] = "dmesg-ramoops-%zu";
36 const char kKernelExecName[] = "kernel";
37 // Maximum number of records to examine in the kDumpPath.
38 const size_t kMaxDumpRecords = 100;
39 const pid_t kKernelPid = 0;
40 const char kKernelSignatureKey[] = "sig";
41 // Byte length of maximum human readable portion of a kernel crash signature.
42 const int kMaxHumanStringLength = 40;
43 const uid_t kRootUid = 0;
44 // Time in seconds from the final kernel log message for a call stack
45 // to count towards the signature of the kcrash.
46 const int kSignatureTimestampWindow = 2;
47 // Kernel log timestamp regular expression.
48 const char kTimestampRegex[] = "^<.*>\\[\\s*(\\d+\\.\\d+)\\]";
49 
50 //
51 // These regular expressions enable to us capture the PC in a backtrace.
52 // The backtrace is obtained through dmesg or the kernel's preserved/kcrashmem
53 // feature.
54 //
55 // For ARM we see:
56 //   "<5>[   39.458982] PC is at write_breakme+0xd0/0x1b4"
57 // For MIPS we see:
58 //   "<5>[ 3378.552000] epc   : 804010f0 lkdtm_do_action+0x68/0x3f8"
59 // For x86:
60 //   "<0>[   37.474699] EIP: [<790ed488>] write_breakme+0x80/0x108
61 //    SS:ESP 0068:e9dd3efc"
62 //
63 const char* const kPCRegex[] = {
64   0,
65   " PC is at ([^\\+ ]+).*",
66   " epc\\s+:\\s+\\S+\\s+([^\\+ ]+).*",  // MIPS has an exception program counter
67   " EIP: \\[<.*>\\] ([^\\+ ]+).*",  // X86 uses EIP for the program counter
68   " RIP  \\[<.*>\\] ([^\\+ ]+).*",  // X86_64 uses RIP for the program counter
69 };
70 
71 static_assert(arraysize(kPCRegex) == KernelCollector::kArchCount,
72               "Missing Arch PC regexp");
73 
74 }  // namespace
75 
KernelCollector()76 KernelCollector::KernelCollector()
77     : is_enabled_(false),
78       ramoops_dump_path_(kDumpPath),
79       records_(0),
80       // We expect crash dumps in the format of architecture we are built for.
81       arch_(GetCompilerArch()) {
82 }
83 
~KernelCollector()84 KernelCollector::~KernelCollector() {
85 }
86 
OverridePreservedDumpPath(const FilePath & file_path)87 void KernelCollector::OverridePreservedDumpPath(const FilePath &file_path) {
88   ramoops_dump_path_ = file_path;
89 }
90 
ReadRecordToString(std::string * contents,size_t current_record,bool * record_found)91 bool KernelCollector::ReadRecordToString(std::string *contents,
92                                          size_t current_record,
93                                          bool *record_found) {
94   // A record is a ramoops dump. It has an associated size of "record_size".
95   std::string record;
96   std::string captured;
97 
98   // Ramoops appends a header to a crash which contains ==== followed by a
99   // timestamp. Ignore the header.
100   pcrecpp::RE record_re(
101       "====\\d+\\.\\d+\n(.*)",
102       pcrecpp::RE_Options().set_multiline(true).set_dotall(true));
103 
104   pcrecpp::RE sanity_check_re("\n<\\d+>\\[\\s*(\\d+\\.\\d+)\\]");
105 
106   FilePath ramoops_record;
107   GetRamoopsRecordPath(&ramoops_record, current_record);
108   if (!base::ReadFileToString(ramoops_record, &record)) {
109     LOG(ERROR) << "Unable to open " << ramoops_record.value();
110     return false;
111   }
112 
113   *record_found = false;
114   if (record_re.FullMatch(record, &captured)) {
115     // Found a ramoops header, so strip the header and append the rest.
116     contents->append(captured);
117     *record_found = true;
118   } else if (sanity_check_re.PartialMatch(record.substr(0, 1024))) {
119     // pstore compression has been added since kernel 3.12. In order to
120     // decompress dmesg correctly, ramoops driver has to strip the header
121     // before handing over the record to the pstore driver, so we don't
122     // need to do it here anymore. However, the sanity check is needed because
123     // sometimes a pstore record is just a chunk of uninitialized memory which
124     // is not the result of a kernel crash. See crbug.com/443764
125     contents->append(record);
126     *record_found = true;
127   } else {
128     LOG(WARNING) << "Found invalid record at " << ramoops_record.value();
129   }
130 
131   // Remove the record from pstore after it's found.
132   if (*record_found)
133     base::DeleteFile(ramoops_record, false);
134 
135   return true;
136 }
137 
GetRamoopsRecordPath(FilePath * path,size_t record)138 void KernelCollector::GetRamoopsRecordPath(FilePath *path,
139                                            size_t record) {
140   // Disable error "format not a string literal, argument types not checked"
141   // because this is valid, but GNU apparently doesn't bother checking a const
142   // format string.
143   #pragma GCC diagnostic push
144   #pragma GCC diagnostic ignored "-Wformat-nonliteral"
145   *path = ramoops_dump_path_.Append(StringPrintf(kDumpFormat, record));
146   #pragma GCC diagnostic pop
147 }
148 
LoadParameters()149 bool KernelCollector::LoadParameters() {
150   // Discover how many ramoops records are being exported by the driver.
151   size_t count;
152 
153   for (count = 0; count < kMaxDumpRecords; ++count) {
154     FilePath ramoops_record;
155     GetRamoopsRecordPath(&ramoops_record, count);
156 
157     if (!base::PathExists(ramoops_record))
158       break;
159   }
160 
161   records_ = count;
162   return (records_ > 0);
163 }
164 
LoadPreservedDump(std::string * contents)165 bool KernelCollector::LoadPreservedDump(std::string *contents) {
166   // Load dumps from the preserved memory and save them in contents.
167   // Since the system is set to restart on oops we won't actually ever have
168   // multiple records (only 0 or 1), but check in case we don't restart on
169   // oops in the future.
170   bool any_records_found = false;
171   bool record_found = false;
172   // clear contents since ReadFileToString actually appends to the string.
173   contents->clear();
174 
175   for (size_t i = 0; i < records_; ++i) {
176     if (!ReadRecordToString(contents, i, &record_found)) {
177       break;
178     }
179     if (record_found) {
180       any_records_found = true;
181     }
182   }
183 
184   if (!any_records_found) {
185     LOG(ERROR) << "No valid records found in " << ramoops_dump_path_.value();
186     return false;
187   }
188 
189   return true;
190 }
191 
StripSensitiveData(std::string * kernel_dump)192 void KernelCollector::StripSensitiveData(std::string *kernel_dump) {
193   // Strip any data that the user might not want sent up to the crash servers.
194   // We'll read in from kernel_dump and also place our output there.
195   //
196   // At the moment, the only sensitive data we strip is MAC addresses.
197 
198   // Get rid of things that look like MAC addresses, since they could possibly
199   // give information about where someone has been.  This is strings that look
200   // like this: 11:22:33:44:55:66
201   // Complications:
202   // - Within a given kernel_dump, want to be able to tell when the same MAC
203   //   was used more than once.  Thus, we'll consistently replace the first
204   //   MAC found with 00:00:00:00:00:01, the second with ...:02, etc.
205   // - ACPI commands look like MAC addresses.  We'll specifically avoid getting
206   //   rid of those.
207   std::ostringstream result;
208   std::string pre_mac_str;
209   std::string mac_str;
210   std::map<std::string, std::string> mac_map;
211   pcrecpp::StringPiece input(*kernel_dump);
212 
213   // This RE will find the next MAC address and can return us the data preceding
214   // the MAC and the MAC itself.
215   pcrecpp::RE mac_re("(.*?)("
216                      "[0-9a-fA-F][0-9a-fA-F]:"
217                      "[0-9a-fA-F][0-9a-fA-F]:"
218                      "[0-9a-fA-F][0-9a-fA-F]:"
219                      "[0-9a-fA-F][0-9a-fA-F]:"
220                      "[0-9a-fA-F][0-9a-fA-F]:"
221                      "[0-9a-fA-F][0-9a-fA-F])",
222                      pcrecpp::RE_Options()
223                        .set_multiline(true)
224                        .set_dotall(true));
225 
226   // This RE will identify when the 'pre_mac_str' shows that the MAC address
227   // was really an ACPI cmd.  The full string looks like this:
228   //   ata1.00: ACPI cmd ef/10:03:00:00:00:a0 (SET FEATURES) filtered out
229   pcrecpp::RE acpi_re("ACPI cmd ef/$",
230                       pcrecpp::RE_Options()
231                         .set_multiline(true)
232                         .set_dotall(true));
233 
234   // Keep consuming, building up a result string as we go.
235   while (mac_re.Consume(&input, &pre_mac_str, &mac_str)) {
236     if (acpi_re.PartialMatch(pre_mac_str)) {
237       // We really saw an ACPI command; add to result w/ no stripping.
238       result << pre_mac_str << mac_str;
239     } else {
240       // Found a MAC address; look up in our hash for the mapping.
241       std::string replacement_mac = mac_map[mac_str];
242       if (replacement_mac == "") {
243         // It wasn't present, so build up a replacement string.
244         int mac_id = mac_map.size();
245 
246         // Handle up to 2^32 unique MAC address; overkill, but doesn't hurt.
247         replacement_mac = StringPrintf("00:00:%02x:%02x:%02x:%02x",
248                                        (mac_id & 0xff000000) >> 24,
249                                        (mac_id & 0x00ff0000) >> 16,
250                                        (mac_id & 0x0000ff00) >> 8,
251                                        (mac_id & 0x000000ff));
252         mac_map[mac_str] = replacement_mac;
253       }
254 
255       // Dump the string before the MAC and the fake MAC address into result.
256       result << pre_mac_str << replacement_mac;
257     }
258   }
259 
260   // One last bit of data might still be in the input.
261   result << input;
262 
263   // We'll just assign right back to kernel_dump.
264   *kernel_dump = result.str();
265 }
266 
DumpDirMounted()267 bool KernelCollector::DumpDirMounted() {
268   struct stat st_parent;
269   if (stat(kDumpParentPath, &st_parent)) {
270     PLOG(WARNING) << "Could not stat " << kDumpParentPath;
271     return false;
272   }
273 
274   struct stat st_dump;
275   if (stat(kDumpPath, &st_dump)) {
276     PLOG(WARNING) << "Could not stat " << kDumpPath;
277     return false;
278   }
279 
280   if (st_parent.st_dev == st_dump.st_dev) {
281     LOG(WARNING) << "Dump dir " << kDumpPath << " not mounted";
282     return false;
283   }
284 
285   return true;
286 }
287 
Enable()288 bool KernelCollector::Enable() {
289   if (arch_ == kArchUnknown || arch_ >= kArchCount ||
290       kPCRegex[arch_] == nullptr) {
291     LOG(WARNING) << "KernelCollector does not understand this architecture";
292     return false;
293   }
294 
295   if (!DumpDirMounted()) {
296     LOG(WARNING) << "Kernel does not support crash dumping";
297     return false;
298   }
299 
300   // To enable crashes, we will eventually need to set
301   // the chnv bit in BIOS, but it does not yet work.
302   LOG(INFO) << "Enabling kernel crash handling";
303   is_enabled_ = true;
304   return true;
305 }
306 
// Maps a string to a number.  A hand-written hash is used so that the result
// does not depend on a C++ library implementation that might change.  The
// approach is basically that of tr1/functional_hash.h, with a larger prime
// multiplier (16127 instead of 131).  Arithmetic is done in unsigned, so it
// wraps around rather than overflowing.  An empty string hashes to 0.
static unsigned HashString(const std::string &input) {
  unsigned accumulated = 0;
  for (const char ch : input) {
    accumulated = accumulated * 16127 + ch;
  }
  return accumulated;
}
317 
ProcessStackTrace(pcrecpp::StringPiece kernel_dump,bool print_diagnostics,unsigned * hash,float * last_stack_timestamp,bool * is_watchdog_crash)318 void KernelCollector::ProcessStackTrace(
319     pcrecpp::StringPiece kernel_dump,
320     bool print_diagnostics,
321     unsigned *hash,
322     float *last_stack_timestamp,
323     bool *is_watchdog_crash) {
324   pcrecpp::RE line_re("(.+)", pcrecpp::MULTILINE());
325   pcrecpp::RE stack_trace_start_re(std::string(kTimestampRegex) +
326         " (Call Trace|Backtrace):$");
327 
328   // Match lines such as the following and grab out "function_name".
329   // The ? may or may not be present.
330   //
331   // For ARM:
332   // <4>[ 3498.731164] [<c0057220>] ? (function_name+0x20/0x2c) from
333   // [<c018062c>] (foo_bar+0xdc/0x1bc)
334   //
335   // For MIPS:
336   // <5>[ 3378.656000] [<804010f0>] lkdtm_do_action+0x68/0x3f8
337   //
338   // For X86:
339   // <4>[ 6066.849504]  [<7937bcee>] ? function_name+0x66/0x6c
340   //
341   pcrecpp::RE stack_entry_re(std::string(kTimestampRegex) +
342     "\\s+\\[<[[:xdigit:]]+>\\]"      // Matches "  [<7937bcee>]"
343     "([\\s\\?(]+)"                   // Matches " ? (" (ARM) or " ? " (X86)
344     "([^\\+ )]+)");                  // Matches until delimiter reached
345   std::string line;
346   std::string hashable;
347   std::string previous_hashable;
348   bool is_watchdog = false;
349 
350   *hash = 0;
351   *last_stack_timestamp = 0;
352 
353   // Find the last and second-to-last stack traces.  The latter is used when
354   // the panic is from a watchdog timeout.
355   while (line_re.FindAndConsume(&kernel_dump, &line)) {
356     std::string certainty;
357     std::string function_name;
358     if (stack_trace_start_re.PartialMatch(line, last_stack_timestamp)) {
359       if (print_diagnostics) {
360         printf("Stack trace starting.%s\n",
361                hashable.empty() ? "" : "  Saving prior trace.");
362       }
363       previous_hashable = hashable;
364       hashable.clear();
365       is_watchdog = false;
366     } else if (stack_entry_re.PartialMatch(line,
367                                            last_stack_timestamp,
368                                            &certainty,
369                                            &function_name)) {
370       bool is_certain = certainty.find('?') == std::string::npos;
371       if (print_diagnostics) {
372         printf("@%f: stack entry for %s (%s)\n",
373                *last_stack_timestamp,
374                function_name.c_str(),
375                is_certain ? "certain" : "uncertain");
376       }
377       // Do not include any uncertain (prefixed by '?') frames in our hash.
378       if (!is_certain)
379         continue;
380       if (!hashable.empty())
381         hashable.append("|");
382       if (function_name == "watchdog_timer_fn" ||
383           function_name == "watchdog") {
384         is_watchdog = true;
385       }
386       hashable.append(function_name);
387     }
388   }
389 
390   // If the last stack trace contains a watchdog function we assume the panic
391   // is from the watchdog timer, and we hash the previous stack trace rather
392   // than the last one, assuming that the previous stack is that of the hung
393   // thread.
394   //
395   // In addition, if the hashable is empty (meaning all frames are uncertain,
396   // for whatever reason) also use the previous frame, as it cannot be any
397   // worse.
398   if (is_watchdog || hashable.empty()) {
399     hashable = previous_hashable;
400   }
401 
402   *hash = HashString(hashable);
403   *is_watchdog_crash = is_watchdog;
404 
405   if (print_diagnostics) {
406     printf("Hash based on stack trace: \"%s\" at %f.\n",
407            hashable.c_str(), *last_stack_timestamp);
408   }
409 }
410 
411 // static
GetCompilerArch()412 KernelCollector::ArchKind KernelCollector::GetCompilerArch() {
413 #if defined(COMPILER_GCC) && defined(ARCH_CPU_ARM_FAMILY)
414   return kArchArm;
415 #elif defined(COMPILER_GCC) && defined(ARCH_CPU_MIPS_FAMILY)
416   return kArchMips;
417 #elif defined(COMPILER_GCC) && defined(ARCH_CPU_X86_64)
418   return kArchX86_64;
419 #elif defined(COMPILER_GCC) && defined(ARCH_CPU_X86_FAMILY)
420   return kArchX86;
421 #else
422   return kArchUnknown;
423 #endif
424 }
425 
FindCrashingFunction(pcrecpp::StringPiece kernel_dump,bool print_diagnostics,float stack_trace_timestamp,std::string * crashing_function)426 bool KernelCollector::FindCrashingFunction(
427   pcrecpp::StringPiece kernel_dump,
428   bool print_diagnostics,
429   float stack_trace_timestamp,
430   std::string *crashing_function) {
431   float timestamp = 0;
432 
433   // Use the correct regex for this architecture.
434   pcrecpp::RE eip_re(std::string(kTimestampRegex) + kPCRegex[arch_],
435                      pcrecpp::MULTILINE());
436 
437   while (eip_re.FindAndConsume(&kernel_dump, &timestamp, crashing_function)) {
438     if (print_diagnostics) {
439       printf("@%f: found crashing function %s\n",
440              timestamp,
441              crashing_function->c_str());
442     }
443   }
444   if (timestamp == 0) {
445     if (print_diagnostics) {
446       printf("Found no crashing function.\n");
447     }
448     return false;
449   }
450   if (stack_trace_timestamp != 0 &&
451       abs(static_cast<int>(stack_trace_timestamp - timestamp))
452         > kSignatureTimestampWindow) {
453     if (print_diagnostics) {
454       printf("Found crashing function but not within window.\n");
455     }
456     return false;
457   }
458   if (print_diagnostics) {
459     printf("Found crashing function %s\n", crashing_function->c_str());
460   }
461   return true;
462 }
463 
FindPanicMessage(pcrecpp::StringPiece kernel_dump,bool print_diagnostics,std::string * panic_message)464 bool KernelCollector::FindPanicMessage(pcrecpp::StringPiece kernel_dump,
465                                        bool print_diagnostics,
466                                        std::string *panic_message) {
467   // Match lines such as the following and grab out "Fatal exception"
468   // <0>[  342.841135] Kernel panic - not syncing: Fatal exception
469   pcrecpp::RE kernel_panic_re(std::string(kTimestampRegex) +
470                               " Kernel panic[^\\:]*\\:\\s*(.*)",
471                               pcrecpp::MULTILINE());
472   float timestamp = 0;
473   while (kernel_panic_re.FindAndConsume(&kernel_dump,
474                                         &timestamp,
475                                         panic_message)) {
476     if (print_diagnostics) {
477       printf("@%f: panic message %s\n",
478              timestamp,
479              panic_message->c_str());
480     }
481   }
482   if (timestamp == 0) {
483     if (print_diagnostics) {
484       printf("Found no panic message.\n");
485     }
486     return false;
487   }
488   return true;
489 }
490 
ComputeKernelStackSignature(const std::string & kernel_dump,std::string * kernel_signature,bool print_diagnostics)491 bool KernelCollector::ComputeKernelStackSignature(
492     const std::string &kernel_dump,
493     std::string *kernel_signature,
494     bool print_diagnostics) {
495   unsigned stack_hash = 0;
496   float last_stack_timestamp = 0;
497   std::string human_string;
498   bool is_watchdog_crash;
499 
500   ProcessStackTrace(kernel_dump,
501                     print_diagnostics,
502                     &stack_hash,
503                     &last_stack_timestamp,
504                     &is_watchdog_crash);
505 
506   if (!FindCrashingFunction(kernel_dump,
507                             print_diagnostics,
508                             last_stack_timestamp,
509                             &human_string)) {
510     if (!FindPanicMessage(kernel_dump, print_diagnostics, &human_string)) {
511       if (print_diagnostics) {
512         printf("Found no human readable string, using empty string.\n");
513       }
514       human_string.clear();
515     }
516   }
517 
518   if (human_string.empty() && stack_hash == 0) {
519     if (print_diagnostics) {
520       printf("Found neither a stack nor a human readable string, failing.\n");
521     }
522     return false;
523   }
524 
525   human_string = human_string.substr(0, kMaxHumanStringLength);
526   *kernel_signature = StringPrintf("%s-%s%s-%08X",
527                                    kKernelExecName,
528                                    (is_watchdog_crash ? "(HANG)-" : ""),
529                                    human_string.c_str(),
530                                    stack_hash);
531   return true;
532 }
533 
Collect()534 bool KernelCollector::Collect() {
535   std::string kernel_dump;
536   FilePath root_crash_directory;
537 
538   if (!LoadParameters()) {
539     return false;
540   }
541   if (!LoadPreservedDump(&kernel_dump)) {
542     return false;
543   }
544   StripSensitiveData(&kernel_dump);
545   if (kernel_dump.empty()) {
546     return false;
547   }
548   std::string signature;
549   if (!ComputeKernelStackSignature(kernel_dump, &signature, false)) {
550     signature = kDefaultKernelStackSignature;
551   }
552 
553   std::string reason = "handling";
554   bool feedback = true;
555   if (IsDeveloperImage()) {
556     reason = "developer build - always dumping";
557     feedback = true;
558   } else if (!is_feedback_allowed_function_()) {
559     reason = "ignoring - no consent";
560     feedback = false;
561   }
562 
563   LOG(INFO) << "Received prior crash notification from "
564             << "kernel (signature " << signature << ") (" << reason << ")";
565 
566   if (feedback) {
567     count_crash_function_();
568 
569     if (!GetCreatedCrashDirectoryByEuid(kRootUid,
570                                         &root_crash_directory,
571                                         nullptr)) {
572       return true;
573     }
574 
575     std::string dump_basename =
576         FormatDumpBasename(kKernelExecName, time(nullptr), kKernelPid);
577     FilePath kernel_crash_path = root_crash_directory.Append(
578         StringPrintf("%s.kcrash", dump_basename.c_str()));
579 
580     // We must use WriteNewFile instead of base::WriteFile as we
581     // do not want to write with root access to a symlink that an attacker
582     // might have created.
583     if (WriteNewFile(kernel_crash_path,
584                      kernel_dump.data(),
585                      kernel_dump.length()) !=
586         static_cast<int>(kernel_dump.length())) {
587       LOG(INFO) << "Failed to write kernel dump to "
588                 << kernel_crash_path.value().c_str();
589       return true;
590     }
591 
592     AddCrashMetaData(kKernelSignatureKey, signature);
593     WriteCrashMetaData(
594         root_crash_directory.Append(
595             StringPrintf("%s.meta", dump_basename.c_str())),
596         kKernelExecName,
597         kernel_crash_path.value());
598 
599     LOG(INFO) << "Stored kcrash to " << kernel_crash_path.value();
600   }
601 
602   return true;
603 }
604