1 // Copyright (c) 2010 Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // minidump.h: A minidump reader.
31 //
32 // The basic structure of this module tracks the structure of the minidump
33 // file itself.  At the top level, a minidump file is represented by a
34 // Minidump object.  Like most other classes in this module, Minidump
35 // provides a Read method that initializes the object with information from
36 // the file.  Most of the classes in this file are wrappers around the
37 // "raw" structures found in the minidump file itself, and defined in
38 // minidump_format.h.  For example, each thread is represented by a
39 // MinidumpThread object, whose parameters are specified in an MDRawThread
40 // structure.  A properly byte-swapped MDRawThread can be obtained from a
41 // MinidumpThread easily by calling its thread() method.
42 //
43 // Most of the module lazily reads only the portion of the minidump file
44 // necessary to fulfill the user's request.  Calling Minidump::Read
45 // only reads the minidump's directory.  The thread list is not read until
46 // it is needed, and even once it's read, the memory regions for each
47 // thread's stack aren't read until they're needed.  This strategy avoids
48 // unnecessary file input, and allocating memory for data in which the user
49 // has no interest.  Note that although memory allocations for a typical
50 // minidump file are not particularly large, it is possible for legitimate
51 // minidumps to be sizable.  A full-memory minidump, for example, contains
52 // a snapshot of the entire mapped memory space.  Even a normal minidump,
53 // with stack memory only, can be large if, for example, the dump was
54 // generated in response to a crash that occurred due to an infinite-
55 // recursion bug that caused the stack's limits to be exceeded.  Finally,
56 // some users of this library will unfortunately find themselves in the
57 // position of having to process potentially-hostile minidumps that might
58 // attempt to cause problems by forcing the minidump processor to over-
59 // allocate memory.
60 //
61 // Memory management in this module is based on a strict
62 // you-don't-own-anything policy.  The only object owned by the user is
63 // the top-level Minidump object, the creation and destruction of which
64 // must be the user's own responsibility.  All other objects obtained
65 // through interaction with this module are ultimately owned by the
66 // Minidump object, and will be freed upon the Minidump object's destruction.
67 // Because memory regions can potentially involve large allocations, a
68 // FreeMemory method is provided by MinidumpMemoryRegion, allowing the user
69 // to release data when it is no longer needed.  Use of this method is
70 // optional but recommended.  If freed data is later required, it will
71 // be read back in from the minidump file again.
72 //
73 // There is one exception to this memory management policy:
74 // Minidump::ReadString will return a string object to the user, and the user
75 // is responsible for its deletion.
76 //
77 // Author: Mark Mentovai
78 
79 #ifndef GOOGLE_BREAKPAD_PROCESSOR_MINIDUMP_H__
80 #define GOOGLE_BREAKPAD_PROCESSOR_MINIDUMP_H__
81 
82 #ifndef _WIN32
83 #include <unistd.h>
84 #endif
85 
86 #include <iostream>
87 #include <map>
88 #include <string>
89 #include <vector>
90 
91 #include "common/using_std_string.h"
92 #include "google_breakpad/processor/code_module.h"
93 #include "google_breakpad/processor/code_modules.h"
94 #include "google_breakpad/processor/dump_context.h"
95 #include "google_breakpad/processor/dump_object.h"
96 #include "google_breakpad/processor/memory_region.h"
97 
98 
99 namespace google_breakpad {
100 
101 
// Types that this header only refers to by pointer or by name; their
// definitions live elsewhere.
class Minidump;
template<typename AddressType, typename EntryType> class RangeMap;


// Standard containers used unqualified throughout the declarations below.
using std::vector;
using std::map;
108 
109 
110 // MinidumpObject is the base of all Minidump* objects except for Minidump
111 // itself.
112 class MinidumpObject : public DumpObject {
113  public:
~MinidumpObject()114   virtual ~MinidumpObject() {}
115 
116  protected:
117   explicit MinidumpObject(Minidump* minidump);
118 
119   // Refers to the Minidump object that is the ultimate parent of this
120   // Some MinidumpObjects are owned by other MinidumpObjects, but at the
121   // root of the ownership tree is always a Minidump.  The Minidump object
122   // is kept here for access to its seeking and reading facilities, and
123   // for access to data about the minidump file itself, such as whether
124   // it should be byte-swapped.
125   Minidump* minidump_;
126 };
127 
128 
129 // This class exists primarily to provide a virtual destructor in a base
130 // class common to all objects that might be stored in
131 // Minidump::mStreamObjects.  Some object types will never be stored in
132 // Minidump::mStreamObjects, but are represented as streams and adhere to the
133 // same interface, and may be derived from this class.
134 class MinidumpStream : public MinidumpObject {
135  public:
~MinidumpStream()136   virtual ~MinidumpStream() {}
137 
138  protected:
139   explicit MinidumpStream(Minidump* minidump);
140 
141  private:
142   // Populate (and validate) the MinidumpStream.  minidump_ is expected
143   // to be positioned at the beginning of the stream, so that the next
144   // read from the minidump will be at the beginning of the stream.
145   // expected_size should be set to the stream's length as contained in
146   // the MDRawDirectory record or other identifying record.  A class
147   // that implements MinidumpStream can compare expected_size to a
148   // known size as an integrity check.
149   virtual bool Read(uint32_t expected_size) = 0;
150 };
151 
152 
153 // MinidumpContext carries a CPU-specific MDRawContext structure, which
154 // contains CPU context such as register states.  Each thread has its
155 // own context, and the exception record, if present, also has its own
156 // context.  Note that if the exception record is present, the context it
157 // refers to is probably what the user wants to use for the exception
158 // thread, instead of that thread's own context.  The exception thread's
159 // context (as opposed to the exception record's context) will contain
160 // context for the exception handler (which performs minidump generation),
161 // and not the context that caused the exception (which is probably what the
162 // user wants).
163 class MinidumpContext : public DumpContext {
164  public:
165   virtual ~MinidumpContext();
166 
167  protected:
168   explicit MinidumpContext(Minidump* minidump);
169 
170  private:
171   friend class MinidumpThread;
172   friend class MinidumpException;
173 
174   bool Read(uint32_t expected_size);
175 
176   // If the minidump contains a SYSTEM_INFO_STREAM, makes sure that the
177   // system info stream gives an appropriate CPU type matching the context
178   // CPU type in context_cpu_type.  Returns false if the CPU type does not
179   // match.  Returns true if the CPU type matches or if the minidump does
180   // not contain a system info stream.
181   bool CheckAgainstSystemInfo(uint32_t context_cpu_type);
182 
183   // Refers to the Minidump object that is the ultimate parent of this
184   // Some MinidumpObjects are owned by other MinidumpObjects, but at the
185   // root of the ownership tree is always a Minidump.  The Minidump object
186   // is kept here for access to its seeking and reading facilities, and
187   // for access to data about the minidump file itself, such as whether
188   // it should be byte-swapped.
189   Minidump* minidump_;
190 };
191 
192 
193 // MinidumpMemoryRegion does not wrap any MDRaw structure, and only contains
194 // a reference to an MDMemoryDescriptor.  This object is intended to wrap
195 // portions of a minidump file that contain memory dumps.  In normal
196 // minidumps, each MinidumpThread owns a MinidumpMemoryRegion corresponding
197 // to the thread's stack memory.  MinidumpMemoryList also gives access to
198 // memory regions in its list as MinidumpMemoryRegions.  This class
199 // adheres to MemoryRegion so that it may be used as a data provider to
200 // the Stackwalker family of classes.
201 class MinidumpMemoryRegion : public MinidumpObject,
202                              public MemoryRegion {
203  public:
204   virtual ~MinidumpMemoryRegion();
205 
set_max_bytes(uint32_t max_bytes)206   static void set_max_bytes(uint32_t max_bytes) { max_bytes_ = max_bytes; }
max_bytes()207   static uint32_t max_bytes() { return max_bytes_; }
208 
209   // Returns a pointer to the base of the memory region.  Returns the
210   // cached value if available, otherwise, reads the minidump file and
211   // caches the memory region.
212   const uint8_t* GetMemory() const;
213 
214   // The address of the base of the memory region.
215   uint64_t GetBase() const;
216 
217   // The size, in bytes, of the memory region.
218   uint32_t GetSize() const;
219 
220   // Frees the cached memory region, if cached.
221   void FreeMemory();
222 
223   // Obtains the value of memory at the pointer specified by address.
224   bool GetMemoryAtAddress(uint64_t address, uint8_t*  value) const;
225   bool GetMemoryAtAddress(uint64_t address, uint16_t* value) const;
226   bool GetMemoryAtAddress(uint64_t address, uint32_t* value) const;
227   bool GetMemoryAtAddress(uint64_t address, uint64_t* value) const;
228 
229   // Print a human-readable representation of the object to stdout.
230   void Print() const;
231 
232  protected:
233   explicit MinidumpMemoryRegion(Minidump* minidump);
234 
235  private:
236   friend class MinidumpThread;
237   friend class MinidumpMemoryList;
238 
239   // Identify the base address and size of the memory region, and the
240   // location it may be found in the minidump file.
241   void SetDescriptor(MDMemoryDescriptor* descriptor);
242 
243   // Implementation for GetMemoryAtAddress
244   template<typename T> bool GetMemoryAtAddressInternal(uint64_t address,
245                                                        T*        value) const;
246 
247   // The largest memory region that will be read from a minidump.  The
248   // default is 1MB.
249   static uint32_t max_bytes_;
250 
251   // Base address and size of the memory region, and its position in the
252   // minidump file.
253   MDMemoryDescriptor* descriptor_;
254 
255   // Cached memory.
256   mutable vector<uint8_t>* memory_;
257 };
258 
259 
260 // MinidumpThread contains information about a thread of execution,
261 // including a snapshot of the thread's stack and CPU context.  For
262 // the thread that caused an exception, the context carried by
263 // MinidumpException is probably desired instead of the CPU context
264 // provided here.
265 // Note that a MinidumpThread may be valid() even if it does not
266 // contain a memory region or context.
267 class MinidumpThread : public MinidumpObject {
268  public:
269   virtual ~MinidumpThread();
270 
thread()271   const MDRawThread* thread() const { return valid_ ? &thread_ : NULL; }
272   // GetMemory may return NULL even if the MinidumpThread is valid,
273   // if the thread memory cannot be read.
274   virtual MinidumpMemoryRegion* GetMemory();
275   // GetContext may return NULL even if the MinidumpThread is valid.
276   virtual MinidumpContext* GetContext();
277 
278   // The thread ID is used to determine if a thread is the exception thread,
279   // so a special getter is provided to retrieve this data from the
280   // MDRawThread structure.  Returns false if the thread ID cannot be
281   // determined.
282   virtual bool GetThreadID(uint32_t *thread_id) const;
283 
284   // Print a human-readable representation of the object to stdout.
285   void Print();
286 
287   // Returns the start address of the thread stack memory region.  Returns 0 if
288   // MinidumpThread is invalid.  Note that this method can be called even when
289   // the thread memory cannot be read and GetMemory returns NULL.
290   virtual uint64_t GetStartOfStackMemoryRange() const;
291 
292  protected:
293   explicit MinidumpThread(Minidump* minidump);
294 
295  private:
296   // These objects are managed by MinidumpThreadList.
297   friend class MinidumpThreadList;
298 
299   // This works like MinidumpStream::Read, but is driven by
300   // MinidumpThreadList.  No size checking is done, because
301   // MinidumpThreadList handles that directly.
302   bool Read();
303 
304   MDRawThread           thread_;
305   MinidumpMemoryRegion* memory_;
306   MinidumpContext*      context_;
307 };
308 
309 
310 // MinidumpThreadList contains all of the threads (as MinidumpThreads) in
311 // a process.
312 class MinidumpThreadList : public MinidumpStream {
313  public:
314   virtual ~MinidumpThreadList();
315 
set_max_threads(uint32_t max_threads)316   static void set_max_threads(uint32_t max_threads) {
317     max_threads_ = max_threads;
318   }
max_threads()319   static uint32_t max_threads() { return max_threads_; }
320 
thread_count()321   virtual unsigned int thread_count() const {
322     return valid_ ? thread_count_ : 0;
323   }
324 
325   // Sequential access to threads.
326   virtual MinidumpThread* GetThreadAtIndex(unsigned int index) const;
327 
328   // Random access to threads.
329   MinidumpThread* GetThreadByID(uint32_t thread_id);
330 
331   // Print a human-readable representation of the object to stdout.
332   void Print();
333 
334  protected:
335   explicit MinidumpThreadList(Minidump* aMinidump);
336 
337  private:
338   friend class Minidump;
339 
340   typedef map<uint32_t, MinidumpThread*> IDToThreadMap;
341   typedef vector<MinidumpThread> MinidumpThreads;
342 
343   static const uint32_t kStreamType = MD_THREAD_LIST_STREAM;
344 
345   bool Read(uint32_t aExpectedSize);
346 
347   // The largest number of threads that will be read from a minidump.  The
348   // default is 256.
349   static uint32_t max_threads_;
350 
351   // Access to threads using the thread ID as the key.
352   IDToThreadMap    id_to_thread_map_;
353 
354   // The list of threads.
355   MinidumpThreads* threads_;
356   uint32_t        thread_count_;
357 };
358 
359 
360 // MinidumpModule wraps MDRawModule, which contains information about loaded
361 // code modules.  Access is provided to various data referenced indirectly
362 // by MDRawModule, such as the module's name and a specification for where
363 // to locate debugging information for the module.
364 class MinidumpModule : public MinidumpObject,
365                        public CodeModule {
366  public:
367   virtual ~MinidumpModule();
368 
set_max_cv_bytes(uint32_t max_cv_bytes)369   static void set_max_cv_bytes(uint32_t max_cv_bytes) {
370     max_cv_bytes_ = max_cv_bytes;
371   }
max_cv_bytes()372   static uint32_t max_cv_bytes() { return max_cv_bytes_; }
373 
set_max_misc_bytes(uint32_t max_misc_bytes)374   static void set_max_misc_bytes(uint32_t max_misc_bytes) {
375     max_misc_bytes_ = max_misc_bytes;
376   }
max_misc_bytes()377   static uint32_t max_misc_bytes() { return max_misc_bytes_; }
378 
module()379   const MDRawModule* module() const { return valid_ ? &module_ : NULL; }
380 
381   // CodeModule implementation
base_address()382   virtual uint64_t base_address() const {
383     return valid_ ? module_.base_of_image : static_cast<uint64_t>(-1);
384   }
size()385   virtual uint64_t size() const { return valid_ ? module_.size_of_image : 0; }
386   virtual string code_file() const;
387   virtual string code_identifier() const;
388   virtual string debug_file() const;
389   virtual string debug_identifier() const;
390   virtual string version() const;
391   virtual const CodeModule* Copy() const;
392 
393   // The CodeView record, which contains information to locate the module's
394   // debugging information (pdb).  This is returned as uint8_t* because
395   // the data can be of types MDCVInfoPDB20* or MDCVInfoPDB70*, or it may be
396   // of a type unknown to Breakpad, in which case the raw data will still be
397   // returned but no byte-swapping will have been performed.  Check the
398   // record's signature in the first four bytes to differentiate between
399   // the various types.  Current toolchains generate modules which carry
400   // MDCVInfoPDB70 by default.  Returns a pointer to the CodeView record on
401   // success, and NULL on failure.  On success, the optional |size| argument
402   // is set to the size of the CodeView record.
403   const uint8_t* GetCVRecord(uint32_t* size);
404 
405   // The miscellaneous debug record, which is obsolete.  Current toolchains
406   // do not generate this type of debugging information (dbg), and this
407   // field is not expected to be present.  Returns a pointer to the debugging
408   // record on success, and NULL on failure.  On success, the optional |size|
409   // argument is set to the size of the debugging record.
410   const MDImageDebugMisc* GetMiscRecord(uint32_t* size);
411 
412   // Print a human-readable representation of the object to stdout.
413   void Print();
414 
415  private:
416   // These objects are managed by MinidumpModuleList.
417   friend class MinidumpModuleList;
418 
419   explicit MinidumpModule(Minidump* minidump);
420 
421   // This works like MinidumpStream::Read, but is driven by
422   // MinidumpModuleList.  No size checking is done, because
423   // MinidumpModuleList handles that directly.
424   bool Read();
425 
426   // Reads indirectly-referenced data, including the module name, CodeView
427   // record, and miscellaneous debugging record.  This is necessary to allow
428   // MinidumpModuleList to fully construct MinidumpModule objects without
429   // requiring seeks to read a contiguous set of MinidumpModule objects.
430   // All auxiliary data should be available when Read is called, in order to
431   // allow the CodeModule getters to be const methods.
432   bool ReadAuxiliaryData();
433 
434   // The largest number of bytes that will be read from a minidump for a
435   // CodeView record or miscellaneous debugging record, respectively.  The
436   // default for each is 1024.
437   static uint32_t max_cv_bytes_;
438   static uint32_t max_misc_bytes_;
439 
440   // True after a successful Read.  This is different from valid_, which is
441   // not set true until ReadAuxiliaryData also completes successfully.
442   // module_valid_ is only used by ReadAuxiliaryData and the functions it
443   // calls to determine whether the object is ready for auxiliary data to
444   // be read.
445   bool              module_valid_;
446 
447   // True if debug info was read from the module.  Certain modules
448   // may contain debug records in formats we don't support,
449   // so we can just set this to false to ignore them.
450   bool              has_debug_info_;
451 
452   MDRawModule       module_;
453 
454   // Cached module name.
455   const string*     name_;
456 
457   // Cached CodeView record - this is MDCVInfoPDB20 or (likely)
458   // MDCVInfoPDB70, or possibly something else entirely.  Stored as a uint8_t
459   // because the structure contains a variable-sized string and its exact
460   // size cannot be known until it is processed.
461   vector<uint8_t>* cv_record_;
462 
463   // If cv_record_ is present, cv_record_signature_ contains a copy of the
464   // CodeView record's first four bytes, for ease of determinining the
465   // type of structure that cv_record_ contains.
466   uint32_t cv_record_signature_;
467 
468   // Cached MDImageDebugMisc (usually not present), stored as uint8_t
469   // because the structure contains a variable-sized string and its exact
470   // size cannot be known until it is processed.
471   vector<uint8_t>* misc_record_;
472 };
473 
474 
475 // MinidumpModuleList contains all of the loaded code modules for a process
476 // in the form of MinidumpModules.  It maintains a map of these modules
477 // so that it may easily provide a code module corresponding to a specific
478 // address.
479 class MinidumpModuleList : public MinidumpStream,
480                            public CodeModules {
481  public:
482   virtual ~MinidumpModuleList();
483 
set_max_modules(uint32_t max_modules)484   static void set_max_modules(uint32_t max_modules) {
485     max_modules_ = max_modules;
486   }
max_modules()487   static uint32_t max_modules() { return max_modules_; }
488 
489   // CodeModules implementation.
module_count()490   virtual unsigned int module_count() const {
491     return valid_ ? module_count_ : 0;
492   }
493   virtual const MinidumpModule* GetModuleForAddress(uint64_t address) const;
494   virtual const MinidumpModule* GetMainModule() const;
495   virtual const MinidumpModule* GetModuleAtSequence(
496       unsigned int sequence) const;
497   virtual const MinidumpModule* GetModuleAtIndex(unsigned int index) const;
498   virtual const CodeModules* Copy() const;
499 
500   // Print a human-readable representation of the object to stdout.
501   void Print();
502 
503  protected:
504   explicit MinidumpModuleList(Minidump* minidump);
505 
506  private:
507   friend class Minidump;
508 
509   typedef vector<MinidumpModule> MinidumpModules;
510 
511   static const uint32_t kStreamType = MD_MODULE_LIST_STREAM;
512 
513   bool Read(uint32_t expected_size);
514 
515   // The largest number of modules that will be read from a minidump.  The
516   // default is 1024.
517   static uint32_t max_modules_;
518 
519   // Access to modules using addresses as the key.
520   RangeMap<uint64_t, unsigned int> *range_map_;
521 
522   MinidumpModules *modules_;
523   uint32_t module_count_;
524 };
525 
526 
527 // MinidumpMemoryList corresponds to a minidump's MEMORY_LIST_STREAM stream,
528 // which references the snapshots of all of the memory regions contained
529 // within the minidump.  For a normal minidump, this includes stack memory
530 // (also referenced by each MinidumpThread, in fact, the MDMemoryDescriptors
531 // here and in MDRawThread both point to exactly the same data in a
532 // minidump file, conserving space), as well as a 256-byte snapshot of memory
533 // surrounding the instruction pointer in the case of an exception.  Other
534 // types of minidumps may contain significantly more memory regions.  Full-
535 // memory minidumps contain all of a process' mapped memory.
536 class MinidumpMemoryList : public MinidumpStream {
537  public:
538   virtual ~MinidumpMemoryList();
539 
set_max_regions(uint32_t max_regions)540   static void set_max_regions(uint32_t max_regions) {
541     max_regions_ = max_regions;
542   }
max_regions()543   static uint32_t max_regions() { return max_regions_; }
544 
region_count()545   unsigned int region_count() const { return valid_ ? region_count_ : 0; }
546 
547   // Sequential access to memory regions.
548   MinidumpMemoryRegion* GetMemoryRegionAtIndex(unsigned int index);
549 
550   // Random access to memory regions.  Returns the region encompassing
551   // the address identified by address.
552   virtual MinidumpMemoryRegion* GetMemoryRegionForAddress(uint64_t address);
553 
554   // Print a human-readable representation of the object to stdout.
555   void Print();
556 
557  private:
558   friend class Minidump;
559   friend class MockMinidumpMemoryList;
560 
561   typedef vector<MDMemoryDescriptor>   MemoryDescriptors;
562   typedef vector<MinidumpMemoryRegion> MemoryRegions;
563 
564   static const uint32_t kStreamType = MD_MEMORY_LIST_STREAM;
565 
566   explicit MinidumpMemoryList(Minidump* minidump);
567 
568   bool Read(uint32_t expected_size);
569 
570   // The largest number of memory regions that will be read from a minidump.
571   // The default is 256.
572   static uint32_t max_regions_;
573 
574   // Access to memory regions using addresses as the key.
575   RangeMap<uint64_t, unsigned int> *range_map_;
576 
577   // The list of descriptors.  This is maintained separately from the list
578   // of regions, because MemoryRegion doesn't own its MemoryDescriptor, it
579   // maintains a pointer to it.  descriptors_ provides the storage for this
580   // purpose.
581   MemoryDescriptors *descriptors_;
582 
583   // The list of regions.
584   MemoryRegions *regions_;
585   uint32_t region_count_;
586 };
587 
588 
589 // MinidumpException wraps MDRawExceptionStream, which contains information
590 // about the exception that caused the minidump to be generated, if the
591 // minidump was generated in an exception handler called as a result of an
592 // exception.  It also provides access to a MinidumpContext object, which
593 // contains the CPU context for the exception thread at the time the exception
594 // occurred.
595 class MinidumpException : public MinidumpStream {
596  public:
597   virtual ~MinidumpException();
598 
exception()599   const MDRawExceptionStream* exception() const {
600     return valid_ ? &exception_ : NULL;
601   }
602 
603   // The thread ID is used to determine if a thread is the exception thread,
604   // so a special getter is provided to retrieve this data from the
605   // MDRawExceptionStream structure.  Returns false if the thread ID cannot
606   // be determined.
607   bool GetThreadID(uint32_t *thread_id) const;
608 
609   MinidumpContext* GetContext();
610 
611   // Print a human-readable representation of the object to stdout.
612   void Print();
613 
614  private:
615   friend class Minidump;
616 
617   static const uint32_t kStreamType = MD_EXCEPTION_STREAM;
618 
619   explicit MinidumpException(Minidump* minidump);
620 
621   bool Read(uint32_t expected_size);
622 
623   MDRawExceptionStream exception_;
624   MinidumpContext*     context_;
625 };
626 
627 // MinidumpAssertion wraps MDRawAssertionInfo, which contains information
628 // about an assertion that caused the minidump to be generated.
629 class MinidumpAssertion : public MinidumpStream {
630  public:
631   virtual ~MinidumpAssertion();
632 
assertion()633   const MDRawAssertionInfo* assertion() const {
634     return valid_ ? &assertion_ : NULL;
635   }
636 
expression()637   string expression() const {
638     return valid_ ? expression_ : "";
639   }
640 
function()641   string function() const {
642     return valid_ ? function_ : "";
643   }
644 
file()645   string file() const {
646     return valid_ ? file_ : "";
647   }
648 
649   // Print a human-readable representation of the object to stdout.
650   void Print();
651 
652  private:
653   friend class Minidump;
654 
655   static const uint32_t kStreamType = MD_ASSERTION_INFO_STREAM;
656 
657   explicit MinidumpAssertion(Minidump* minidump);
658 
659   bool Read(uint32_t expected_size);
660 
661   MDRawAssertionInfo assertion_;
662   string expression_;
663   string function_;
664   string file_;
665 };
666 
667 
668 // MinidumpSystemInfo wraps MDRawSystemInfo and provides information about
669 // the system on which the minidump was generated.  See also MinidumpMiscInfo.
670 class MinidumpSystemInfo : public MinidumpStream {
671  public:
672   virtual ~MinidumpSystemInfo();
673 
system_info()674   const MDRawSystemInfo* system_info() const {
675     return valid_ ? &system_info_ : NULL;
676   }
677 
678   // GetOS and GetCPU return textual representations of the operating system
679   // and CPU that produced the minidump.  Unlike most other Minidump* methods,
680   // they return string objects, not weak pointers.  Defined values for
681   // GetOS() are "mac", "windows", and "linux".  Defined values for GetCPU
682   // are "x86" and "ppc".  These methods return an empty string when their
683   // values are unknown.
684   string GetOS();
685   string GetCPU();
686 
687   // I don't know what CSD stands for, but this field is documented as
688   // returning a textual representation of the OS service pack.  On other
689   // platforms, this provides additional information about an OS version
690   // level beyond major.minor.micro.  Returns NULL if unknown.
691   const string* GetCSDVersion();
692 
693   // If a CPU vendor string can be determined, returns a pointer to it,
694   // otherwise, returns NULL.  CPU vendor strings can be determined from
695   // x86 CPUs with CPUID 0.
696   const string* GetCPUVendor();
697 
698   // Print a human-readable representation of the object to stdout.
699   void Print();
700 
701  protected:
702   explicit MinidumpSystemInfo(Minidump* minidump);
703   MDRawSystemInfo system_info_;
704 
705   // Textual representation of the OS service pack, for minidumps produced
706   // by MiniDumpWriteDump on Windows.
707   const string* csd_version_;
708 
709  private:
710   friend class Minidump;
711 
712   static const uint32_t kStreamType = MD_SYSTEM_INFO_STREAM;
713 
714   bool Read(uint32_t expected_size);
715 
716   // A string identifying the CPU vendor, if known.
717   const string* cpu_vendor_;
718 };
719 
720 
721 // MinidumpMiscInfo wraps MDRawMiscInfo and provides information about
722 // the process that generated the minidump, and optionally additional system
723 // information.  See also MinidumpSystemInfo.
724 class MinidumpMiscInfo : public MinidumpStream {
725  public:
misc_info()726   const MDRawMiscInfo* misc_info() const {
727     return valid_ ? &misc_info_ : NULL;
728   }
729 
730   // Print a human-readable representation of the object to stdout.
731   void Print();
732 
733  private:
734   friend class Minidump;
735   friend class TestMinidumpMiscInfo;
736 
737   static const uint32_t kStreamType = MD_MISC_INFO_STREAM;
738 
739   explicit MinidumpMiscInfo(Minidump* minidump_);
740 
741   bool Read(uint32_t expected_size_);
742 
743   MDRawMiscInfo misc_info_;
744 
745   // Populated by Read.  Contains the converted strings from the corresponding
746   // UTF-16 fields in misc_info_
747   string standard_name_;
748   string daylight_name_;
749   string build_string_;
750   string dbg_bld_str_;
751 };
752 
753 
754 // MinidumpBreakpadInfo wraps MDRawBreakpadInfo, which is an optional stream in
755 // a minidump that provides additional information about the process state
756 // at the time the minidump was generated.
757 class MinidumpBreakpadInfo : public MinidumpStream {
758  public:
breakpad_info()759   const MDRawBreakpadInfo* breakpad_info() const {
760     return valid_ ? &breakpad_info_ : NULL;
761   }
762 
763   // These thread IDs are used to determine if threads deserve special
764   // treatment, so special getters are provided to retrieve this data from
765   // the MDRawBreakpadInfo structure.  The getters return false if the thread
766   // IDs cannot be determined.
767   bool GetDumpThreadID(uint32_t *thread_id) const;
768   bool GetRequestingThreadID(uint32_t *thread_id) const;
769 
770   // Print a human-readable representation of the object to stdout.
771   void Print();
772 
773  private:
774   friend class Minidump;
775 
776   static const uint32_t kStreamType = MD_BREAKPAD_INFO_STREAM;
777 
778   explicit MinidumpBreakpadInfo(Minidump* minidump_);
779 
780   bool Read(uint32_t expected_size_);
781 
782   MDRawBreakpadInfo breakpad_info_;
783 };
784 
785 // MinidumpMemoryInfo wraps MDRawMemoryInfo, which provides information
786 // about mapped memory regions in a process, including their ranges
787 // and protection.
788 class MinidumpMemoryInfo : public MinidumpObject {
789  public:
info()790   const MDRawMemoryInfo* info() const { return valid_ ? &memory_info_ : NULL; }
791 
792   // The address of the base of the memory region.
GetBase()793   uint64_t GetBase() const { return valid_ ? memory_info_.base_address : 0; }
794 
795   // The size, in bytes, of the memory region.
GetSize()796   uint64_t GetSize() const { return valid_ ? memory_info_.region_size : 0; }
797 
798   // Return true if the memory protection allows execution.
799   bool IsExecutable() const;
800 
801   // Return true if the memory protection allows writing.
802   bool IsWritable() const;
803 
804   // Print a human-readable representation of the object to stdout.
805   void Print();
806 
807  private:
808   // These objects are managed by MinidumpMemoryInfoList.
809   friend class MinidumpMemoryInfoList;
810 
811   explicit MinidumpMemoryInfo(Minidump* minidump);
812 
813   // This works like MinidumpStream::Read, but is driven by
814   // MinidumpMemoryInfoList.  No size checking is done, because
815   // MinidumpMemoryInfoList handles that directly.
816   bool Read();
817 
818   MDRawMemoryInfo memory_info_;
819 };
820 
821 // MinidumpMemoryInfoList contains a list of information about
822 // mapped memory regions for a process in the form of MDRawMemoryInfo.
823 // It maintains a map of these structures so that it may easily provide
824 // info corresponding to a specific address.
825 class MinidumpMemoryInfoList : public MinidumpStream {
826  public:
827   virtual ~MinidumpMemoryInfoList();
828 
info_count()829   unsigned int info_count() const { return valid_ ? info_count_ : 0; }
830 
831   const MinidumpMemoryInfo* GetMemoryInfoForAddress(uint64_t address) const;
832   const MinidumpMemoryInfo* GetMemoryInfoAtIndex(unsigned int index) const;
833 
834   // Print a human-readable representation of the object to stdout.
835   void Print();
836 
837  private:
838   friend class Minidump;
839 
840   typedef vector<MinidumpMemoryInfo> MinidumpMemoryInfos;
841 
842   static const uint32_t kStreamType = MD_MEMORY_INFO_LIST_STREAM;
843 
844   explicit MinidumpMemoryInfoList(Minidump* minidump);
845 
846   bool Read(uint32_t expected_size);
847 
848   // Access to memory info using addresses as the key.
849   RangeMap<uint64_t, unsigned int> *range_map_;
850 
851   MinidumpMemoryInfos* infos_;
852   uint32_t info_count_;
853 };
854 
855 
856 // Minidump is the user's interface to a minidump file.  It wraps MDRawHeader
857 // and provides access to the minidump's top-level stream directory.
858 class Minidump {
859  public:
860   // path is the pathname of a file containing the minidump.
861   explicit Minidump(const string& path);
862   // input is an istream wrapping minidump data. Minidump holds a
863   // weak pointer to input, and the caller must ensure that the stream
864   // is valid as long as the Minidump object is.
865   explicit Minidump(std::istream& input);
866 
867   virtual ~Minidump();
868 
869   // path may be empty if the minidump was not opened from a file
path()870   virtual string path() const {
871     return path_;
872   }
set_max_streams(uint32_t max_streams)873   static void set_max_streams(uint32_t max_streams) {
874     max_streams_ = max_streams;
875   }
max_streams()876   static uint32_t max_streams() { return max_streams_; }
877 
set_max_string_length(uint32_t max_string_length)878   static void set_max_string_length(uint32_t max_string_length) {
879     max_string_length_ = max_string_length;
880   }
max_string_length()881   static uint32_t max_string_length() { return max_string_length_; }
882 
header()883   virtual const MDRawHeader* header() const { return valid_ ? &header_ : NULL; }
884 
885   // Reads the CPU information from the system info stream and generates the
886   // appropriate CPU flags.  The returned context_cpu_flags are the same as
887   // if the CPU type bits were set in the context_flags of a context record.
888   // On success, context_cpu_flags will have the flags that identify the CPU.
889   // If a system info stream is missing, context_cpu_flags will be 0.
890   // Returns true if the current position in the stream was not changed.
891   // Returns false when the current location in the stream was changed and the
892   // attempt to restore the original position failed.
893   bool GetContextCPUFlagsFromSystemInfo(uint32_t* context_cpu_flags);
894 
895   // Reads the minidump file's header and top-level stream directory.
896   // The minidump is expected to be positioned at the beginning of the
897   // header.  Read() sets up the stream list and map, and validates the
898   // Minidump object.
899   virtual bool Read();
900 
901   // The next set of methods are stubs that call GetStream.  They exist to
902   // force code generation of the templatized API within the module, and
903   // to avoid exposing an ugly API (GetStream needs to accept a garbage
904   // parameter).
905   virtual MinidumpThreadList* GetThreadList();
906   virtual MinidumpModuleList* GetModuleList();
907   virtual MinidumpMemoryList* GetMemoryList();
908   virtual MinidumpException* GetException();
909   virtual MinidumpAssertion* GetAssertion();
910   virtual MinidumpSystemInfo* GetSystemInfo();
911   virtual MinidumpMiscInfo* GetMiscInfo();
912   virtual MinidumpBreakpadInfo* GetBreakpadInfo();
913   virtual MinidumpMemoryInfoList* GetMemoryInfoList();
914 
915   // The next set of methods are provided for users who wish to access
916   // data in minidump files directly, while leveraging the rest of
917   // this class and related classes to handle the basic minidump
918   // structure and known stream types.
919 
GetDirectoryEntryCount()920   unsigned int GetDirectoryEntryCount() const {
921     return valid_ ? header_.stream_count : 0;
922   }
923   const MDRawDirectory* GetDirectoryEntryAtIndex(unsigned int index) const;
924 
925   // The next 2 methods are lower-level I/O routines.  They use fd_.
926 
927   // Reads count bytes from the minidump at the current position into
928   // the storage area pointed to by bytes.  bytes must be of sufficient
929   // size.  After the read, the file position is advanced by count.
930   bool ReadBytes(void* bytes, size_t count);
931 
932   // Sets the position of the minidump file to offset.
933   bool SeekSet(off_t offset);
934 
935   // Returns the current position of the minidump file.
936   off_t Tell();
937 
938   // The next 2 methods are medium-level I/O routines.
939 
940   // ReadString returns a string which is owned by the caller!  offset
941   // specifies the offset that a length-encoded string is stored at in the
942   // minidump file.
943   string* ReadString(off_t offset);
944 
945   // SeekToStreamType positions the file at the beginning of a stream
946   // identified by stream_type, and informs the caller of the stream's
947   // length by setting *stream_length.  Because stream_map maps each stream
948   // type to only one stream in the file, this might mislead the user into
949   // thinking that the stream that this seeks to is the only stream with
950   // type stream_type.  That can't happen for streams that these classes
951   // deal with directly, because they're only supposed to be present in the
952   // file singly, and that's verified when stream_map_ is built.  Users who
953   // are looking for other stream types should be aware of this
954   // possibility, and consider using GetDirectoryEntryAtIndex (possibly
955   // with GetDirectoryEntryCount) if expecting multiple streams of the same
956   // type in a single minidump file.
957   bool SeekToStreamType(uint32_t stream_type, uint32_t* stream_length);
958 
swap()959   bool swap() const { return valid_ ? swap_ : false; }
960 
961   // Print a human-readable representation of the object to stdout.
962   void Print();
963 
964  private:
965   // MinidumpStreamInfo is used in the MinidumpStreamMap.  It lets
966   // the Minidump object locate interesting streams quickly, and
967   // provides a convenient place to stash MinidumpStream objects.
968   struct MinidumpStreamInfo {
MinidumpStreamInfoMinidumpStreamInfo969     MinidumpStreamInfo() : stream_index(0), stream(NULL) {}
~MinidumpStreamInfoMinidumpStreamInfo970     ~MinidumpStreamInfo() { delete stream; }
971 
972     // Index into the MinidumpDirectoryEntries vector
973     unsigned int    stream_index;
974 
975     // Pointer to the stream if cached, or NULL if not yet populated
976     MinidumpStream* stream;
977   };
978 
979   typedef vector<MDRawDirectory> MinidumpDirectoryEntries;
980   typedef map<uint32_t, MinidumpStreamInfo> MinidumpStreamMap;
981 
982   template<typename T> T* GetStream(T** stream);
983 
984   // Opens the minidump file, or if already open, seeks to the beginning.
985   bool Open();
986 
987   // The largest number of top-level streams that will be read from a minidump.
988   // Note that streams are only read (and only consume memory) as needed,
989   // when directed by the caller.  The default is 128.
990   static uint32_t max_streams_;
991 
992   // The maximum length of a UTF-16 string that will be read from a minidump
993   // in 16-bit words.  The default is 1024.  UTF-16 strings are converted
994   // to UTF-8 when stored in memory, and each UTF-16 word will be represented
995   // by as many as 3 bytes in UTF-8.
996   static unsigned int max_string_length_;
997 
998   MDRawHeader               header_;
999 
1000   // The list of streams.
1001   MinidumpDirectoryEntries* directory_;
1002 
1003   // Access to streams using the stream type as the key.
1004   MinidumpStreamMap*        stream_map_;
1005 
1006   // The pathname of the minidump file to process, set in the constructor.
1007   // This may be empty if the minidump was opened directly from a stream.
1008   const string              path_;
1009 
1010   // The stream for all file I/O.  Used by ReadBytes and SeekSet.
1011   // Set based on the path in Open, or directly in the constructor.
1012   std::istream*             stream_;
1013 
1014   // swap_ is true if the minidump file should be byte-swapped.  If the
1015   // minidump was produced by a CPU that is other-endian than the CPU
1016   // processing the minidump, this will be true.  If the two CPUs are
1017   // same-endian, this will be false.
1018   bool                      swap_;
1019 
1020   // Validity of the Minidump structure, false immediately after
1021   // construction or after a failed Read(); true following a successful
1022   // Read().
1023   bool                      valid_;
1024 };
1025 
1026 
1027 }  // namespace google_breakpad
1028 
1029 
1030 #endif  // GOOGLE_BREAKPAD_PROCESSOR_MINIDUMP_H__
1031