1 // All rights reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 // disassembler_x86.h: Basic x86 bytecode disassembler
30 //
31 // Provides a simple disassembler which wraps libdisasm. This allows simple
32 // tests to be run against bytecode to test for various properties.
33 //
34 // Author: Cris Neckar
35 
36 #ifndef GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_
37 #define GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_
38 
39 #include <stddef.h>
40 #include <sys/types.h>
41 
42 #include "google_breakpad/common/breakpad_types.h"
43 
44 namespace libdis {
45 #include "third_party/libdisasm/libdis.h"
46 }
47 
48 namespace google_breakpad {
49 
// Flag constants sharing the DISX86_ prefix. Apart from DISX86_NONE, which
// is zero, every enumerator is a distinct single bit, so any combination of
// them can be OR-ed together into one mask without collisions.
// NOTE(review): presumably these are the bits accumulated in
// DisassemblerX86::flags_ and returned by flags() -- confirm against the
// implementation file.
enum {
  DISX86_NONE                = 0,
  DISX86_BAD_BRANCH_TARGET   = 1 << 0,
  DISX86_BAD_ARGUMENT_PASSED = 1 << 1,
  DISX86_BAD_WRITE           = 1 << 2,
  DISX86_BAD_BLOCK_WRITE     = 1 << 3,
  DISX86_BAD_READ            = 1 << 4,
  DISX86_BAD_BLOCK_READ      = 1 << 5,
  DISX86_BAD_COMPARISON      = 1 << 6
};
60 
61 class DisassemblerX86 {
62   public:
63     // TODO(cdn): Modify this class to take a MemoryRegion instead of just
64     // a raw buffer. This will make it easier to use this on arbitrary
65     // minidumps without first copying out the code segment.
66     DisassemblerX86(const uint8_t *bytecode, uint32_t, uint32_t);
67     ~DisassemblerX86();
68 
69     // This walks to the next instruction in the memory region and
70     // sets flags based on the type of instruction and previous state
71     // including any registers marked as bad through setBadRead()
72     // or setBadWrite(). This method can be called in a loop to
73     // disassemble until the end of a region.
74     uint32_t NextInstruction();
75 
76     // Indicates whether the current disassembled instruction was valid.
currentInstructionValid()77     bool currentInstructionValid() { return instr_valid_; }
78 
79     // Returns the current instruction as defined in libdis.h,
80     // or NULL if the current instruction is not valid.
currentInstruction()81     const libdis::x86_insn_t* currentInstruction() {
82       return instr_valid_ ? &current_instr_ : NULL;
83     }
84 
85     // Returns the type of the current instruction as defined in libdis.h.
currentInstructionGroup()86     libdis::x86_insn_group currentInstructionGroup() {
87       return current_instr_.group;
88     }
89 
90     // Indicates whether a return instruction has been encountered.
endOfBlock()91     bool endOfBlock() { return end_of_block_; }
92 
93     // The flags set so far for the disassembly.
flags()94     uint16_t flags() { return flags_; }
95 
96     // This sets an indicator that the register used to determine
97     // src or dest for the current instruction is tainted. These can
98     // be used after examining the current instruction to indicate,
99     // for example that a bad read or write occurred and the pointer
100     // stored in the register is currently invalid.
101     bool setBadRead();
102     bool setBadWrite();
103 
104   protected:
105     const uint8_t *bytecode_;
106     uint32_t size_;
107     uint32_t virtual_address_;
108     uint32_t current_byte_offset_;
109     uint32_t current_inst_offset_;
110 
111     bool instr_valid_;
112     libdis::x86_insn_t current_instr_;
113 
114     // TODO(cdn): Maybe also track an expression's index register.
115     // ex: mov eax, [ebx + ecx]; ebx is base, ecx is index.
116     bool register_valid_;
117     libdis::x86_reg_t bad_register_;
118 
119     bool pushed_bad_value_;
120     bool end_of_block_;
121 
122     uint16_t flags_;
123 };
124 
125 }  // namespace google_breakpad
126 
127 #endif  // GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_
128