1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Implementation of MiniDisassembler.
6 
7 #ifdef _WIN64
8 #error The code in this file should not be used on 64-bit Windows.
9 #endif
10 
11 #include "sandbox/win/src/sidestep/mini_disassembler.h"
12 
13 namespace sidestep {
14 
MiniDisassembler(bool operand_default_is_32_bits,bool address_default_is_32_bits)15 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
16                                    bool address_default_is_32_bits)
17     : operand_default_is_32_bits_(operand_default_is_32_bits),
18       address_default_is_32_bits_(address_default_is_32_bits) {
19   Initialize();
20 }
21 
MiniDisassembler()22 MiniDisassembler::MiniDisassembler()
23     : operand_default_is_32_bits_(true),
24       address_default_is_32_bits_(true) {
25   Initialize();
26 }
27 
Disassemble(unsigned char * start_byte,unsigned int * instruction_bytes)28 InstructionType MiniDisassembler::Disassemble(
29     unsigned char* start_byte,
30     unsigned int* instruction_bytes) {
31   // Clean up any state from previous invocations.
32   Initialize();
33 
34   // Start by processing any prefixes.
35   unsigned char* current_byte = start_byte;
36   unsigned int size = 0;
37   InstructionType instruction_type = ProcessPrefixes(current_byte, &size);
38 
39   if (IT_UNKNOWN == instruction_type)
40     return instruction_type;
41 
42   current_byte += size;
43   size = 0;
44 
45   // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
46   // and address_is_32_bits_ flags are correctly set.
47 
48   instruction_type = ProcessOpcode(current_byte, 0, &size);
49 
50   // Check for error processing instruction
51   if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
52     return IT_UNKNOWN;
53   }
54 
55   current_byte += size;
56 
57   // Invariant: operand_bytes_ indicates the total size of operands
58   // specified by the opcode and/or ModR/M byte and/or SIB byte.
59   // pCurrentByte points to the first byte after the ModR/M byte, or after
60   // the SIB byte if it is present (i.e. the first byte of any operands
61   // encoded in the instruction).
62 
63   // We get the total length of any prefixes, the opcode, and the ModR/M and
64   // SIB bytes if present, by taking the difference of the original starting
65   // address and the current byte (which points to the first byte of the
66   // operands if present, or to the first byte of the next instruction if
67   // they are not).  Adding the count of bytes in the operands encoded in
68   // the instruction gives us the full length of the instruction in bytes.
69   *instruction_bytes += operand_bytes_ + (current_byte - start_byte);
70 
71   // Return the instruction type, which was set by ProcessOpcode().
72   return instruction_type_;
73 }
74 
Initialize()75 void MiniDisassembler::Initialize() {
76   operand_is_32_bits_ = operand_default_is_32_bits_;
77   address_is_32_bits_ = address_default_is_32_bits_;
78   operand_bytes_ = 0;
79   have_modrm_ = false;
80   should_decode_modrm_ = false;
81   instruction_type_ = IT_UNKNOWN;
82   got_f2_prefix_ = false;
83   got_f3_prefix_ = false;
84   got_66_prefix_ = false;
85 }
86 
ProcessPrefixes(unsigned char * start_byte,unsigned int * size)87 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
88                                                   unsigned int* size) {
89   InstructionType instruction_type = IT_GENERIC;
90   const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
91 
92   switch (opcode.type_) {
93     case IT_PREFIX_ADDRESS:
94       address_is_32_bits_ = !address_default_is_32_bits_;
95       goto nochangeoperand;
96     case IT_PREFIX_OPERAND:
97       operand_is_32_bits_ = !operand_default_is_32_bits_;
98       nochangeoperand:
99     case IT_PREFIX:
100 
101       if (0xF2 == (*start_byte))
102         got_f2_prefix_ = true;
103       else if (0xF3 == (*start_byte))
104         got_f3_prefix_ = true;
105       else if (0x66 == (*start_byte))
106         got_66_prefix_ = true;
107 
108       instruction_type = opcode.type_;
109       (*size)++;
110       // we got a prefix, so add one and check next byte
111       ProcessPrefixes(start_byte + 1, size);
112     default:
113       break;   // not a prefix byte
114   }
115 
116   return instruction_type;
117 }
118 
ProcessOpcode(unsigned char * start_byte,unsigned int table_index,unsigned int * size)119 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
120                                                 unsigned int table_index,
121                                                 unsigned int* size) {
122   const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
123   unsigned char current_byte = (*start_byte) >> table.shift_;
124   current_byte = current_byte & table.mask_;  // Mask out the bits we will use
125 
126   // Check whether the byte we have is inside the table we have.
127   if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
128     instruction_type_ = IT_UNKNOWN;
129     return instruction_type_;
130   }
131 
132   const Opcode& opcode = table.table_[current_byte];
133   if (IT_UNUSED == opcode.type_) {
134     // This instruction is not used by the IA-32 ISA, so we indicate
135     // this to the user.  Probably means that we were pointed to
136     // a byte in memory that was not the start of an instruction.
137     instruction_type_ = IT_UNUSED;
138     return instruction_type_;
139   } else if (IT_REFERENCE == opcode.type_) {
140     // We are looking at an opcode that has more bytes (or is continued
141     // in the ModR/M byte).  Recursively find the opcode definition in
142     // the table for the opcode's next byte.
143     (*size)++;
144     ProcessOpcode(start_byte + 1, opcode.table_index_, size);
145     return instruction_type_;
146   }
147 
148   const SpecificOpcode* specific_opcode = reinterpret_cast<
149                                               const SpecificOpcode*>(&opcode);
150   if (opcode.is_prefix_dependent_) {
151     if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
152       specific_opcode = &opcode.opcode_if_f2_prefix_;
153     } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
154       specific_opcode = &opcode.opcode_if_f3_prefix_;
155     } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
156       specific_opcode = &opcode.opcode_if_66_prefix_;
157     }
158   }
159 
160   // Inv: The opcode type is known.
161   instruction_type_ = specific_opcode->type_;
162 
163   // Let's process the operand types to see if we have any immediate
164   // operands, and/or a ModR/M byte.
165 
166   ProcessOperand(specific_opcode->flag_dest_);
167   ProcessOperand(specific_opcode->flag_source_);
168   ProcessOperand(specific_opcode->flag_aux_);
169 
170   // Inv: We have processed the opcode and incremented operand_bytes_
171   // by the number of bytes of any operands specified by the opcode
172   // that are stored in the instruction (not registers etc.).  Now
173   // we need to return the total number of bytes for the opcode and
174   // for the ModR/M or SIB bytes if they are present.
175 
176   if (table.mask_ != 0xff) {
177     if (have_modrm_) {
178       // we're looking at a ModR/M byte so we're not going to
179       // count that into the opcode size
180       ProcessModrm(start_byte, size);
181       return IT_GENERIC;
182     } else {
183       // need to count the ModR/M byte even if it's just being
184       // used for opcode extension
185       (*size)++;
186       return IT_GENERIC;
187     }
188   } else {
189     if (have_modrm_) {
190       // The ModR/M byte is the next byte.
191       (*size)++;
192       ProcessModrm(start_byte + 1, size);
193       return IT_GENERIC;
194     } else {
195       (*size)++;
196       return IT_GENERIC;
197     }
198   }
199 }
200 
ProcessOperand(int flag_operand)201 bool MiniDisassembler::ProcessOperand(int flag_operand) {
202   bool succeeded = true;
203   if (AM_NOT_USED == flag_operand)
204     return succeeded;
205 
206   // Decide what to do based on the addressing mode.
207   switch (flag_operand & AM_MASK) {
208     // No ModR/M byte indicated by these addressing modes, and no
209     // additional (e.g. immediate) parameters.
210     case AM_A:  // Direct address
211     case AM_F:  // EFLAGS register
212     case AM_X:  // Memory addressed by the DS:SI register pair
213     case AM_Y:  // Memory addressed by the ES:DI register pair
214     case AM_IMPLICIT:  // Parameter is implicit, occupies no space in
215                        // instruction
216       break;
217 
218     // There is a ModR/M byte but it does not necessarily need
219     // to be decoded.
220     case AM_C:  // reg field of ModR/M selects a control register
221     case AM_D:  // reg field of ModR/M selects a debug register
222     case AM_G:  // reg field of ModR/M selects a general register
223     case AM_P:  // reg field of ModR/M selects an MMX register
224     case AM_R:  // mod field of ModR/M may refer only to a general register
225     case AM_S:  // reg field of ModR/M selects a segment register
226     case AM_T:  // reg field of ModR/M selects a test register
227     case AM_V:  // reg field of ModR/M selects a 128-bit XMM register
228       have_modrm_ = true;
229       break;
230 
231     // In these addressing modes, there is a ModR/M byte and it needs to be
232     // decoded. No other (e.g. immediate) params than indicated in ModR/M.
233     case AM_E:  // Operand is either a general-purpose register or memory,
234                 // specified by ModR/M byte
235     case AM_M:  // ModR/M byte will refer only to memory
236     case AM_Q:  // Operand is either an MMX register or memory (complex
237                 // evaluation), specified by ModR/M byte
238     case AM_W:  // Operand is either a 128-bit XMM register or memory (complex
239                 // eval), specified by ModR/M byte
240       have_modrm_ = true;
241       should_decode_modrm_ = true;
242       break;
243 
244     // These addressing modes specify an immediate or an offset value
245     // directly, so we need to look at the operand type to see how many
246     // bytes.
247     case AM_I:  // Immediate data.
248     case AM_J:  // Jump to offset.
249     case AM_O:  // Operand is at offset.
250       switch (flag_operand & OT_MASK) {
251         case OT_B:  // Byte regardless of operand-size attribute.
252           operand_bytes_ += OS_BYTE;
253           break;
254         case OT_C:  // Byte or word, depending on operand-size attribute.
255           if (operand_is_32_bits_)
256             operand_bytes_ += OS_WORD;
257           else
258             operand_bytes_ += OS_BYTE;
259           break;
260         case OT_D:  // Doubleword, regardless of operand-size attribute.
261           operand_bytes_ += OS_DOUBLE_WORD;
262           break;
263         case OT_DQ:  // Double-quadword, regardless of operand-size attribute.
264           operand_bytes_ += OS_DOUBLE_QUAD_WORD;
265           break;
266         case OT_P:  // 32-bit or 48-bit pointer, depending on operand-size
267                     // attribute.
268           if (operand_is_32_bits_)
269             operand_bytes_ += OS_48_BIT_POINTER;
270           else
271             operand_bytes_ += OS_32_BIT_POINTER;
272           break;
273         case OT_PS:  // 128-bit packed single-precision floating-point data.
274           operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
275           break;
276         case OT_Q:  // Quadword, regardless of operand-size attribute.
277           operand_bytes_ += OS_QUAD_WORD;
278           break;
279         case OT_S:  // 6-byte pseudo-descriptor.
280           operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
281           break;
282         case OT_SD:  // Scalar Double-Precision Floating-Point Value
283         case OT_PD:  // Unaligned packed double-precision floating point value
284           operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
285           break;
286         case OT_SS:
287           // Scalar element of a 128-bit packed single-precision
288           // floating data.
289           // We simply return enItUnknown since we don't have to support
290           // floating point
291           succeeded = false;
292           break;
293         case OT_V:  // Word or doubleword, depending on operand-size attribute.
294           if (operand_is_32_bits_)
295             operand_bytes_ += OS_DOUBLE_WORD;
296           else
297             operand_bytes_ += OS_WORD;
298           break;
299         case OT_W:  // Word, regardless of operand-size attribute.
300           operand_bytes_ += OS_WORD;
301           break;
302 
303         // Can safely ignore these.
304         case OT_A:  // Two one-word operands in memory or two double-word
305                     // operands in memory
306         case OT_PI:  // Quadword MMX technology register (e.g. mm0)
307         case OT_SI:  // Doubleword integer register (e.g., eax)
308           break;
309 
310         default:
311           break;
312       }
313       break;
314 
315     default:
316       break;
317   }
318 
319   return succeeded;
320 }
321 
ProcessModrm(unsigned char * start_byte,unsigned int * size)322 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
323                                     unsigned int* size) {
324   // If we don't need to decode, we just return the size of the ModR/M
325   // byte (there is never a SIB byte in this case).
326   if (!should_decode_modrm_) {
327     (*size)++;
328     return true;
329   }
330 
331   // We never care about the reg field, only the combination of the mod
332   // and r/m fields, so let's start by packing those fields together into
333   // 5 bits.
334   unsigned char modrm = (*start_byte);
335   unsigned char mod = modrm & 0xC0;  // mask out top two bits to get mod field
336   modrm = modrm & 0x07;  // mask out bottom 3 bits to get r/m field
337   mod = mod >> 3;  // shift the mod field to the right place
338   modrm = mod | modrm;  // combine the r/m and mod fields as discussed
339   mod = mod >> 3;  // shift the mod field to bits 2..0
340 
341   // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
342   // in bits 2..0, and mod contains the mod field in bits 2..0
343 
344   const ModrmEntry* modrm_entry = 0;
345   if (address_is_32_bits_)
346     modrm_entry = &s_ia32_modrm_map_[modrm];
347   else
348     modrm_entry = &s_ia16_modrm_map_[modrm];
349 
350   // Invariant: modrm_entry points to information that we need to decode
351   // the ModR/M byte.
352 
353   // Add to the count of operand bytes, if the ModR/M byte indicates
354   // that some operands are encoded in the instruction.
355   if (modrm_entry->is_encoded_in_instruction_)
356     operand_bytes_ += modrm_entry->operand_size_;
357 
358   // Process the SIB byte if necessary, and return the count
359   // of ModR/M and SIB bytes.
360   if (modrm_entry->use_sib_byte_) {
361     (*size)++;
362     return ProcessSib(start_byte + 1, mod, size);
363   } else {
364     (*size)++;
365     return true;
366   }
367 }
368 
ProcessSib(unsigned char * start_byte,unsigned char mod,unsigned int * size)369 bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
370                                   unsigned char mod,
371                                   unsigned int* size) {
372   // get the mod field from the 2..0 bits of the SIB byte
373   unsigned char sib_base = (*start_byte) & 0x07;
374   if (0x05 == sib_base) {
375     switch (mod) {
376       case 0x00:  // mod == 00
377       case 0x02:  // mod == 10
378         operand_bytes_ += OS_DOUBLE_WORD;
379         break;
380       case 0x01:  // mod == 01
381         operand_bytes_ += OS_BYTE;
382         break;
383       case 0x03:  // mod == 11
384         // According to the IA-32 docs, there does not seem to be a disp
385         // value for this value of mod
386       default:
387         break;
388     }
389   }
390 
391   (*size)++;
392   return true;
393 }
394 
395 };  // namespace sidestep
396