1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
28 {
29 return "x86" SLJIT_CPUINFO;
30 }
31
32 /*
33 32b register indexes:
34 0 - EAX
35 1 - ECX
36 2 - EDX
37 3 - EBX
38 4 - none
39 5 - EBP
40 6 - ESI
41 7 - EDI
42 */
43
44 /*
45 64b register indexes:
46 0 - RAX
47 1 - RCX
48 2 - RDX
49 3 - RBX
50 4 - none
51 5 - RBP
52 6 - RSI
53 7 - RDI
54 8 - R8 - From now on REX prefix is required
55 9 - R9
56 10 - R10
57 11 - R11
58 12 - R12
59 13 - R13
60 14 - R14
61 15 - R15
62 */
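/* Illustrative sketch (not part of the original source): how the REX prefix
   interacts with the register indexes above. The byte values follow the
   standard x86-64 encoding rules and can be checked with any assembler.

     mov rax, r8   ->  49 8b c0
                       ^^ REX_W | REX_B (0x48 | 0x41): 64-bit operand size,
                          and the ModRM rm field refers to r8..r15
                          ^^ MOV_r_rm opcode (0x8b)
                             ^^ ModRM: mod=11 (register), reg=rax (0),
                                rm = r8 & 0x7 (0)

   This is why reg_lmap below stores reg_map & 0x7: the ModRM byte only holds
   the low three bits of a register index, and the fourth bit travels in the
   REX prefix. */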
63
64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
65
66 /* Last register + 1. */
67 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
68
69 static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
70 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
71 };
72
73 #define CHECK_EXTRA_REGS(p, w, do) \
74 if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
75 w = FIXED_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
76 p = SLJIT_MEM1(SLJIT_SP); \
77 do; \
78 }
79
80 #else /* SLJIT_CONFIG_X86_32 */
81
82 /* Last register + 1. */
83 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
84 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
85 #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
86
87 /* Note: r12 & 0x7 == 0b100, which is decoded as a SIB byte being present.
88 Note: avoid using r12 and r13 for memory addressing;
89 therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
90 #ifndef _WIN64
91 /* The 1st argument is passed in rdi, the 2nd in rsi, the 3rd in rdx. */
92 static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
93 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
94 };
95 /* low-map. reg_map & 0x7. */
96 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
97 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
98 };
99 #else
100 /* The 1st argument is passed in rcx, the 2nd in rdx, the 3rd in r8. */
101 static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
102 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
103 };
104 /* low-map. reg_map & 0x7. */
105 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
106 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
107 };
108 #endif
109
110 #define REX_W 0x48
111 #define REX_R 0x44
112 #define REX_X 0x42
113 #define REX_B 0x41
114 #define REX 0x40
115
116 #ifndef _WIN64
117 #define HALFWORD_MAX 0x7fffffffl
118 #define HALFWORD_MIN -0x80000000l
119 #else
120 #define HALFWORD_MAX 0x7fffffffll
121 #define HALFWORD_MIN -0x80000000ll
122 #endif
123
124 #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
125 #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
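/* For illustration: IS_HALFWORD() decides whether an immediate fits into the
   sign-extended 32-bit field of a REX.W instruction. 0x7fffffff qualifies,
   while 0x80000000 does not, because the latter would sign-extend to a
   negative 64-bit value. */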
126
127 #define CHECK_EXTRA_REGS(p, w, do)
128
129 #endif /* SLJIT_CONFIG_X86_32 */
130
131 #define TMP_FREG (0)
132
133 /* Size flags for emit_x86_instruction: */
134 #define EX86_BIN_INS 0x0010
135 #define EX86_SHIFT_INS 0x0020
136 #define EX86_REX 0x0040
137 #define EX86_NO_REXW 0x0080
138 #define EX86_BYTE_ARG 0x0100
139 #define EX86_HALF_ARG 0x0200
140 #define EX86_PREF_66 0x0400
141 #define EX86_PREF_F2 0x0800
142 #define EX86_PREF_F3 0x1000
143 #define EX86_SSE2_OP1 0x2000
144 #define EX86_SSE2_OP2 0x4000
145 #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
146
147 /* --------------------------------------------------------------------- */
148 /* Instruction forms */
149 /* --------------------------------------------------------------------- */
150
151 #define ADD (/* BINARY */ 0 << 3)
152 #define ADD_EAX_i32 0x05
153 #define ADD_r_rm 0x03
154 #define ADD_rm_r 0x01
155 #define ADDSD_x_xm 0x58
156 #define ADC (/* BINARY */ 2 << 3)
157 #define ADC_EAX_i32 0x15
158 #define ADC_r_rm 0x13
159 #define ADC_rm_r 0x11
160 #define AND (/* BINARY */ 4 << 3)
161 #define AND_EAX_i32 0x25
162 #define AND_r_rm 0x23
163 #define AND_rm_r 0x21
164 #define ANDPD_x_xm 0x54
165 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
166 #define CALL_i32 0xe8
167 #define CALL_rm (/* GROUP_FF */ 2 << 3)
168 #define CDQ 0x99
169 #define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
170 #define CMP (/* BINARY */ 7 << 3)
171 #define CMP_EAX_i32 0x3d
172 #define CMP_r_rm 0x3b
173 #define CMP_rm_r 0x39
174 #define CVTPD2PS_x_xm 0x5a
175 #define CVTSI2SD_x_rm 0x2a
176 #define CVTTSD2SI_r_xm 0x2c
177 #define DIV (/* GROUP_F7 */ 6 << 3)
178 #define DIVSD_x_xm 0x5e
179 #define INT3 0xcc
180 #define IDIV (/* GROUP_F7 */ 7 << 3)
181 #define IMUL (/* GROUP_F7 */ 5 << 3)
182 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
183 #define IMUL_r_rm_i8 0x6b
184 #define IMUL_r_rm_i32 0x69
185 #define JE_i8 0x74
186 #define JMP_i8 0xeb
187 #define JMP_i32 0xe9
188 #define JMP_rm (/* GROUP_FF */ 4 << 3)
189 #define LEA_r_m 0x8d
190 #define MOV_r_rm 0x8b
191 #define MOV_r_i32 0xb8
192 #define MOV_rm_r 0x89
193 #define MOV_rm_i32 0xc7
194 #define MOV_rm8_i8 0xc6
195 #define MOV_rm8_r8 0x88
196 #define MOVSD_x_xm 0x10
197 #define MOVSD_xm_x 0x11
198 #define MOVSXD_r_rm 0x63
199 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
200 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
201 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
202 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
203 #define MUL (/* GROUP_F7 */ 4 << 3)
204 #define MULSD_x_xm 0x59
205 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
206 #define NOP 0x90
207 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
208 #define OR (/* BINARY */ 1 << 3)
209 #define OR_r_rm 0x0b
210 #define OR_EAX_i32 0x0d
211 #define OR_rm_r 0x09
212 #define OR_rm8_r8 0x08
213 #define POP_r 0x58
214 #define POP_rm 0x8f
215 #define POPF 0x9d
216 #define PUSH_i32 0x68
217 #define PUSH_r 0x50
218 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
219 #define PUSHF 0x9c
220 #define RET_near 0xc3
221 #define RET_i16 0xc2
222 #define SBB (/* BINARY */ 3 << 3)
223 #define SBB_EAX_i32 0x1d
224 #define SBB_r_rm 0x1b
225 #define SBB_rm_r 0x19
226 #define SAR (/* SHIFT */ 7 << 3)
227 #define SHL (/* SHIFT */ 4 << 3)
228 #define SHR (/* SHIFT */ 5 << 3)
229 #define SUB (/* BINARY */ 5 << 3)
230 #define SUB_EAX_i32 0x2d
231 #define SUB_r_rm 0x2b
232 #define SUB_rm_r 0x29
233 #define SUBSD_x_xm 0x5c
234 #define TEST_EAX_i32 0xa9
235 #define TEST_rm_r 0x85
236 #define UCOMISD_x_xm 0x2e
237 #define UNPCKLPD_x_xm 0x14
238 #define XCHG_EAX_r 0x90
239 #define XCHG_r_rm 0x87
240 #define XOR (/* BINARY */ 6 << 3)
241 #define XOR_EAX_i32 0x35
242 #define XOR_r_rm 0x33
243 #define XOR_rm_r 0x31
244 #define XORPD_x_xm 0x57
245
246 #define GROUP_0F 0x0f
247 #define GROUP_F7 0xf7
248 #define GROUP_FF 0xff
249 #define GROUP_BINARY_81 0x81
250 #define GROUP_BINARY_83 0x83
251 #define GROUP_SHIFT_1 0xd1
252 #define GROUP_SHIFT_N 0xc1
253 #define GROUP_SHIFT_CL 0xd3
254
255 #define MOD_REG 0xc0
256 #define MOD_DISP8 0x40
257
258 #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))
259
260 #define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
261 #define POP_REG(r) (*inst++ = (POP_r + (r)))
262 #define RET() (*inst++ = (RET_near))
263 #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
264 /* r32, r/m32 */
265 #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
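/* Example (illustrative only): MOV_RM(0x3, 2, 1) emits 8b d1, i.e.
   "mov edx, ecx": the MOV_r_rm opcode followed by ModRM 11 010 001
   (mod = register-direct, reg = edx, rm = ecx). */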
266
267 /* Multithreading does not affect these static variables, since they store
268 built-in CPU features. Therefore it is harmless when they are overwritten by
269 different threads that detect the CPU features at the same time. */
270 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
271 static sljit_si cpu_has_sse2 = -1;
272 #endif
273 static sljit_si cpu_has_cmov = -1;
274
275 #if defined(_MSC_VER) && _MSC_VER >= 1400
276 #include <intrin.h>
277 #endif
278
279 static void get_cpu_features(void)
280 {
281 sljit_ui features;
282
283 #if defined(_MSC_VER) && _MSC_VER >= 1400
284
285 int CPUInfo[4];
286 __cpuid(CPUInfo, 1);
287 features = (sljit_ui)CPUInfo[3];
288
289 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
290
291 /* AT&T syntax. */
292 __asm__ (
293 "movl $0x1, %%eax\n"
294 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
295 /* On x86-32, there is no red zone, so this
296 should work (no need for a local variable). */
297 "push %%ebx\n"
298 #endif
299 "cpuid\n"
300 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
301 "pop %%ebx\n"
302 #endif
303 "movl %%edx, %0\n"
304 : "=g" (features)
305 :
306 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
307 : "%eax", "%ecx", "%edx"
308 #else
309 : "%rax", "%rbx", "%rcx", "%rdx"
310 #endif
311 );
312
313 #else /* _MSC_VER && _MSC_VER >= 1400 */
314
315 /* Intel syntax. */
316 __asm {
317 mov eax, 1
318 cpuid
319 mov features, edx
320 }
321
322 #endif /* _MSC_VER && _MSC_VER >= 1400 */
323
324 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
325 cpu_has_sse2 = (features >> 26) & 0x1;
326 #endif
327 cpu_has_cmov = (features >> 15) & 0x1;
328 }
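/* The detection is lazy; callers test the cached value first, as done later
   in this file:

     if (cpu_has_cmov == -1)
         get_cpu_features();
     if (cpu_has_cmov) { ... emit cmovne ... } else { ... short branch ... }
*/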
329
330 static sljit_ub get_jump_code(sljit_si type)
331 {
332 switch (type) {
333 case SLJIT_C_EQUAL:
334 case SLJIT_C_FLOAT_EQUAL:
335 return 0x84 /* je */;
336
337 case SLJIT_C_NOT_EQUAL:
338 case SLJIT_C_FLOAT_NOT_EQUAL:
339 return 0x85 /* jne */;
340
341 case SLJIT_C_LESS:
342 case SLJIT_C_FLOAT_LESS:
343 return 0x82 /* jc */;
344
345 case SLJIT_C_GREATER_EQUAL:
346 case SLJIT_C_FLOAT_GREATER_EQUAL:
347 return 0x83 /* jae */;
348
349 case SLJIT_C_GREATER:
350 case SLJIT_C_FLOAT_GREATER:
351 return 0x87 /* jnbe */;
352
353 case SLJIT_C_LESS_EQUAL:
354 case SLJIT_C_FLOAT_LESS_EQUAL:
355 return 0x86 /* jbe */;
356
357 case SLJIT_C_SIG_LESS:
358 return 0x8c /* jl */;
359
360 case SLJIT_C_SIG_GREATER_EQUAL:
361 return 0x8d /* jnl */;
362
363 case SLJIT_C_SIG_GREATER:
364 return 0x8f /* jnle */;
365
366 case SLJIT_C_SIG_LESS_EQUAL:
367 return 0x8e /* jle */;
368
369 case SLJIT_C_OVERFLOW:
370 case SLJIT_C_MUL_OVERFLOW:
371 return 0x80 /* jo */;
372
373 case SLJIT_C_NOT_OVERFLOW:
374 case SLJIT_C_MUL_NOT_OVERFLOW:
375 return 0x81 /* jno */;
376
377 case SLJIT_C_FLOAT_UNORDERED:
378 return 0x8a /* jp */;
379
380 case SLJIT_C_FLOAT_ORDERED:
381 return 0x8b /* jpo */;
382 }
383 return 0;
384 }
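/* Background for the opcode values above: they are the second bytes of the
   near (32-bit displacement) Jcc forms, 0f 80..0f 8f. The corresponding
   short (8-bit displacement) forms are 70..7f, which is why
   generate_near_jump_code() below can derive a short jump opcode as
   get_jump_code(type) - 0x10 (e.g. je: near 0f 84, short 74). */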
385
386 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);
387
388 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
389 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
390 #endif
391
392 static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
393 {
394 sljit_si short_jump;
395 sljit_uw label_addr;
396
397 if (jump->flags & JUMP_LABEL)
398 label_addr = (sljit_uw)(code + jump->u.label->size);
399 else
400 label_addr = jump->u.target;
401 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
402
403 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
404 if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
405 return generate_far_jump_code(jump, code_ptr, type);
406 #endif
407
408 if (type == SLJIT_JUMP) {
409 if (short_jump)
410 *code_ptr++ = JMP_i8;
411 else
412 *code_ptr++ = JMP_i32;
413 jump->addr++;
414 }
415 else if (type >= SLJIT_FAST_CALL) {
416 short_jump = 0;
417 *code_ptr++ = CALL_i32;
418 jump->addr++;
419 }
420 else if (short_jump) {
421 *code_ptr++ = get_jump_code(type) - 0x10;
422 jump->addr++;
423 }
424 else {
425 *code_ptr++ = GROUP_0F;
426 *code_ptr++ = get_jump_code(type);
427 jump->addr += 2;
428 }
429
430 if (short_jump) {
431 jump->flags |= PATCH_MB;
432 code_ptr += sizeof(sljit_sb);
433 } else {
434 jump->flags |= PATCH_MW;
435 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
436 code_ptr += sizeof(sljit_sw);
437 #else
438 code_ptr += sizeof(sljit_si);
439 #endif
440 }
441
442 return code_ptr;
443 }
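/* Displacements are relative to the end of the instruction. A minimal
   example: the bytes eb fe encode "jmp -2", a jump to the jmp itself,
   since the 2-byte instruction ends two bytes past its own start. */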
444
445 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
446 {
447 struct sljit_memory_fragment *buf;
448 sljit_ub *code;
449 sljit_ub *code_ptr;
450 sljit_ub *buf_ptr;
451 sljit_ub *buf_end;
452 sljit_ub len;
453
454 struct sljit_label *label;
455 struct sljit_jump *jump;
456 struct sljit_const *const_;
457
458 CHECK_ERROR_PTR();
459 check_sljit_generate_code(compiler);
460 reverse_buf(compiler);
461
462 /* Second code generation pass. */
463 code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
464 PTR_FAIL_WITH_EXEC_IF(code);
465 buf = compiler->buf;
466
467 code_ptr = code;
468 label = compiler->labels;
469 jump = compiler->jumps;
470 const_ = compiler->consts;
471 do {
472 buf_ptr = buf->memory;
473 buf_end = buf_ptr + buf->used_size;
474 do {
475 len = *buf_ptr++;
476 if (len > 0) {
477 /* The code is already generated. */
478 SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
479 code_ptr += len;
480 buf_ptr += len;
481 }
482 else {
483 if (*buf_ptr >= 4) {
484 jump->addr = (sljit_uw)code_ptr;
485 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
486 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
487 else
488 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
489 jump = jump->next;
490 }
491 else if (*buf_ptr == 0) {
492 label->addr = (sljit_uw)code_ptr;
493 label->size = code_ptr - code;
494 label = label->next;
495 }
496 else if (*buf_ptr == 1) {
497 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
498 const_ = const_->next;
499 }
500 else {
501 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
502 *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
503 buf_ptr++;
504 *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
505 code_ptr += sizeof(sljit_sw);
506 buf_ptr += sizeof(sljit_sw) - 1;
507 #else
508 code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
509 buf_ptr += sizeof(sljit_sw);
510 #endif
511 }
512 buf_ptr++;
513 }
514 } while (buf_ptr < buf_end);
515 SLJIT_ASSERT(buf_ptr == buf_end);
516 buf = buf->next;
517 } while (buf);
518
519 SLJIT_ASSERT(!label);
520 SLJIT_ASSERT(!jump);
521 SLJIT_ASSERT(!const_);
522
523 jump = compiler->jumps;
524 while (jump) {
525 if (jump->flags & PATCH_MB) {
526 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
527 *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
528 } else if (jump->flags & PATCH_MW) {
529 if (jump->flags & JUMP_LABEL) {
530 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
531 *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
532 #else
533 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
534 *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
535 #endif
536 }
537 else {
538 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
539 *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
540 #else
541 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
542 *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
543 #endif
544 }
545 }
546 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
547 else if (jump->flags & PATCH_MD)
548 *(sljit_sw*)jump->addr = jump->u.label->addr;
549 #endif
550
551 jump = jump->next;
552 }
553
554 /* Some space may be wasted because of short jumps. */
555 SLJIT_ASSERT(code_ptr <= code + compiler->size);
556 compiler->error = SLJIT_ERR_COMPILED;
557 compiler->executable_size = code_ptr - code;
558 return (void*)code;
559 }
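/* A summary of the intermediate buffer format consumed above (derived from
   the loop itself, not a separately documented interface): each record
   starts with a length byte. A non-zero length is followed by that many
   bytes of already-generated machine code. A zero length is followed by a
   tag byte: 0 marks a label, 1 marks a constant, 2 and 3 mark a fixed
   call/jump followed by a word-sized target address, and values >= 4 mark
   a jump whose sljit type is tag - 4. */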
560
561 /* --------------------------------------------------------------------- */
562 /* Operators */
563 /* --------------------------------------------------------------------- */
564
565 static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
566 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
567 sljit_si dst, sljit_sw dstw,
568 sljit_si src1, sljit_sw src1w,
569 sljit_si src2, sljit_sw src2w);
570
571 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
572 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
573 sljit_si dst, sljit_sw dstw,
574 sljit_si src1, sljit_sw src1w,
575 sljit_si src2, sljit_sw src2w);
576
577 static sljit_si emit_mov(struct sljit_compiler *compiler,
578 sljit_si dst, sljit_sw dstw,
579 sljit_si src, sljit_sw srcw);
580
581 static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
582 {
583 sljit_ub *inst;
584
585 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
586 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
587 FAIL_IF(!inst);
588 INC_SIZE(5);
589 #else
590 inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
591 FAIL_IF(!inst);
592 INC_SIZE(6);
593 *inst++ = REX_W;
594 #endif
595 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
596 *inst++ = 0x64;
597 *inst++ = 0x24;
598 *inst++ = (sljit_ub)sizeof(sljit_sw);
599 *inst++ = PUSHF;
600 compiler->flags_saved = 1;
601 return SLJIT_SUCCESS;
602 }
603
604 static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
605 {
606 sljit_ub *inst;
607
608 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
609 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
610 FAIL_IF(!inst);
611 INC_SIZE(5);
612 *inst++ = POPF;
613 #else
614 inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
615 FAIL_IF(!inst);
616 INC_SIZE(6);
617 *inst++ = POPF;
618 *inst++ = REX_W;
619 #endif
620 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
621 *inst++ = 0x64;
622 *inst++ = 0x24;
623 *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
624 compiler->flags_saved = keep_flags;
625 return SLJIT_SUCCESS;
626 }
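/* How the save/restore pair works (an explanatory note, not new behavior):
   lea esp, [esp + sizeof(sljit_sw)] temporarily gives up the top stack
   slot, and the following pushf stores EFLAGS into exactly that slot,
   leaving the stack pointer where it started. emit_restore_flags() mirrors
   this: popf reloads EFLAGS from the same slot and the trailing lea moves
   the stack pointer back. lea is used because, unlike add/sub, it does not
   modify the very flags being saved. */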
627
628 #ifdef _WIN32
629 #include <malloc.h>
630
631 static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
632 {
633 /* Workaround for calling the internal _chkstk() function on Windows.
634 This function touches all 4k pages belonging to the requested stack space,
635 whose size is passed in local_size. This is necessary on Windows, where
636 the stack can only grow in 4k steps. However, this function just burns
637 CPU cycles if the stack is already large enough. Since you cannot know
638 that in advance, it must always be called. I think this is a bad design
639 in general, even if it has its reasons. */
640 *(volatile sljit_si*)alloca(local_size) = 0;
641 }
642
643 #endif
644
645 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
646 #include "sljitNativeX86_32.c"
647 #else
648 #include "sljitNativeX86_64.c"
649 #endif
650
651 static sljit_si emit_mov(struct sljit_compiler *compiler,
652 sljit_si dst, sljit_sw dstw,
653 sljit_si src, sljit_sw srcw)
654 {
655 sljit_ub* inst;
656
657 if (dst == SLJIT_UNUSED) {
658 /* No destination: no need to set up flags. */
659 if (src & SLJIT_MEM) {
660 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
661 FAIL_IF(!inst);
662 *inst = MOV_r_rm;
663 }
664 return SLJIT_SUCCESS;
665 }
666 if (FAST_IS_REG(src)) {
667 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
668 FAIL_IF(!inst);
669 *inst = MOV_rm_r;
670 return SLJIT_SUCCESS;
671 }
672 if (src & SLJIT_IMM) {
673 if (FAST_IS_REG(dst)) {
674 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
675 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
676 #else
677 if (!compiler->mode32) {
678 if (NOT_HALFWORD(srcw))
679 return emit_load_imm64(compiler, dst, srcw);
680 }
681 else
682 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
683 #endif
684 }
685 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
686 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
687 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
688 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
689 FAIL_IF(!inst);
690 *inst = MOV_rm_r;
691 return SLJIT_SUCCESS;
692 }
693 #endif
694 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
695 FAIL_IF(!inst);
696 *inst = MOV_rm_i32;
697 return SLJIT_SUCCESS;
698 }
699 if (FAST_IS_REG(dst)) {
700 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
701 FAIL_IF(!inst);
702 *inst = MOV_r_rm;
703 return SLJIT_SUCCESS;
704 }
705
706 /* Memory to memory move. Requires two instructions. */
707 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
708 FAIL_IF(!inst);
709 *inst = MOV_r_rm;
710 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
711 FAIL_IF(!inst);
712 *inst = MOV_rm_r;
713 return SLJIT_SUCCESS;
714 }
715
716 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
717 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
718
719 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
720 {
721 sljit_ub *inst;
722 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
723 sljit_si size;
724 #endif
725
726 CHECK_ERROR();
727 check_sljit_emit_op0(compiler, op);
728
729 switch (GET_OPCODE(op)) {
730 case SLJIT_BREAKPOINT:
731 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
732 FAIL_IF(!inst);
733 INC_SIZE(1);
734 *inst = INT3;
735 break;
736 case SLJIT_NOP:
737 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
738 FAIL_IF(!inst);
739 INC_SIZE(1);
740 *inst = NOP;
741 break;
742 case SLJIT_UMUL:
743 case SLJIT_SMUL:
744 case SLJIT_UDIV:
745 case SLJIT_SDIV:
746 compiler->flags_saved = 0;
747 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
748 #ifdef _WIN64
749 SLJIT_COMPILE_ASSERT(
750 reg_map[SLJIT_R0] == 0
751 && reg_map[SLJIT_R1] == 2
752 && reg_map[TMP_REG1] > 7,
753 invalid_register_assignment_for_div_mul);
754 #else
755 SLJIT_COMPILE_ASSERT(
756 reg_map[SLJIT_R0] == 0
757 && reg_map[SLJIT_R1] < 7
758 && reg_map[TMP_REG1] == 2,
759 invalid_register_assignment_for_div_mul);
760 #endif
761 compiler->mode32 = op & SLJIT_INT_OP;
762 #endif
763
764 op = GET_OPCODE(op);
765 if (op == SLJIT_UDIV) {
766 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
767 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
768 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
769 #else
770 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
771 #endif
772 FAIL_IF(!inst);
773 *inst = XOR_r_rm;
774 }
775
776 if (op == SLJIT_SDIV) {
777 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
778 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
779 #endif
780
781 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
782 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
783 FAIL_IF(!inst);
784 INC_SIZE(1);
785 *inst = CDQ;
786 #else
787 if (compiler->mode32) {
788 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
789 FAIL_IF(!inst);
790 INC_SIZE(1);
791 *inst = CDQ;
792 } else {
793 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
794 FAIL_IF(!inst);
795 INC_SIZE(2);
796 *inst++ = REX_W;
797 *inst = CDQ;
798 }
799 #endif
800 }
801
802 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
803 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
804 FAIL_IF(!inst);
805 INC_SIZE(2);
806 *inst++ = GROUP_F7;
807 *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
808 #else
809 #ifdef _WIN64
810 size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
811 #else
812 size = (!compiler->mode32) ? 3 : 2;
813 #endif
814 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
815 FAIL_IF(!inst);
816 INC_SIZE(size);
817 #ifdef _WIN64
818 if (!compiler->mode32)
819 *inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
820 else if (op >= SLJIT_UDIV)
821 *inst++ = REX_B;
822 *inst++ = GROUP_F7;
823 *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
824 #else
825 if (!compiler->mode32)
826 *inst++ = REX_W;
827 *inst++ = GROUP_F7;
828 *inst = MOD_REG | reg_map[SLJIT_R1];
829 #endif
830 #endif
831 switch (op) {
832 case SLJIT_UMUL:
833 *inst |= MUL;
834 break;
835 case SLJIT_SMUL:
836 *inst |= IMUL;
837 break;
838 case SLJIT_UDIV:
839 *inst |= DIV;
840 break;
841 case SLJIT_SDIV:
842 *inst |= IDIV;
843 break;
844 }
845 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
846 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
847 #endif
848 break;
849 }
850
851 return SLJIT_SUCCESS;
852 }
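/* Register conventions behind the opcodes above (standard x86 behavior):
   the one-operand mul/imul/div/idiv family implicitly uses eax/rax and
   edx/rdx. Multiplication leaves the double-width product in edx:eax,
   and division takes its dividend from there, yielding the quotient in
   eax and the remainder in edx. This is why the high half is cleared
   (xor) or sign-extended (cdq, or cqo via REX.W + the CDQ opcode in
   64-bit mode) before a division, and why the compile-time asserts above
   pin SLJIT_R0 to eax and either SLJIT_R1 or TMP_REG1 to edx. */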
853
854 #define ENCODE_PREFIX(prefix) \
855 do { \
856 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
857 FAIL_IF(!inst); \
858 INC_SIZE(1); \
859 *inst = (prefix); \
860 } while (0)
861
862 static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
863 sljit_si dst, sljit_sw dstw,
864 sljit_si src, sljit_sw srcw)
865 {
866 sljit_ub* inst;
867 sljit_si dst_r;
868 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
869 sljit_si work_r;
870 #endif
871
872 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
873 compiler->mode32 = 0;
874 #endif
875
876 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
877 return SLJIT_SUCCESS; /* Empty instruction. */
878
879 if (src & SLJIT_IMM) {
880 if (FAST_IS_REG(dst)) {
881 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
882 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
883 #else
884 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
885 FAIL_IF(!inst);
886 *inst = MOV_rm_i32;
887 return SLJIT_SUCCESS;
888 #endif
889 }
890 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
891 FAIL_IF(!inst);
892 *inst = MOV_rm8_i8;
893 return SLJIT_SUCCESS;
894 }
895
896 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
897
898 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
899 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
900 if (reg_map[src] >= 4) {
901 SLJIT_ASSERT(dst_r == TMP_REG1);
902 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
903 } else
904 dst_r = src;
905 #else
906 dst_r = src;
907 #endif
908 }
909 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
910 else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
911 /* src, dst are registers. */
912 SLJIT_ASSERT(SLOW_IS_REG(dst));
913 if (reg_map[dst] < 4) {
914 if (dst != src)
915 EMIT_MOV(compiler, dst, 0, src, 0);
916 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
917 FAIL_IF(!inst);
918 *inst++ = GROUP_0F;
919 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
920 }
921 else {
922 if (dst != src)
923 EMIT_MOV(compiler, dst, 0, src, 0);
924 if (sign) {
925 /* shl reg, 24 */
926 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
927 FAIL_IF(!inst);
928 *inst |= SHL;
929 /* sar reg, 24 */
930 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
931 FAIL_IF(!inst);
932 *inst |= SAR;
933 }
934 else {
935 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
936 FAIL_IF(!inst);
937 *(inst + 1) |= AND;
938 }
939 }
940 return SLJIT_SUCCESS;
941 }
942 #endif
943 else {
944 /* src is either a memory operand or, on x86-32, a register with reg_map[src] < 4. */
945 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
946 FAIL_IF(!inst);
947 *inst++ = GROUP_0F;
948 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
949 }
950
951 if (dst & SLJIT_MEM) {
952 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
953 if (dst_r == TMP_REG1) {
954 /* Find an unused register whose reg_map[] value is < 4. */
955 if ((dst & REG_MASK) == SLJIT_R0) {
956 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
957 work_r = SLJIT_R2;
958 else
959 work_r = SLJIT_R1;
960 }
961 else {
962 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
963 work_r = SLJIT_R0;
964 else if ((dst & REG_MASK) == SLJIT_R1)
965 work_r = SLJIT_R2;
966 else
967 work_r = SLJIT_R1;
968 }
969
970 if (work_r == SLJIT_R0) {
971 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
972 }
973 else {
974 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
975 FAIL_IF(!inst);
976 *inst = XCHG_r_rm;
977 }
978
979 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
980 FAIL_IF(!inst);
981 *inst = MOV_rm8_r8;
982
983 if (work_r == SLJIT_R0) {
984 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
985 }
986 else {
987 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
988 FAIL_IF(!inst);
989 *inst = XCHG_r_rm;
990 }
991 }
992 else {
993 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
994 FAIL_IF(!inst);
995 *inst = MOV_rm8_r8;
996 }
997 #else
998 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
999 FAIL_IF(!inst);
1000 *inst = MOV_rm8_r8;
1001 #endif
1002 }
1003
1004 return SLJIT_SUCCESS;
1005 }
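/* Why the reg_map[...] >= 4 special cases above exist (a note on x86
   encoding, not sljit-specific): without a REX prefix only the first four
   general registers (eax, ecx, edx, ebx, i.e. reg_map values 0..3) have
   byte-sized forms (al..bl); register encodings 4..7 select ah..bh instead.
   On x86-32 there is no REX prefix, so a value living in esi/edi/ebp must
   first be moved (or xchg'd) into a low register before a byte store. */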
1006
1007 static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
1008 sljit_si dst, sljit_sw dstw,
1009 sljit_si src, sljit_sw srcw)
1010 {
1011 sljit_ub* inst;
1012 sljit_si dst_r;
1013
1014 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1015 compiler->mode32 = 0;
1016 #endif
1017
1018 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
1019 return SLJIT_SUCCESS; /* Empty instruction. */
1020
1021 if (src & SLJIT_IMM) {
1022 if (FAST_IS_REG(dst)) {
1023 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1024 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1025 #else
1026 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1027 FAIL_IF(!inst);
1028 *inst = MOV_rm_i32;
1029 return SLJIT_SUCCESS;
1030 #endif
1031 }
1032 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1033 FAIL_IF(!inst);
1034 *inst = MOV_rm_i32;
1035 return SLJIT_SUCCESS;
1036 }
1037
1038 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1039
1040 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1041 dst_r = src;
1042 else {
1043 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1044 FAIL_IF(!inst);
1045 *inst++ = GROUP_0F;
1046 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1047 }
1048
1049 if (dst & SLJIT_MEM) {
1050 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1051 FAIL_IF(!inst);
1052 *inst = MOV_rm_r;
1053 }
1054
1055 return SLJIT_SUCCESS;
1056 }
1057
1058 static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
1059 sljit_si dst, sljit_sw dstw,
1060 sljit_si src, sljit_sw srcw)
1061 {
1062 sljit_ub* inst;
1063
1064 if (dst == SLJIT_UNUSED) {
1065 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1066 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1067 FAIL_IF(!inst);
1068 *inst++ = GROUP_F7;
1069 *inst |= opcode;
1070 return SLJIT_SUCCESS;
1071 }
1072 if (dst == src && dstw == srcw) {
1073 /* Same input and output */
1074 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1075 FAIL_IF(!inst);
1076 *inst++ = GROUP_F7;
1077 *inst |= opcode;
1078 return SLJIT_SUCCESS;
1079 }
1080 if (FAST_IS_REG(dst)) {
1081 EMIT_MOV(compiler, dst, 0, src, srcw);
1082 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1083 FAIL_IF(!inst);
1084 *inst++ = GROUP_F7;
1085 *inst |= opcode;
1086 return SLJIT_SUCCESS;
1087 }
1088 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1089 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1090 FAIL_IF(!inst);
1091 *inst++ = GROUP_F7;
1092 *inst |= opcode;
1093 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1094 return SLJIT_SUCCESS;
1095 }
1096
1097 static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
1098 sljit_si dst, sljit_sw dstw,
1099 sljit_si src, sljit_sw srcw)
1100 {
1101 sljit_ub* inst;
1102
1103 if (dst == SLJIT_UNUSED) {
1104 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1105 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1106 FAIL_IF(!inst);
1107 *inst++ = GROUP_F7;
1108 *inst |= NOT_rm;
1109 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1110 FAIL_IF(!inst);
1111 *inst = OR_r_rm;
1112 return SLJIT_SUCCESS;
1113 }
1114 if (FAST_IS_REG(dst)) {
1115 EMIT_MOV(compiler, dst, 0, src, srcw);
1116 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1117 FAIL_IF(!inst);
1118 *inst++ = GROUP_F7;
1119 *inst |= NOT_rm;
1120 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1121 FAIL_IF(!inst);
1122 *inst = OR_r_rm;
1123 return SLJIT_SUCCESS;
1124 }
1125 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1126 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1127 FAIL_IF(!inst);
1128 *inst++ = GROUP_F7;
1129 *inst |= NOT_rm;
1130 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1131 FAIL_IF(!inst);
1132 *inst = OR_r_rm;
1133 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1134 return SLJIT_SUCCESS;
1135 }
1136
1137 static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
1138 sljit_si dst, sljit_sw dstw,
1139 sljit_si src, sljit_sw srcw)
1140 {
1141 sljit_ub* inst;
1142 sljit_si dst_r;
1143
1144 SLJIT_UNUSED_ARG(op_flags);
1145 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
1146 /* Just set the zero flag. */
1147 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1148 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1149 FAIL_IF(!inst);
1150 *inst++ = GROUP_F7;
1151 *inst |= NOT_rm;
1152 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1153 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
1154 #else
1155 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
1156 #endif
1157 FAIL_IF(!inst);
1158 *inst |= SHR;
1159 return SLJIT_SUCCESS;
1160 }
1161
1162 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
1163 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
1164 src = TMP_REG1;
1165 srcw = 0;
1166 }
1167
1168 inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
1169 FAIL_IF(!inst);
1170 *inst++ = GROUP_0F;
1171 *inst = BSR_r_rm;
1172
1173 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1174 if (FAST_IS_REG(dst))
1175 dst_r = dst;
1176 else {
1177 /* Find an unused temporary register. */
1178 if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
1179 dst_r = SLJIT_R0;
1180 else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
1181 dst_r = SLJIT_R1;
1182 else
1183 dst_r = SLJIT_R2;
1184 EMIT_MOV(compiler, dst, dstw, dst_r, 0);
1185 }
1186 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
1187 #else
1188 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
1189 compiler->mode32 = 0;
1190 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
1191 compiler->mode32 = op_flags & SLJIT_INT_OP;
1192 #endif
1193
1194 if (cpu_has_cmov == -1)
1195 get_cpu_features();
1196
1197 if (cpu_has_cmov) {
1198 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1199 FAIL_IF(!inst);
1200 *inst++ = GROUP_0F;
1201 *inst = CMOVNE_r_rm;
1202 } else {
1203 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1204 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1205 FAIL_IF(!inst);
1206 INC_SIZE(4);
1207
1208 *inst++ = JE_i8;
1209 *inst++ = 2;
1210 *inst++ = MOV_r_rm;
1211 *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
1212 #else
1213 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
1214 FAIL_IF(!inst);
1215 INC_SIZE(5);
1216
1217 *inst++ = JE_i8;
1218 *inst++ = 3;
1219 *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
1220 *inst++ = MOV_r_rm;
1221 *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
1222 #endif
1223 }
1224
1225 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1226 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1227 #else
1228 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
1229 #endif
1230 FAIL_IF(!inst);
1231 *(inst + 1) |= XOR;
1232
1233 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1234 if (dst & SLJIT_MEM) {
1235 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1236 FAIL_IF(!inst);
1237 *inst = XCHG_r_rm;
1238 }
1239 #else
1240 if (dst & SLJIT_MEM)
1241 EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
1242 #endif
1243 return SLJIT_SUCCESS;
1244 }
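/* Arithmetic behind the emitted sequence (worked example): bsr returns the
   index of the highest set bit, so clz(x) = 31 - bsr(x) for non-zero x;
   because bsr(x) <= 31, the subtraction can be done as xor with 31
   (31 ^ r == 31 - r for 0 <= r <= 31). bsr sets ZF when its source is
   zero, so the destination is preloaded with 32 + 31 and the bsr result
   is only copied over it with cmovne (or a je-guarded mov): after the
   final xor, 63 ^ 31 = 32, the desired clz of zero. The 64-bit path
   preloads 64 + 63 and xors with 63 analogously (127 ^ 63 = 64). */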
1245
1246 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
1247 sljit_si dst, sljit_sw dstw,
1248 sljit_si src, sljit_sw srcw)
1249 {
1250 sljit_ub* inst;
1251 sljit_si update = 0;
1252 sljit_si op_flags = GET_ALL_FLAGS(op);
1253 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1254 sljit_si dst_is_ereg = 0;
1255 sljit_si src_is_ereg = 0;
1256 #else
1257 # define src_is_ereg 0
1258 #endif
1259
1260 CHECK_ERROR();
1261 check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
1262 ADJUST_LOCAL_OFFSET(dst, dstw);
1263 ADJUST_LOCAL_OFFSET(src, srcw);
1264
1265 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1266 CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
1267 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1268 compiler->mode32 = op_flags & SLJIT_INT_OP;
1269 #endif
1270
1271 op = GET_OPCODE(op);
1272 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1273 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1274 compiler->mode32 = 0;
1275 #endif
1276
1277 if (op_flags & SLJIT_INT_OP) {
1278 if (FAST_IS_REG(src) && src == dst) {
1279 if (!TYPE_CAST_NEEDED(op))
1280 return SLJIT_SUCCESS;
1281 }
1282 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1283 if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
1284 op = SLJIT_MOV_UI;
1285 if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
1286 op = SLJIT_MOVU_UI;
1287 if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
1288 op = SLJIT_MOV_SI;
1289 if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
1290 op = SLJIT_MOVU_SI;
1291 #endif
1292 }
1293
1294 SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
1295 if (op >= SLJIT_MOVU) {
1296 update = 1;
1297 op -= 8;
1298 }
1299
1300 if (src & SLJIT_IMM) {
1301 switch (op) {
1302 case SLJIT_MOV_UB:
1303 srcw = (sljit_ub)srcw;
1304 break;
1305 case SLJIT_MOV_SB:
1306 srcw = (sljit_sb)srcw;
1307 break;
1308 case SLJIT_MOV_UH:
1309 srcw = (sljit_uh)srcw;
1310 break;
1311 case SLJIT_MOV_SH:
1312 srcw = (sljit_sh)srcw;
1313 break;
1314 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1315 case SLJIT_MOV_UI:
1316 srcw = (sljit_ui)srcw;
1317 break;
1318 case SLJIT_MOV_SI:
1319 srcw = (sljit_si)srcw;
1320 break;
1321 #endif
1322 }
1323 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1324 if (SLJIT_UNLIKELY(dst_is_ereg))
1325 return emit_mov(compiler, dst, dstw, src, srcw);
1326 #endif
1327 }
1328
1329 if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
1330 inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
1331 FAIL_IF(!inst);
1332 *inst = LEA_r_m;
1333 src &= SLJIT_MEM | 0xf;
1334 srcw = 0;
1335 }
1336
1337 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1338 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1339 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1340 dst = TMP_REG1;
1341 }
1342 #endif
1343
1344 switch (op) {
1345 case SLJIT_MOV:
1346 case SLJIT_MOV_P:
1347 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1348 case SLJIT_MOV_UI:
1349 case SLJIT_MOV_SI:
1350 #endif
1351 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1352 break;
1353 case SLJIT_MOV_UB:
1354 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1355 break;
1356 case SLJIT_MOV_SB:
1357 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1358 break;
1359 case SLJIT_MOV_UH:
1360 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1361 break;
1362 case SLJIT_MOV_SH:
1363 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1364 break;
1365 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1366 case SLJIT_MOV_UI:
1367 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1368 break;
1369 case SLJIT_MOV_SI:
1370 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1371 break;
1372 #endif
1373 }
1374
1375 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1376 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1377 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1378 #endif
1379
1380 if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
1381 inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
1382 FAIL_IF(!inst);
1383 *inst = LEA_r_m;
1384 }
1385 return SLJIT_SUCCESS;
1386 }
1387
1388 if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
1389 compiler->flags_saved = 0;
1390
1391 switch (op) {
1392 case SLJIT_NOT:
1393 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
1394 return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1395 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1396
1397 case SLJIT_NEG:
1398 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1399 FAIL_IF(emit_save_flags(compiler));
1400 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1401
1402 case SLJIT_CLZ:
1403 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1404 FAIL_IF(emit_save_flags(compiler));
1405 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1406 }
1407
1408 return SLJIT_SUCCESS;
1409
1410 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1411 # undef src_is_ereg
1412 #endif
1413 }
1414
1415 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1416
1417 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1418 if (IS_HALFWORD(immw) || compiler->mode32) { \
1419 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1420 FAIL_IF(!inst); \
1421 *(inst + 1) |= (op_imm); \
1422 } \
1423 else { \
1424 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
1425 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
1426 FAIL_IF(!inst); \
1427 *inst = (op_mr); \
1428 }
1429
1430 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1431 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
1432
1433 #else
1434
1435 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1436 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1437 FAIL_IF(!inst); \
1438 *(inst + 1) |= (op_imm);
1439
1440 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1441 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1442
1443 #endif
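/* The point of BINARY_EAX_IMM (an encoding note): the classic ALU
   instructions have a dedicated accumulator form without a ModRM byte,
   e.g. "add eax, imm32" is 05 id (5 bytes) versus the generic
   81 /0 id (6 bytes). It only pays off for immediates outside the
   sign-extended 8-bit range, since 83 /0 ib (3 bytes) wins otherwise -
   hence the (src2w > 127 || src2w < -128) guards at the call sites. */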
1444
1445 static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
1446 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1447 sljit_si dst, sljit_sw dstw,
1448 sljit_si src1, sljit_sw src1w,
1449 sljit_si src2, sljit_sw src2w)
1450 {
1451 sljit_ub* inst;
1452
1453 if (dst == SLJIT_UNUSED) {
1454 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1455 if (src2 & SLJIT_IMM) {
1456 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1457 }
1458 else {
1459 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1460 FAIL_IF(!inst);
1461 *inst = op_rm;
1462 }
1463 return SLJIT_SUCCESS;
1464 }
1465
1466 if (dst == src1 && dstw == src1w) {
1467 if (src2 & SLJIT_IMM) {
1468 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1469 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1470 #else
1471 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1472 #endif
1473 BINARY_EAX_IMM(op_eax_imm, src2w);
1474 }
1475 else {
1476 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1477 }
1478 }
1479 else if (FAST_IS_REG(dst)) {
1480 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1481 FAIL_IF(!inst);
1482 *inst = op_rm;
1483 }
1484 else if (FAST_IS_REG(src2)) {
1485 /* Special exception for sljit_emit_op_flags. */
1486 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1487 FAIL_IF(!inst);
1488 *inst = op_mr;
1489 }
1490 else {
1491 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1492 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1493 FAIL_IF(!inst);
1494 *inst = op_mr;
1495 }
1496 return SLJIT_SUCCESS;
1497 }
1498
1499 /* Only for cumulative operations. */
1500 if (dst == src2 && dstw == src2w) {
1501 if (src1 & SLJIT_IMM) {
1502 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1503 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1504 #else
1505 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
1506 #endif
1507 BINARY_EAX_IMM(op_eax_imm, src1w);
1508 }
1509 else {
1510 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1511 }
1512 }
1513 else if (FAST_IS_REG(dst)) {
1514 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1515 FAIL_IF(!inst);
1516 *inst = op_rm;
1517 }
1518 else if (FAST_IS_REG(src1)) {
1519 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1520 FAIL_IF(!inst);
1521 *inst = op_mr;
1522 }
1523 else {
1524 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1525 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1526 FAIL_IF(!inst);
1527 *inst = op_mr;
1528 }
1529 return SLJIT_SUCCESS;
1530 }
1531
1532 /* General version. */
1533 if (FAST_IS_REG(dst)) {
1534 EMIT_MOV(compiler, dst, 0, src1, src1w);
1535 if (src2 & SLJIT_IMM) {
1536 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1537 }
1538 else {
1539 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1540 FAIL_IF(!inst);
1541 *inst = op_rm;
1542 }
1543 }
1544 else {
1545 /* This version requires fewer memory writes. */
1546 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1547 if (src2 & SLJIT_IMM) {
1548 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1549 }
1550 else {
1551 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1552 FAIL_IF(!inst);
1553 *inst = op_rm;
1554 }
1555 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1556 }
1557
1558 return SLJIT_SUCCESS;
1559 }
1560
1561 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
1562 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1563 sljit_si dst, sljit_sw dstw,
1564 sljit_si src1, sljit_sw src1w,
1565 sljit_si src2, sljit_sw src2w)
1566 {
1567 sljit_ub* inst;
1568
1569 if (dst == SLJIT_UNUSED) {
1570 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1571 if (src2 & SLJIT_IMM) {
1572 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1573 }
1574 else {
1575 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1576 FAIL_IF(!inst);
1577 *inst = op_rm;
1578 }
1579 return SLJIT_SUCCESS;
1580 }
1581
1582 if (dst == src1 && dstw == src1w) {
1583 if (src2 & SLJIT_IMM) {
1584 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1585 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1586 #else
1587 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1588 #endif
1589 BINARY_EAX_IMM(op_eax_imm, src2w);
1590 }
1591 else {
1592 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1593 }
1594 }
1595 else if (FAST_IS_REG(dst)) {
1596 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1597 FAIL_IF(!inst);
1598 *inst = op_rm;
1599 }
1600 else if (FAST_IS_REG(src2)) {
1601 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1602 FAIL_IF(!inst);
1603 *inst = op_mr;
1604 }
1605 else {
1606 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1607 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1608 FAIL_IF(!inst);
1609 *inst = op_mr;
1610 }
1611 return SLJIT_SUCCESS;
1612 }
1613
1614 /* General version. */
1615 if (FAST_IS_REG(dst) && dst != src2) {
1616 EMIT_MOV(compiler, dst, 0, src1, src1w);
1617 if (src2 & SLJIT_IMM) {
1618 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1619 }
1620 else {
1621 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1622 FAIL_IF(!inst);
1623 *inst = op_rm;
1624 }
1625 }
1626 else {
1627 /* This version requires fewer memory writes. */
1628 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1629 if (src2 & SLJIT_IMM) {
1630 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1631 }
1632 else {
1633 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1634 FAIL_IF(!inst);
1635 *inst = op_rm;
1636 }
1637 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1638 }
1639
1640 return SLJIT_SUCCESS;
1641 }
1642
1643 static sljit_si emit_mul(struct sljit_compiler *compiler,
1644 sljit_si dst, sljit_sw dstw,
1645 sljit_si src1, sljit_sw src1w,
1646 sljit_si src2, sljit_sw src2w)
1647 {
1648 sljit_ub* inst;
1649 sljit_si dst_r;
1650
1651 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1652
1653 /* Register destination. */
1654 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1655 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1656 FAIL_IF(!inst);
1657 *inst++ = GROUP_0F;
1658 *inst = IMUL_r_rm;
1659 }
1660 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1661 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1662 FAIL_IF(!inst);
1663 *inst++ = GROUP_0F;
1664 *inst = IMUL_r_rm;
1665 }
1666 else if (src1 & SLJIT_IMM) {
1667 if (src2 & SLJIT_IMM) {
1668 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1669 src2 = dst_r;
1670 src2w = 0;
1671 }
1672
1673 if (src1w <= 127 && src1w >= -128) {
1674 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1675 FAIL_IF(!inst);
1676 *inst = IMUL_r_rm_i8;
1677 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1678 FAIL_IF(!inst);
1679 INC_SIZE(1);
1680 *inst = (sljit_sb)src1w;
1681 }
1682 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1683 else {
1684 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1685 FAIL_IF(!inst);
1686 *inst = IMUL_r_rm_i32;
1687 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1688 FAIL_IF(!inst);
1689 INC_SIZE(4);
1690 *(sljit_sw*)inst = src1w;
1691 }
1692 #else
1693 else if (IS_HALFWORD(src1w)) {
1694 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1695 FAIL_IF(!inst);
1696 *inst = IMUL_r_rm_i32;
1697 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1698 FAIL_IF(!inst);
1699 INC_SIZE(4);
1700 *(sljit_si*)inst = (sljit_si)src1w;
1701 }
1702 else {
1703 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1704 if (dst_r != src2)
1705 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1706 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1707 FAIL_IF(!inst);
1708 *inst++ = GROUP_0F;
1709 *inst = IMUL_r_rm;
1710 }
1711 #endif
1712 }
1713 else if (src2 & SLJIT_IMM) {
1714 /* Note: src1 is NOT immediate. */
1715
1716 if (src2w <= 127 && src2w >= -128) {
1717 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1718 FAIL_IF(!inst);
1719 *inst = IMUL_r_rm_i8;
1720 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1721 FAIL_IF(!inst);
1722 INC_SIZE(1);
1723 *inst = (sljit_sb)src2w;
1724 }
1725 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1726 else {
1727 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1728 FAIL_IF(!inst);
1729 *inst = IMUL_r_rm_i32;
1730 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1731 FAIL_IF(!inst);
1732 INC_SIZE(4);
1733 *(sljit_sw*)inst = src2w;
1734 }
1735 #else
1736 else if (IS_HALFWORD(src2w)) {
1737 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1738 FAIL_IF(!inst);
1739 *inst = IMUL_r_rm_i32;
1740 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1741 FAIL_IF(!inst);
1742 INC_SIZE(4);
1743 *(sljit_si*)inst = (sljit_si)src2w;
1744 }
1745 else {
1746 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1747 if (dst_r != src1)
1748 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1749 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1750 FAIL_IF(!inst);
1751 *inst++ = GROUP_0F;
1752 *inst = IMUL_r_rm;
1753 }
1754 #endif
1755 }
1756 else {
1757 /* Neither argument is immediate. */
1758 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1759 dst_r = TMP_REG1;
1760 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1761 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1762 FAIL_IF(!inst);
1763 *inst++ = GROUP_0F;
1764 *inst = IMUL_r_rm;
1765 }
1766
1767 if (dst_r == TMP_REG1)
1768 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1769
1770 return SLJIT_SUCCESS;
1771 }
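/* Encodings used above (for reference): imul appears in three forms in
   this function: the two-operand 0f af /r (IMUL_r_rm, reg *= r/m), and
   the immediate forms 6b /r ib (IMUL_r_rm_i8, sign-extended 8-bit
   immediate) and 69 /r id (IMUL_r_rm_i32, 32-bit immediate), both of
   which compute reg = r/m * imm in a single instruction. */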
1772
1773 static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
1774 sljit_si dst, sljit_sw dstw,
1775 sljit_si src1, sljit_sw src1w,
1776 sljit_si src2, sljit_sw src2w)
1777 {
1778 sljit_ub* inst;
1779 sljit_si dst_r, done = 0;
1780
1781 /* These cases are better left to be handled the normal way. */
1782 if (!keep_flags) {
1783 if (dst == src1 && dstw == src1w)
1784 return SLJIT_ERR_UNSUPPORTED;
1785 if (dst == src2 && dstw == src2w)
1786 return SLJIT_ERR_UNSUPPORTED;
1787 }
1788
1789 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1790
1791 if (FAST_IS_REG(src1)) {
1792 if (FAST_IS_REG(src2)) {
1793 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1794 FAIL_IF(!inst);
1795 *inst = LEA_r_m;
1796 done = 1;
1797 }
1798 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1799 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1800 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
1801 #else
1802 if (src2 & SLJIT_IMM) {
1803 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1804 #endif
1805 FAIL_IF(!inst);
1806 *inst = LEA_r_m;
1807 done = 1;
1808 }
1809 }
1810 else if (FAST_IS_REG(src2)) {
1811 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1812 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1813 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
1814 #else
1815 if (src1 & SLJIT_IMM) {
1816 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1817 #endif
1818 FAIL_IF(!inst);
1819 *inst = LEA_r_m;
1820 done = 1;
1821 }
1822 }
1823
1824 if (done) {
1825 if (dst_r == TMP_REG1)
1826 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1827 return SLJIT_SUCCESS;
1828 }
1829 return SLJIT_ERR_UNSUPPORTED;
1830 }
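/* Why lea is attempted first (a design note): lea performs the address
   computation dst = src1 + src2 (register + register, or register +
   constant) without touching the flags, so it can implement an add while
   keeping previously saved flags intact - exactly the keep_flags case.
   When no operand combination fits lea's addressing modes, the function
   returns SLJIT_ERR_UNSUPPORTED and the caller falls back to a regular
   add. */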
1831
1832 static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
1833 sljit_si src1, sljit_sw src1w,
1834 sljit_si src2, sljit_sw src2w)
1835 {
1836 sljit_ub* inst;
1837
1838 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1839 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1840 #else
1841 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1842 #endif
1843 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1844 return SLJIT_SUCCESS;
1845 }
1846
1847 if (FAST_IS_REG(src1)) {
1848 if (src2 & SLJIT_IMM) {
1849 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1850 }
1851 else {
1852 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1853 FAIL_IF(!inst);
1854 *inst = CMP_r_rm;
1855 }
1856 return SLJIT_SUCCESS;
1857 }
1858
1859 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1860 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1861 FAIL_IF(!inst);
1862 *inst = CMP_rm_r;
1863 return SLJIT_SUCCESS;
1864 }
1865
1866 if (src2 & SLJIT_IMM) {
1867 if (src1 & SLJIT_IMM) {
1868 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1869 src1 = TMP_REG1;
1870 src1w = 0;
1871 }
1872 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1873 }
1874 else {
1875 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1876 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1877 FAIL_IF(!inst);
1878 *inst = CMP_r_rm;
1879 }
1880 return SLJIT_SUCCESS;
1881 }
1882
1883 static sljit_si emit_test_binary(struct sljit_compiler *compiler,
1884 sljit_si src1, sljit_sw src1w,
1885 sljit_si src2, sljit_sw src2w)
1886 {
1887 sljit_ub* inst;
1888
1889 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1890 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1891 #else
1892 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1893 #endif
1894 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1895 return SLJIT_SUCCESS;
1896 }
1897
1898 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1899 	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1900 #else
1901 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1902 #endif
1903 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1904 return SLJIT_SUCCESS;
1905 }
1906
1907 if (FAST_IS_REG(src1)) {
1908 if (src2 & SLJIT_IMM) {
1909 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1910 if (IS_HALFWORD(src2w) || compiler->mode32) {
1911 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1912 FAIL_IF(!inst);
1913 *inst = GROUP_F7;
1914 }
1915 else {
1916 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1917 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1918 FAIL_IF(!inst);
1919 *inst = TEST_rm_r;
1920 }
1921 #else
1922 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1923 FAIL_IF(!inst);
1924 *inst = GROUP_F7;
1925 #endif
1926 }
1927 else {
1928 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1929 FAIL_IF(!inst);
1930 *inst = TEST_rm_r;
1931 }
1932 return SLJIT_SUCCESS;
1933 }
1934
1935 if (FAST_IS_REG(src2)) {
1936 if (src1 & SLJIT_IMM) {
1937 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1938 if (IS_HALFWORD(src1w) || compiler->mode32) {
1939 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1940 FAIL_IF(!inst);
1941 *inst = GROUP_F7;
1942 }
1943 else {
1944 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1945 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1946 FAIL_IF(!inst);
1947 *inst = TEST_rm_r;
1948 }
1949 #else
1950 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1951 FAIL_IF(!inst);
1952 *inst = GROUP_F7;
1953 #endif
1954 }
1955 else {
1956 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1957 FAIL_IF(!inst);
1958 *inst = TEST_rm_r;
1959 }
1960 return SLJIT_SUCCESS;
1961 }
1962
1963 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1964 if (src2 & SLJIT_IMM) {
1965 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1966 if (IS_HALFWORD(src2w) || compiler->mode32) {
1967 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1968 FAIL_IF(!inst);
1969 *inst = GROUP_F7;
1970 }
1971 else {
1972 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1973 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1974 FAIL_IF(!inst);
1975 *inst = TEST_rm_r;
1976 }
1977 #else
1978 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1979 FAIL_IF(!inst);
1980 *inst = GROUP_F7;
1981 #endif
1982 }
1983 else {
1984 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1985 FAIL_IF(!inst);
1986 *inst = TEST_rm_r;
1987 }
1988 return SLJIT_SUCCESS;
1989 }
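/* TEST computes the same bitwise AND as the logical op but discards the
   result, so it serves SLJIT_AND when only the flags are needed, e.g.
   "test eax, eax" to check a register for zero (illustrative example). */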
1990
1991 static sljit_si emit_shift(struct sljit_compiler *compiler,
1992 sljit_ub mode,
1993 sljit_si dst, sljit_sw dstw,
1994 sljit_si src1, sljit_sw src1w,
1995 sljit_si src2, sljit_sw src2w)
1996 {
1997 sljit_ub* inst;
1998
1999 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2000 if (dst == src1 && dstw == src1w) {
2001 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2002 FAIL_IF(!inst);
2003 *inst |= mode;
2004 return SLJIT_SUCCESS;
2005 }
2006 if (dst == SLJIT_UNUSED) {
2007 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2008 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2009 FAIL_IF(!inst);
2010 *inst |= mode;
2011 return SLJIT_SUCCESS;
2012 }
2013 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2014 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2015 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2016 FAIL_IF(!inst);
2017 *inst |= mode;
2018 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2019 return SLJIT_SUCCESS;
2020 }
2021 if (FAST_IS_REG(dst)) {
2022 EMIT_MOV(compiler, dst, 0, src1, src1w);
2023 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2024 FAIL_IF(!inst);
2025 *inst |= mode;
2026 return SLJIT_SUCCESS;
2027 }
2028
2029 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2030 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2031 FAIL_IF(!inst);
2032 *inst |= mode;
2033 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2034 return SLJIT_SUCCESS;
2035 }
2036
2037 if (dst == SLJIT_PREF_SHIFT_REG) {
2038 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2039 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2040 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2041 FAIL_IF(!inst);
2042 *inst |= mode;
2043 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2044 }
2045 else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2046 if (src1 != dst)
2047 EMIT_MOV(compiler, dst, 0, src1, src1w);
2048 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2049 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2050 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2051 FAIL_IF(!inst);
2052 *inst |= mode;
2053 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2054 }
2055 else {
2056 		/* This case is really difficult, since ecx itself may be used for
2057 		   addressing, and we must ensure it works even in that case. */
2058 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2059 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2060 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2061 #else
2062 /* [esp+0] contains the flags. */
2063 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
2064 #endif
2065 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2066 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2067 FAIL_IF(!inst);
2068 *inst |= mode;
2069 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2070 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2071 #else
2072 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
2073 #endif
2074 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2075 }
2076
2077 return SLJIT_SUCCESS;
2078 }
2079
2080 static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
2081 sljit_ub mode, sljit_si set_flags,
2082 sljit_si dst, sljit_sw dstw,
2083 sljit_si src1, sljit_sw src1w,
2084 sljit_si src2, sljit_sw src2w)
2085 {
2086 /* The CPU does not set flags if the shift count is 0. */
2087 if (src2 & SLJIT_IMM) {
2088 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2089 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2090 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2091 #else
2092 if ((src2w & 0x1f) != 0)
2093 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2094 #endif
2095 if (!set_flags)
2096 return emit_mov(compiler, dst, dstw, src1, src1w);
2097 /* OR dst, src, 0 */
2098 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2099 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2100 }
2101
2102 if (!set_flags)
2103 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2104
2105 if (!FAST_IS_REG(dst))
2106 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2107
2108 	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2109
2110 if (FAST_IS_REG(dst))
2111 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2112 return SLJIT_SUCCESS;
2113 }
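/* Summary: a shift by zero leaves EFLAGS unchanged, so a zero immediate
   count is turned into OR dst, src, 0 to set the flags from the value,
   and for variable counts the flags are recreated by comparing the
   shifted result (or the source, when dst is in memory) with zero. */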
2114
2115 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
2116 sljit_si dst, sljit_sw dstw,
2117 sljit_si src1, sljit_sw src1w,
2118 sljit_si src2, sljit_sw src2w)
2119 {
2120 CHECK_ERROR();
2121 check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2122 ADJUST_LOCAL_OFFSET(dst, dstw);
2123 ADJUST_LOCAL_OFFSET(src1, src1w);
2124 ADJUST_LOCAL_OFFSET(src2, src2w);
2125
2126 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2127 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2128 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2129 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2130 compiler->mode32 = op & SLJIT_INT_OP;
2131 #endif
2132
2133 if (GET_OPCODE(op) >= SLJIT_MUL) {
2134 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2135 compiler->flags_saved = 0;
2136 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2137 FAIL_IF(emit_save_flags(compiler));
2138 }
2139
2140 switch (GET_OPCODE(op)) {
2141 case SLJIT_ADD:
2142 if (!GET_FLAGS(op)) {
2143 if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2144 return compiler->error;
2145 }
2146 else
2147 compiler->flags_saved = 0;
2148 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2149 FAIL_IF(emit_save_flags(compiler));
2150 return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
2151 dst, dstw, src1, src1w, src2, src2w);
2152 case SLJIT_ADDC:
2153 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2154 FAIL_IF(emit_restore_flags(compiler, 1));
2155 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2156 FAIL_IF(emit_save_flags(compiler));
2157 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2158 compiler->flags_saved = 0;
2159 return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
2160 dst, dstw, src1, src1w, src2, src2w);
2161 case SLJIT_SUB:
2162 if (!GET_FLAGS(op)) {
2163 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2164 return compiler->error;
2165 }
2166 else
2167 compiler->flags_saved = 0;
2168 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2169 FAIL_IF(emit_save_flags(compiler));
2170 if (dst == SLJIT_UNUSED)
2171 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2172 return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
2173 dst, dstw, src1, src1w, src2, src2w);
2174 case SLJIT_SUBC:
2175 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2176 FAIL_IF(emit_restore_flags(compiler, 1));
2177 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2178 FAIL_IF(emit_save_flags(compiler));
2179 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2180 compiler->flags_saved = 0;
2181 return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
2182 dst, dstw, src1, src1w, src2, src2w);
2183 case SLJIT_MUL:
2184 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2185 case SLJIT_AND:
2186 if (dst == SLJIT_UNUSED)
2187 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2188 return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
2189 dst, dstw, src1, src1w, src2, src2w);
2190 case SLJIT_OR:
2191 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2192 dst, dstw, src1, src1w, src2, src2w);
2193 case SLJIT_XOR:
2194 return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
2195 dst, dstw, src1, src1w, src2, src2w);
2196 case SLJIT_SHL:
2197 return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
2198 dst, dstw, src1, src1w, src2, src2w);
2199 case SLJIT_LSHR:
2200 return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
2201 dst, dstw, src1, src1w, src2, src2w);
2202 case SLJIT_ASHR:
2203 return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
2204 dst, dstw, src1, src1w, src2, src2w);
2205 }
2206
2207 return SLJIT_SUCCESS;
2208 }
2209
2210 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
2211 {
2212 check_sljit_get_register_index(reg);
2213 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2214 if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
2215 return -1;
2216 #endif
2217 return reg_map[reg];
2218 }
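/* On x86-32, SLJIT_R3-SLJIT_R6 are kept in the stack frame (see
   CHECK_EXTRA_REGS), so they have no machine register index. */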
2219
2220 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
2221 {
2222 check_sljit_get_float_register_index(reg);
2223 return reg;
2224 }
2225
2226 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
2227 void *instruction, sljit_si size)
2228 {
2229 sljit_ub *inst;
2230
2231 CHECK_ERROR();
2232 check_sljit_emit_op_custom(compiler, instruction, size);
2233 SLJIT_ASSERT(size > 0 && size < 16);
2234
2235 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
2236 FAIL_IF(!inst);
2237 INC_SIZE(size);
2238 SLJIT_MEMMOVE(inst, instruction, size);
2239 return SLJIT_SUCCESS;
2240 }
2241
2242 /* --------------------------------------------------------------------- */
2243 /* Floating point operators */
2244 /* --------------------------------------------------------------------- */
2245
2246 /* Alignment + 4 * 16 bytes of constants. */
2247 static sljit_si sse2_data[3 + (4 + 4) * 2];
2248 static sljit_si *sse2_buffer;
2249
2250 static void init_compiler(void)
2251 {
2252 sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
2253 /* Single precision constants. */
2254 sse2_buffer[0] = 0x80000000;
2255 sse2_buffer[4] = 0x7fffffff;
2256 /* Double precision constants. */
2257 sse2_buffer[8] = 0;
2258 sse2_buffer[9] = 0x80000000;
2259 sse2_buffer[12] = 0xffffffff;
2260 sse2_buffer[13] = 0x7fffffff;
2261 }
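/* Layout recap: sse2_buffer + 0 and + 4 hold the single precision sign
   and absolute-value masks (0x80000000 / 0x7fffffff), sse2_buffer + 8
   and + 12 the double precision pair; SLJIT_NEGD xors and SLJIT_ABSD
   ands with the matching 16 byte aligned constant below. */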
2262
2263 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
2264 {
2265 #ifdef SLJIT_IS_FPU_AVAILABLE
2266 return SLJIT_IS_FPU_AVAILABLE;
2267 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2268 if (cpu_has_sse2 == -1)
2269 get_cpu_features();
2270 return cpu_has_sse2;
2271 #else /* SLJIT_DETECT_SSE2 */
2272 return 1;
2273 #endif /* SLJIT_DETECT_SSE2 */
2274 }
2275
2276 static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2277 sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2278 {
2279 sljit_ub *inst;
2280
2281 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2282 FAIL_IF(!inst);
2283 *inst++ = GROUP_0F;
2284 *inst = opcode;
2285 return SLJIT_SUCCESS;
2286 }
2287
2288 static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2289 sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2290 {
2291 sljit_ub *inst;
2292
2293 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2294 FAIL_IF(!inst);
2295 *inst++ = GROUP_0F;
2296 *inst = opcode;
2297 return SLJIT_SUCCESS;
2298 }
2299
2300 static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
2301 sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
2302 {
2303 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2304 }
2305
2306 static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
2307 sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
2308 {
2309 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2310 }
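/* The "single" flag only selects the mandatory prefix: the same opcode
   byte yields MOVSS (F3 0F 10/11) for single and MOVSD (F2 0F 10/11)
   for double precision operands. */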
2311
2312 static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
2313 sljit_si dst, sljit_sw dstw,
2314 sljit_si src, sljit_sw srcw)
2315 {
2316 sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2317 sljit_ub *inst;
2318
2319 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2320 if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
2321 compiler->mode32 = 0;
2322 #endif
2323
2324 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2325 FAIL_IF(!inst);
2326 *inst++ = GROUP_0F;
2327 *inst = CVTTSD2SI_r_xm;
2328
2329 if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
2330 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2331 return SLJIT_SUCCESS;
2332 }
2333
2334 static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
2335 sljit_si dst, sljit_sw dstw,
2336 sljit_si src, sljit_sw srcw)
2337 {
2338 sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2339 sljit_ub *inst;
2340
2341 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2342 if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
2343 compiler->mode32 = 0;
2344 #endif
2345
2346 if (src & SLJIT_IMM) {
2347 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2348 if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
2349 srcw = (sljit_si)srcw;
2350 #endif
2351 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2352 src = TMP_REG1;
2353 srcw = 0;
2354 }
2355
2356 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2357 FAIL_IF(!inst);
2358 *inst++ = GROUP_0F;
2359 *inst = CVTSI2SD_x_rm;
2360
2361 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2362 compiler->mode32 = 1;
2363 #endif
2364 if (dst_r == TMP_FREG)
2365 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2366 return SLJIT_SUCCESS;
2367 }
2368
2369 static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
2370 sljit_si src1, sljit_sw src1w,
2371 sljit_si src2, sljit_sw src2w)
2372 {
2373 compiler->flags_saved = 0;
2374 if (!FAST_IS_REG(src1)) {
2375 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2376 src1 = TMP_FREG;
2377 }
2378 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
2379 }
2380
2381 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
2382 sljit_si dst, sljit_sw dstw,
2383 sljit_si src, sljit_sw srcw)
2384 {
2385 sljit_si dst_r;
2386
2387 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2388 compiler->mode32 = 1;
2389 #endif
2390
2391 CHECK_ERROR();
2392 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2393
2394 if (GET_OPCODE(op) == SLJIT_MOVD) {
2395 if (FAST_IS_REG(dst))
2396 return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
2397 if (FAST_IS_REG(src))
2398 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
2399 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
2400 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2401 }
2402
2403 if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
2404 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2405 if (FAST_IS_REG(src)) {
2406 			/* We overwrite the high bits of the source. From SLJIT's point of view,
2407 this is not an issue.
2408 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
2409 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
2410 }
2411 else {
2412 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
2413 src = TMP_FREG;
2414 }
2415
2416 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
2417 if (dst_r == TMP_FREG)
2418 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2419 return SLJIT_SUCCESS;
2420 }
2421
2422 if (SLOW_IS_REG(dst)) {
2423 dst_r = dst;
2424 if (dst != src)
2425 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2426 }
2427 else {
2428 dst_r = TMP_FREG;
2429 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2430 }
2431
2432 switch (GET_OPCODE(op)) {
2433 case SLJIT_NEGD:
2434 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
2435 break;
2436
2437 case SLJIT_ABSD:
2438 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2439 break;
2440 }
2441
2442 if (dst_r == TMP_FREG)
2443 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2444 return SLJIT_SUCCESS;
2445 }
2446
2447 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
2448 sljit_si dst, sljit_sw dstw,
2449 sljit_si src1, sljit_sw src1w,
2450 sljit_si src2, sljit_sw src2w)
2451 {
2452 sljit_si dst_r;
2453
2454 CHECK_ERROR();
2455 check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2456 ADJUST_LOCAL_OFFSET(dst, dstw);
2457 ADJUST_LOCAL_OFFSET(src1, src1w);
2458 ADJUST_LOCAL_OFFSET(src2, src2w);
2459
2460 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2461 compiler->mode32 = 1;
2462 #endif
2463
2464 if (FAST_IS_REG(dst)) {
2465 dst_r = dst;
2466 if (dst == src1)
2467 ; /* Do nothing here. */
2468 else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) {
2469 /* Swap arguments. */
2470 src2 = src1;
2471 src2w = src1w;
2472 }
2473 else if (dst != src2)
2474 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
2475 else {
2476 dst_r = TMP_FREG;
2477 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2478 }
2479 }
2480 else {
2481 dst_r = TMP_FREG;
2482 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2483 }
2484
2485 switch (GET_OPCODE(op)) {
2486 case SLJIT_ADDD:
2487 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2488 break;
2489
2490 case SLJIT_SUBD:
2491 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2492 break;
2493
2494 case SLJIT_MULD:
2495 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2496 break;
2497
2498 case SLJIT_DIVD:
2499 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2500 break;
2501 }
2502
2503 if (dst_r == TMP_FREG)
2504 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2505 return SLJIT_SUCCESS;
2506 }
2507
2508 /* --------------------------------------------------------------------- */
2509 /* Conditional instructions */
2510 /* --------------------------------------------------------------------- */
2511
2512 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2513 {
2514 sljit_ub *inst;
2515 struct sljit_label *label;
2516
2517 CHECK_ERROR_PTR();
2518 check_sljit_emit_label(compiler);
2519
2520 /* We should restore the flags before the label,
2521 	   since other taken jumps have their own flags as well. */
2522 if (SLJIT_UNLIKELY(compiler->flags_saved))
2523 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2524
2525 if (compiler->last_label && compiler->last_label->size == compiler->size)
2526 return compiler->last_label;
2527
2528 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2529 PTR_FAIL_IF(!label);
2530 set_label(label, compiler);
2531
2532 inst = (sljit_ub*)ensure_buf(compiler, 2);
2533 PTR_FAIL_IF(!inst);
2534
2535 *inst++ = 0;
2536 *inst++ = 0;
2537
2538 return label;
2539 }
2540
2541 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
2542 {
2543 sljit_ub *inst;
2544 struct sljit_jump *jump;
2545
2546 CHECK_ERROR_PTR();
2547 check_sljit_emit_jump(compiler, type);
2548
2549 if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2550 if ((type & 0xff) <= SLJIT_JUMP)
2551 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2552 compiler->flags_saved = 0;
2553 }
2554
2555 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2556 PTR_FAIL_IF_NULL(jump);
2557 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2558 type &= 0xff;
2559
2560 if (type >= SLJIT_CALL1)
2561 PTR_FAIL_IF(call_with_args(compiler, type));
2562
2563 /* Worst case size. */
2564 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2565 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2566 #else
2567 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2568 #endif
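	/* The 64 bit estimate assumes MOV r64, imm64 (10 bytes) plus an
	   indirect jump through the register (3 bytes), and a 2 byte short
	   jump to skip them for conditional types; shorter encodings may be
	   selected when the code is generated. */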
2569
2570 inst = (sljit_ub*)ensure_buf(compiler, 2);
2571 PTR_FAIL_IF_NULL(inst);
2572
2573 *inst++ = 0;
2574 *inst++ = type + 4;
2575 return jump;
2576 }
2577
2578 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
2579 {
2580 sljit_ub *inst;
2581 struct sljit_jump *jump;
2582
2583 CHECK_ERROR();
2584 check_sljit_emit_ijump(compiler, type, src, srcw);
2585 ADJUST_LOCAL_OFFSET(src, srcw);
2586
2587 CHECK_EXTRA_REGS(src, srcw, (void)0);
2588
2589 if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2590 if (type <= SLJIT_JUMP)
2591 FAIL_IF(emit_restore_flags(compiler, 0));
2592 compiler->flags_saved = 0;
2593 }
2594
2595 if (type >= SLJIT_CALL1) {
2596 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2597 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
2598 if (src == SLJIT_R2) {
2599 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2600 src = TMP_REG1;
2601 }
2602 if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
2603 srcw += sizeof(sljit_sw);
2604 #endif
2605 #endif
2606 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2607 if (src == SLJIT_R2) {
2608 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2609 src = TMP_REG1;
2610 }
2611 #endif
2612 FAIL_IF(call_with_args(compiler, type));
2613 }
2614
2615 if (src == SLJIT_IMM) {
2616 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2617 FAIL_IF_NULL(jump);
2618 set_jump(jump, compiler, JUMP_ADDR);
2619 jump->u.target = srcw;
2620
2621 /* Worst case size. */
2622 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2623 compiler->size += 5;
2624 #else
2625 compiler->size += 10 + 3;
2626 #endif
2627
2628 inst = (sljit_ub*)ensure_buf(compiler, 2);
2629 FAIL_IF_NULL(inst);
2630
2631 *inst++ = 0;
2632 *inst++ = type + 4;
2633 }
2634 else {
2635 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2636 /* REX_W is not necessary (src is not immediate). */
2637 compiler->mode32 = 1;
2638 #endif
2639 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2640 FAIL_IF(!inst);
2641 *inst++ = GROUP_FF;
2642 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2643 }
2644 return SLJIT_SUCCESS;
2645 }
2646
2647 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
2648 sljit_si dst, sljit_sw dstw,
2649 sljit_si src, sljit_sw srcw,
2650 sljit_si type)
2651 {
2652 sljit_ub *inst;
2653 sljit_ub cond_set = 0;
2654 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2655 sljit_si reg;
2656 #else
2657 	/* CHECK_EXTRA_REGS might overwrite these values. */
2658 sljit_si dst_save = dst;
2659 sljit_sw dstw_save = dstw;
2660 #endif
2661
2662 CHECK_ERROR();
2663 check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
2664
2665 if (dst == SLJIT_UNUSED)
2666 return SLJIT_SUCCESS;
2667
2668 ADJUST_LOCAL_OFFSET(dst, dstw);
2669 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2670 if (SLJIT_UNLIKELY(compiler->flags_saved))
2671 FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
2672
2673 /* setcc = jcc + 0x10. */
2674 cond_set = get_jump_code(type) + 0x10;
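	/* E.g. the second opcode byte of JE rel32 is 0x84, so SETE uses
	   0x94: each SETcc is its Jcc near opcode plus 0x10. */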
2675
2676 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2677 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
2678 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
2679 FAIL_IF(!inst);
2680 INC_SIZE(4 + 3);
2681 /* Set low register to conditional flag. */
2682 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2683 *inst++ = GROUP_0F;
2684 *inst++ = cond_set;
2685 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
2686 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2687 *inst++ = OR_rm8_r8;
2688 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2689 return SLJIT_SUCCESS;
2690 }
2691
2692 reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2693
2694 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
2695 FAIL_IF(!inst);
2696 INC_SIZE(4 + 4);
2697 /* Set low register to conditional flag. */
2698 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2699 *inst++ = GROUP_0F;
2700 *inst++ = cond_set;
2701 *inst++ = MOD_REG | reg_lmap[reg];
2702 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2703 *inst++ = GROUP_0F;
2704 *inst++ = MOVZX_r_rm8;
2705 *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2706
2707 if (reg != TMP_REG1)
2708 return SLJIT_SUCCESS;
2709
2710 if (GET_OPCODE(op) < SLJIT_ADD) {
2711 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2712 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2713 }
2714 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
2715 compiler->skip_checks = 1;
2716 #endif
2717 return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
2718 #else /* SLJIT_CONFIG_X86_64 */
2719 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2720 if (reg_map[dst] <= 4) {
2721 /* Low byte is accessible. */
2722 inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
2723 FAIL_IF(!inst);
2724 INC_SIZE(3 + 3);
2725 /* Set low byte to conditional flag. */
2726 *inst++ = GROUP_0F;
2727 *inst++ = cond_set;
2728 *inst++ = MOD_REG | reg_map[dst];
2729
2730 *inst++ = GROUP_0F;
2731 *inst++ = MOVZX_r_rm8;
2732 *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2733 return SLJIT_SUCCESS;
2734 }
2735
2736 /* Low byte is not accessible. */
2737 if (cpu_has_cmov == -1)
2738 get_cpu_features();
2739
2740 if (cpu_has_cmov) {
2741 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2742 			/* An xor reg, reg operation would overwrite the flags. */
2743 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2744
2745 inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
2746 FAIL_IF(!inst);
2747 INC_SIZE(3);
2748
2749 *inst++ = GROUP_0F;
2750 /* cmovcc = setcc - 0x50. */
2751 *inst++ = cond_set - 0x50;
2752 *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2753 return SLJIT_SUCCESS;
2754 }
2755
2756 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2757 FAIL_IF(!inst);
2758 INC_SIZE(1 + 3 + 3 + 1);
2759 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2760 /* Set al to conditional flag. */
2761 *inst++ = GROUP_0F;
2762 *inst++ = cond_set;
2763 *inst++ = MOD_REG | 0 /* eax */;
2764
2765 *inst++ = GROUP_0F;
2766 *inst++ = MOVZX_r_rm8;
2767 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2768 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2769 return SLJIT_SUCCESS;
2770 }
2771
2772 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
2773 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
2774 if (dst != SLJIT_R0) {
2775 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2776 FAIL_IF(!inst);
2777 INC_SIZE(1 + 3 + 2 + 1);
2778 /* Set low register to conditional flag. */
2779 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2780 *inst++ = GROUP_0F;
2781 *inst++ = cond_set;
2782 *inst++ = MOD_REG | 0 /* eax */;
2783 *inst++ = OR_rm8_r8;
2784 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2785 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2786 }
2787 else {
2788 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2789 FAIL_IF(!inst);
2790 INC_SIZE(2 + 3 + 2 + 2);
2791 /* Set low register to conditional flag. */
2792 *inst++ = XCHG_r_rm;
2793 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2794 *inst++ = GROUP_0F;
2795 *inst++ = cond_set;
2796 *inst++ = MOD_REG | 1 /* ecx */;
2797 *inst++ = OR_rm8_r8;
2798 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2799 *inst++ = XCHG_r_rm;
2800 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2801 }
2802 return SLJIT_SUCCESS;
2803 }
2804
2805 /* Set TMP_REG1 to the bit. */
2806 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2807 FAIL_IF(!inst);
2808 INC_SIZE(1 + 3 + 3 + 1);
2809 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2810 /* Set al to conditional flag. */
2811 *inst++ = GROUP_0F;
2812 *inst++ = cond_set;
2813 *inst++ = MOD_REG | 0 /* eax */;
2814
2815 *inst++ = GROUP_0F;
2816 *inst++ = MOVZX_r_rm8;
2817 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2818
2819 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2820
2821 if (GET_OPCODE(op) < SLJIT_ADD)
2822 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2823
2824 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
2825 compiler->skip_checks = 1;
2826 #endif
2827 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2828 #endif /* SLJIT_CONFIG_X86_64 */
2829 }
2830
2831 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
2832 {
2833 CHECK_ERROR();
2834 check_sljit_get_local_base(compiler, dst, dstw, offset);
2835 ADJUST_LOCAL_OFFSET(dst, dstw);
2836
2837 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2838
2839 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2840 compiler->mode32 = 0;
2841 #endif
2842
2843 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2844
2845 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2846 if (NOT_HALFWORD(offset)) {
2847 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2848 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2849 SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2850 return compiler->error;
2851 #else
2852 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2853 #endif
2854 }
2855 #endif
2856
2857 if (offset != 0)
2858 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2859 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2860 }
2861
2862 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
2863 {
2864 sljit_ub *inst;
2865 struct sljit_const *const_;
2866 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2867 sljit_si reg;
2868 #endif
2869
2870 CHECK_ERROR_PTR();
2871 check_sljit_emit_const(compiler, dst, dstw, init_value);
2872 ADJUST_LOCAL_OFFSET(dst, dstw);
2873
2874 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2875
2876 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2877 PTR_FAIL_IF(!const_);
2878 set_const(const_, compiler);
2879
2880 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2881 compiler->mode32 = 0;
2882 reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2883
2884 if (emit_load_imm64(compiler, reg, init_value))
2885 return NULL;
2886 #else
2887 if (dst == SLJIT_UNUSED)
2888 dst = TMP_REG1;
2889
2890 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2891 return NULL;
2892 #endif
2893
2894 inst = (sljit_ub*)ensure_buf(compiler, 2);
2895 PTR_FAIL_IF(!inst);
2896
2897 *inst++ = 0;
2898 *inst++ = 1;
2899
2900 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2901 if (dst & SLJIT_MEM)
2902 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2903 return NULL;
2904 #endif
2905
2906 return const_;
2907 }
2908
2909 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2910 {
2911 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2912 *(sljit_sw*)addr = new_addr - (addr + 4);
2913 #else
2914 *(sljit_uw*)addr = new_addr;
2915 #endif
2916 }
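/* Example: on x86-32 the field holds a rel32 displacement, so patching
   a jump at address A to target T stores T - (A + 4); on x86-64 the
   absolute target is written into the imm64 of the emitted MOV. */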
2917
2918 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2919 {
2920 *(sljit_sw*)addr = new_constant;
2921 }
2922