1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
sljit_get_platform_name(void)27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 	return "x86" SLJIT_CPUINFO;
30 }
31 
32 /*
33    32b register indexes:
34      0 - EAX
35      1 - ECX
36      2 - EDX
37      3 - EBX
38      4 - none
39      5 - EBP
40      6 - ESI
41      7 - EDI
42 */
43 
44 /*
45    64b register indexes:
46      0 - RAX
47      1 - RCX
48      2 - RDX
49      3 - RBX
50      4 - none
51      5 - RBP
52      6 - RSI
53      7 - RDI
54      8 - R8   - From now on REX prefix is required
55      9 - R9
56     10 - R10
57     11 - R11
58     12 - R12
59     13 - R13
60     14 - R14
61     15 - R15
62 */
63 
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)

/* Indexed by sljit register number; yields the machine register index
   (see the 32b register index table above). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

/* If p is in the SLJIT_R3..SLJIT_R6 range, rewrite the (p, w) operand pair
   into a stack slot addressed through SLJIT_SP (offset computed from
   SLJIT_LOCALS_OFFSET) and then execute the statement passed in 'do'.
   Note: expands to a bare 'if' statement and evaluates p more than once. */
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
		w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing,
   therefore r12 is better for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 7, 1
};
#else
/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 3,  4,  5,  5, 6,  7,  7, 6, 3, 4, 2,  0, 1
};
#endif

/* REX prefix bytes: REX is the base value, the others add the W, R, X
   or B bit to it. */
#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

/* Limits of a signed 32 bit value, spelled with a 64 bit wide literal
   suffix for the given platform. */
#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

/* Range checks against the signed 32 bit limits above. */
#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

/* No extra (stack based) registers on x86-64: the check expands to nothing. */
#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */
130 
/* Temporary floating point register index. */
#define TMP_FREG	(0)

/* Size flags for emit_x86_instruction: */
/* These are or-ed to the instruction size argument (e.g. "1 | EX86_BIN_INS"). */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400
#define EX86_PREF_F2		0x0800
#define EX86_PREF_F3		0x1000
#define EX86_SSE2_OP1		0x2000
#define EX86_SSE2_OP2		0x4000
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)
146 
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* Naming: the suffix lists the operands, destination first
   (r = register, rm = register or memory, x / xm = SSE2 register / SSE2
   register or memory, i8 / i16 / i32 = immediate, EAX = accumulator form).
   Values written as (N << 3) are not opcodes themselves: they are or-ed
   into bits 3..5 of the byte following the group opcode named in the
   comment (BINARY, SHIFT, GROUP_0F, GROUP_F7, GROUP_FF). */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CVTPD2PS_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JNE_i8		0x75
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57
246 
/* Group (multi-instruction) opcode bytes. The concrete operation is
   selected by a following byte or by bits or-ed into the next byte. */
#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3

/* Values for the two topmost (mod) bits of the ModRM byte. */
#define MOD_REG		0xc0
#define MOD_DISP8	0x40
258 
/* The macros below all write through (and advance) a local variable
   named 'inst', which must point into the instruction buffer. */

/* Stores the record length byte and adds the same length to compiler->size.
   Requires a local 'compiler' as well. */
#define INC_SIZE(s)			(*inst++ = (s), compiler->size += (s))

/* Single byte push / pop of machine register r, and near returns. */
#define PUSH_REG(r)			(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)			(*inst++ = (POP_r + (r)))
#define RET()				(*inst++ = (RET_near))
/* Only the low byte of the 16 bit immediate is set, the high byte is zero. */
#define RET_I16(n)			(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)		(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
267 
/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore it is harmless if several threads
   overwrite them when they detect the CPU features at the same time.
   The initial value is -1; get_cpu_features() stores 0 or 1. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_s32 cpu_has_sse2 = -1;
#endif
static sljit_s32 cpu_has_cmov = -1;
275 
276 #ifdef _WIN32_WCE
277 #include <cmnintrin.h>
278 #elif defined(_MSC_VER) && _MSC_VER >= 1400
279 #include <intrin.h>
280 #endif
281 
282 /******************************************************/
283 /*    Unaligned-store functions                       */
284 /******************************************************/
285 
sljit_unaligned_store_s16(void * addr,sljit_s16 value)286 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
287 {
288 	SLJIT_MEMCPY(addr, &value, sizeof(value));
289 }
290 
sljit_unaligned_store_s32(void * addr,sljit_s32 value)291 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
292 {
293 	SLJIT_MEMCPY(addr, &value, sizeof(value));
294 }
295 
sljit_unaligned_store_sw(void * addr,sljit_sw value)296 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
297 {
298 	SLJIT_MEMCPY(addr, &value, sizeof(value));
299 }
300 
301 /******************************************************/
302 /*    Utility functions                               */
303 /******************************************************/
304 
/* Executes CPUID with EAX = 1 and records the feature bits of interest
   from the returned EDX value in the cpu_has_* static variables.
   Three compiler specific variants are provided: the MSVC intrinsic,
   GCC style inline assembly, and MSVC style inline assembly. */
static void get_cpu_features(void)
{
	sljit_u32 features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	/* Index 3 holds the EDX output. */
	features = (sljit_u32)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	/* EDX bit 26: SSE2 support. */
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	/* EDX bit 15: CMOV support. */
	cpu_has_cmov = (features >> 15) & 0x1;
}
355 
get_jump_code(sljit_s32 type)356 static sljit_u8 get_jump_code(sljit_s32 type)
357 {
358 	switch (type) {
359 	case SLJIT_EQUAL:
360 	case SLJIT_EQUAL_F64:
361 		return 0x84 /* je */;
362 
363 	case SLJIT_NOT_EQUAL:
364 	case SLJIT_NOT_EQUAL_F64:
365 		return 0x85 /* jne */;
366 
367 	case SLJIT_LESS:
368 	case SLJIT_LESS_F64:
369 		return 0x82 /* jc */;
370 
371 	case SLJIT_GREATER_EQUAL:
372 	case SLJIT_GREATER_EQUAL_F64:
373 		return 0x83 /* jae */;
374 
375 	case SLJIT_GREATER:
376 	case SLJIT_GREATER_F64:
377 		return 0x87 /* jnbe */;
378 
379 	case SLJIT_LESS_EQUAL:
380 	case SLJIT_LESS_EQUAL_F64:
381 		return 0x86 /* jbe */;
382 
383 	case SLJIT_SIG_LESS:
384 		return 0x8c /* jl */;
385 
386 	case SLJIT_SIG_GREATER_EQUAL:
387 		return 0x8d /* jnl */;
388 
389 	case SLJIT_SIG_GREATER:
390 		return 0x8f /* jnle */;
391 
392 	case SLJIT_SIG_LESS_EQUAL:
393 		return 0x8e /* jle */;
394 
395 	case SLJIT_OVERFLOW:
396 	case SLJIT_MUL_OVERFLOW:
397 		return 0x80 /* jo */;
398 
399 	case SLJIT_NOT_OVERFLOW:
400 	case SLJIT_MUL_NOT_OVERFLOW:
401 		return 0x81 /* jno */;
402 
403 	case SLJIT_UNORDERED_F64:
404 		return 0x8a /* jp */;
405 
406 	case SLJIT_ORDERED_F64:
407 		return 0x8b /* jpo */;
408 	}
409 	return 0;
410 }
411 
/* Forward declarations: the definitions are not in this part of the file
   but the functions are called by the code generator below. */
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type);
#endif
417 
/* Emits the opcode byte(s) of a relative jump / call at code_ptr and
   reserves room for its displacement, which is filled in later.

   jump     - jump descriptor; jump->addr must hold the address of the
              instruction on entry and is advanced past the opcode
              byte(s), so it points to the displacement on return.
              PATCH_MB (8 bit displacement) or PATCH_MW (32 bit
              displacement) is added to jump->flags.
   code_ptr - current output position.
   code     - start of the generated code (base for label offsets).
   type     - sljit jump type.

   Returns the output position after the reserved displacement. On x86-64
   the work is delegated to generate_far_jump_code() when the target is
   out of the signed 32 bit range. */
static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type)
{
	sljit_uw label_addr;
	sljit_sw distance;
	sljit_s32 short_jump;

	label_addr = (jump->flags & JUMP_LABEL) ? (sljit_uw)(code + jump->u.label->size) : jump->u.target;

	/* The short form is two bytes long: opcode + 8 bit displacement. */
	distance = (sljit_sw)(label_addr - (jump->addr + 2));
	short_jump = (distance >= -128 && distance <= 127);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	distance = (sljit_sw)(label_addr - (jump->addr + 1));
	if (distance > HALFWORD_MAX || distance < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		*code_ptr++ = short_jump ? JMP_i8 : JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		/* Calls are always emitted with a 32 bit displacement. */
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		/* One byte conditional jump: two byte form's second byte - 0x10. */
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		return code_ptr + sizeof(sljit_s8);
	}

	jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	return code_ptr + sizeof(sljit_sw);
#else
	return code_ptr + sizeof(sljit_s32);
#endif
}
470 
sljit_generate_code(struct sljit_compiler * compiler)471 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
472 {
473 	struct sljit_memory_fragment *buf;
474 	sljit_u8 *code;
475 	sljit_u8 *code_ptr;
476 	sljit_u8 *buf_ptr;
477 	sljit_u8 *buf_end;
478 	sljit_u8 len;
479 
480 	struct sljit_label *label;
481 	struct sljit_jump *jump;
482 	struct sljit_const *const_;
483 
484 	CHECK_ERROR_PTR();
485 	CHECK_PTR(check_sljit_generate_code(compiler));
486 	reverse_buf(compiler);
487 
488 	/* Second code generation pass. */
489 	code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
490 	PTR_FAIL_WITH_EXEC_IF(code);
491 	buf = compiler->buf;
492 
493 	code_ptr = code;
494 	label = compiler->labels;
495 	jump = compiler->jumps;
496 	const_ = compiler->consts;
497 	do {
498 		buf_ptr = buf->memory;
499 		buf_end = buf_ptr + buf->used_size;
500 		do {
501 			len = *buf_ptr++;
502 			if (len > 0) {
503 				/* The code is already generated. */
504 				SLJIT_MEMCPY(code_ptr, buf_ptr, len);
505 				code_ptr += len;
506 				buf_ptr += len;
507 			}
508 			else {
509 				if (*buf_ptr >= 4) {
510 					jump->addr = (sljit_uw)code_ptr;
511 					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
512 						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
513 					else
514 						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
515 					jump = jump->next;
516 				}
517 				else if (*buf_ptr == 0) {
518 					label->addr = (sljit_uw)code_ptr;
519 					label->size = code_ptr - code;
520 					label = label->next;
521 				}
522 				else if (*buf_ptr == 1) {
523 					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
524 					const_ = const_->next;
525 				}
526 				else {
527 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
528 					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
529 					buf_ptr++;
530 					sljit_unaligned_store_sw(code_ptr, *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)));
531 					code_ptr += sizeof(sljit_sw);
532 					buf_ptr += sizeof(sljit_sw) - 1;
533 #else
534 					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
535 					buf_ptr += sizeof(sljit_sw);
536 #endif
537 				}
538 				buf_ptr++;
539 			}
540 		} while (buf_ptr < buf_end);
541 		SLJIT_ASSERT(buf_ptr == buf_end);
542 		buf = buf->next;
543 	} while (buf);
544 
545 	SLJIT_ASSERT(!label);
546 	SLJIT_ASSERT(!jump);
547 	SLJIT_ASSERT(!const_);
548 
549 	jump = compiler->jumps;
550 	while (jump) {
551 		if (jump->flags & PATCH_MB) {
552 			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) <= 127);
553 			*(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8)));
554 		} else if (jump->flags & PATCH_MW) {
555 			if (jump->flags & JUMP_LABEL) {
556 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
557 				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))));
558 #else
559 				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
560 				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))));
561 #endif
562 			}
563 			else {
564 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
565 				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))));
566 #else
567 				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
568 				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32))));
569 #endif
570 			}
571 		}
572 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
573 		else if (jump->flags & PATCH_MD)
574 			sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
575 #endif
576 
577 		jump = jump->next;
578 	}
579 
580 	/* Maybe we waste some space because of short jumps. */
581 	SLJIT_ASSERT(code_ptr <= code + compiler->size);
582 	compiler->error = SLJIT_ERR_COMPILED;
583 	compiler->executable_size = code_ptr - code;
584 	return (void*)code;
585 }
586 
587 /* --------------------------------------------------------------------- */
588 /*  Operators                                                            */
589 /* --------------------------------------------------------------------- */
590 
/* Forward declarations of emitters which are used before their definition. */

/* Binary operation emitter; the name suggests commutative operations
   (TODO confirm against the definition). */
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

/* Binary operation emitter; the name suggests non-commutative operations
   (TODO confirm against the definition). */
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

/* Word sized move from (src, srcw) to (dst, dstw). */
static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw);
606 
emit_save_flags(struct sljit_compiler * compiler)607 static SLJIT_INLINE sljit_s32 emit_save_flags(struct sljit_compiler *compiler)
608 {
609 	sljit_u8 *inst;
610 
611 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
612 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
613 	FAIL_IF(!inst);
614 	INC_SIZE(5);
615 #else
616 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
617 	FAIL_IF(!inst);
618 	INC_SIZE(6);
619 	*inst++ = REX_W;
620 #endif
621 	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
622 	*inst++ = 0x64;
623 	*inst++ = 0x24;
624 	*inst++ = (sljit_u8)sizeof(sljit_sw);
625 	*inst++ = PUSHF;
626 	compiler->flags_saved = 1;
627 	return SLJIT_SUCCESS;
628 }
629 
emit_restore_flags(struct sljit_compiler * compiler,sljit_s32 keep_flags)630 static SLJIT_INLINE sljit_s32 emit_restore_flags(struct sljit_compiler *compiler, sljit_s32 keep_flags)
631 {
632 	sljit_u8 *inst;
633 
634 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
635 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
636 	FAIL_IF(!inst);
637 	INC_SIZE(5);
638 	*inst++ = POPF;
639 #else
640 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
641 	FAIL_IF(!inst);
642 	INC_SIZE(6);
643 	*inst++ = POPF;
644 	*inst++ = REX_W;
645 #endif
646 	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
647 	*inst++ = 0x64;
648 	*inst++ = 0x24;
649 	*inst++ = (sljit_u8)(-(sljit_s8)sizeof(sljit_sw));
650 	compiler->flags_saved = keep_flags;
651 	return SLJIT_SUCCESS;
652 }
653 
654 #ifdef _WIN32
655 #include <malloc.h>
656 
sljit_grow_stack(sljit_sw local_size)657 static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
658 {
659 	/* Workaround for calling the internal _chkstk() function on Windows.
660 	This function touches all 4k pages belongs to the requested stack space,
661 	which size is passed in local_size. This is necessary on Windows where
662 	the stack can only grow in 4k steps. However, this function just burn
663 	CPU cycles if the stack is large enough. However, you don't know it in
664 	advance, so it must always be called. I think this is a bad design in
665 	general even if it has some reasons. */
666 	*(volatile sljit_s32*)alloca(local_size) = 0;
667 }
668 
669 #endif
670 
671 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
672 #include "sljitNativeX86_32.c"
673 #else
674 #include "sljitNativeX86_64.c"
675 #endif
676 
/* Emits a word sized move from (src, srcw) to (dst, dstw).

   - Unused destination: a memory source is still loaded (into TMP_REG1),
     anything else emits nothing.
   - Register source: a single store / register move.
   - Immediate source: the shortest available immediate form; on x86-64
     immediates outside the signed 32 bit range go through
     emit_load_imm64 (via TMP_REG2 for a memory destination).
   - Memory source: one load for a register destination, otherwise a
     load into TMP_REG1 followed by a store.

   Returns SLJIT_SUCCESS or the error code of the failing emitter. */
static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *code;
	sljit_s32 load_r;

	if (dst == SLJIT_UNUSED) {
		/* No destination, doesn't need to setup flags. */
		if (!(src & SLJIT_MEM))
			return SLJIT_SUCCESS;

		code = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
		FAIL_IF(!code);
		*code = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src)) {
		code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!code);
		*code = MOV_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (compiler->mode32)
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
			if (NOT_HALFWORD(srcw))
				return emit_load_imm64(compiler, dst, srcw);
			/* A 64 bit move of a halfword immediate uses the generic form below. */
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			/* No 64 bit immediate store form is used: go through TMP_REG2. */
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!code);
			*code = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	/* Memory source: load it into the destination register if there is
	   one, otherwise into TMP_REG1. */
	load_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	code = emit_x86_instruction(compiler, 1, load_r, 0, src, srcw);
	FAIL_IF(!code);
	*code = MOV_r_rm;

	if (FAST_IS_REG(dst))
		return SLJIT_SUCCESS;

	/* Memory to memory move. Requires two instruction. */
	code = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!code);
	*code = MOV_rm_r;
	return SLJIT_SUCCESS;
}
741 
/* Calls emit_mov() and returns from the enclosing function on failure
   (through FAIL_IF). Note: the expansion already ends with a semicolon,
   so "EMIT_MOV(...);" yields an extra empty statement. */
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
744 
/* Emits an operation without explicit operands: breakpoint, nop, and the
   long multiply / divide family which works on SLJIT_R0 and SLJIT_R1.
   Opcodes not listed in the switch emit nothing.
   Returns SLJIT_SUCCESS or the compiler error set by a failing step. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* The code below relies on these machine register assignments. */
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_I32_OP;
#endif
		/* (op | 0x2) maps a DIVMOD opcode to the matching DIV opcode. */
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);

		op = GET_OPCODE(op);
		if ((op | 0x2) == SLJIT_DIV_UW) {
			/* Unsigned division: clear a register with xor. Where R1 is
			   cleared, its value is copied to TMP_REG1 first. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if ((op | 0x2) == SLJIT_DIV_SW) {
			/* Signed division: emit the CDQ opcode, with a REX.W prefix
			   for 64 bit operations. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

		/* Emit the GROUP_F7 instruction with a register operand. The
		   operation bits are or-ed into the last byte by the switch below. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
		/* A prefix byte is needed for 64 bit mode and / or for TMP_REG1
		   (its machine register index is above 7, see the assert). */
		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
		else if (op >= SLJIT_DIVMOD_UW)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_LMUL_UW:
			*inst |= MUL;
			break;
		case SLJIT_LMUL_SW:
			*inst |= IMUL;
			break;
		case SLJIT_DIVMOD_UW:
		case SLJIT_DIV_UW:
			*inst |= DIV;
			break;
		case SLJIT_DIVMOD_SW:
		case SLJIT_DIV_SW:
			*inst |= IDIV;
			break;
		}
		/* Final copy from TMP_REG1 to R1; which opcodes need it depends
		   on the register assignment of the platform. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		if (op <= SLJIT_DIVMOD_SW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
		if (op >= SLJIT_DIV_UW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}
888 
/* Emits a single byte as its own one byte record. Requires the locals
   'inst' and 'compiler'; returns from the enclosing function when the
   buffer allocation fails (through FAIL_IF). */
#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)
896 
/* Emits a byte sized move from (src, srcw) to (dst, dstw).

   sign - non-zero selects sign extension (MOVSX), zero selects zero
          extension (MOVZX) when the value is loaded into a register.

   On x86-32 the byte forms are only used with registers whose reg_map
   value is below 4, which is why that configuration needs the extra
   register shuffling below.

   Returns SLJIT_SUCCESS or the compiler error set by a failing step. */
static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			/* Register destination: a full width immediate move is emitted. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		/* Otherwise: store with a byte sized immediate. */
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	/* dst_r is the register which holds the value before an eventual store. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
		/* Register to memory: no extension is needed, only the store below. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			/* Copy first, then extend dst in place. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			/* Neither register can be used by the byte forms: extend with
			   a shift pair (signed) or a mask (unsigned) instead. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				/* and reg, 0xff */
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find a non-used register, whose reg_map[src] < 4. */
			/* The candidates are R0, R1 and R2; one which is neither the
			   base nor the offset register of dst is selected. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			/* Swap the value into work_r (one byte xchg form for R0). */
			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			/* Swap back to restore the original content of work_r. */
			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}
1041 
/* Emits a 16-bit move from src to dst.
   - compiler: code generator state.
   - sign: non-zero selects MOVSX (sign extension) for loads into a
     register, zero selects MOVZX (zero extension).
   - dst/dstw, src/srcw: destination and source operands.
   Returns SLJIT_SUCCESS, or leaves through FAIL_IF when an instruction
   cannot be emitted. */
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 value_r;
	const sljit_s32 dst_in_mem = dst & SLJIT_MEM;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	/* A discarded result only matters when the source is a memory read. */
	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS;

	if (src & SLJIT_IMM) {
		if (!FAST_IS_REG(dst)) {
			/* Immediate to memory: store a half-sized immediate. */
			inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
		}

		/* Immediate to register: load the value as a full immediate. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
#endif
	}

	if (dst_in_mem && FAST_IS_REG(src)) {
		/* Register to memory: the source register is stored directly. */
		value_r = src;
	}
	else {
		/* Extend the 16-bit source into the destination register, or into
		   TMP_REG1 when the destination is not a register. */
		value_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
		inst = emit_x86_instruction(compiler, 2, value_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (!dst_in_mem)
		return SLJIT_SUCCESS;

	/* Store the value register to the memory destination. */
	inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, value_r, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}
1092 
/* Emits a one-operand GROUP_F7 instruction (the callers visible in this file
   pass NOT_rm or NEG_rm as opcode) computing dst = op(src).
   The operation is applied in place when dst and src are the same operand,
   directly on dst when dst is a register, and on TMP_REG1 otherwise
   (followed by a store to dst unless dst is SLJIT_UNUSED).
   Returns SLJIT_SUCCESS, or leaves through FAIL_IF / EMIT_MOV on failure. */
static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 target = dst;
	sljit_sw targetw = dstw;
	sljit_s32 store_back = 0;

	if (dst == SLJIT_UNUSED) {
		/* Result is discarded: operate on a scratch copy. */
		target = TMP_REG1;
		targetw = 0;
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	}
	else if (dst == src && dstw == srcw) {
		/* Same input and output: nothing to copy. */
	}
	else if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
	}
	else {
		/* Memory destination with a different source: go through TMP_REG1. */
		target = TMP_REG1;
		targetw = 0;
		store_back = 1;
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	}

	inst = emit_x86_instruction(compiler, 1, 0, 0, target, targetw);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;

	if (store_back)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
1131 
/* Emits dst = ~src followed by an OR of the result with itself, so that the
   status flags are produced from the result (this variant is selected by
   sljit_emit_op1 when SLJIT_SET_E is requested).
   The work is done in dst when dst is a register, and in TMP_REG1 otherwise;
   a memory destination receives the value with a final move.
   Returns SLJIT_SUCCESS, or leaves through FAIL_IF / EMIT_MOV on failure. */
static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 work_r = TMP_REG1;
	sljit_sw work_w = 0;
	sljit_s32 store_back = 0;

	if (dst == SLJIT_UNUSED) {
		/* Only the flags are needed: keep the scratch register. */
	}
	else if (FAST_IS_REG(dst)) {
		work_r = dst;
		work_w = dstw;
	}
	else {
		store_back = 1;
	}

	EMIT_MOV(compiler, work_r, 0, src, srcw);

	/* not work_r */
	inst = emit_x86_instruction(compiler, 1, 0, 0, work_r, work_w);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;

	/* or work_r, work_r */
	inst = emit_x86_instruction(compiler, 1, work_r, 0, work_r, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;

	if (store_back)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
1171 
/* Emits code for "count leading zeros": dst = clz(src).
   - compiler: code generator state.
   - op_flags: flag bits of the operation; only SLJIT_I32_OP is read here,
     and only on x86-64, to choose between 32 and 64 bit operand width.
   - dst/dstw: destination operand (SLJIT_UNUSED: only flags are produced).
   - src/srcw: source operand.
   The value is computed as (bit index found by BSR) XOR (width - 1). The
   destination register is preloaded with (width + width - 1) and is only
   overwritten by the BSR result through CMOVNE (or a JE-guarded MOV when
   CMOV is unavailable), so the preloaded constant XOR (width - 1) = width
   remains when the move does not happen.
   Returns SLJIT_SUCCESS, or leaves through FAIL_IF / EMIT_MOV on failure. */
static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	/* op_flags is not read in the x86-32 build. */
	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag: TMP_REG1 = ~src, then shift right so that
		   only the former top bit remains. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	/* The BSR below is emitted with a register or memory source, so an
	   immediate is first loaded into TMP_REG1. */
	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	/* bsr TMP_REG1, src */
	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register: one of R0/R1/R2 that is not
		   part of the destination address. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		/* Park the borrowed register's current value in the destination
		   slot; the XCHG at the end of the function swaps it back. */
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	/* Preload the value that is kept when the conditional move is skipped. */
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	/* The preload is emitted with mode32 cleared; the mode requested by
	   op_flags is restored right after it. */
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif

	/* cpu_has_cmov == -1 means the CPU features have not been queried yet. */
	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		/* cmovne dst_r, TMP_REG1 */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
		/* No CMOV: hand-encode "je <skip>; mov dst_r, TMP_REG1". The jump
		   distance is the byte length of the MOV that follows it. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		/* The MOV is always encoded with REX.W here, independent of mode32. */
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

	/* xor dst_r, width - 1: turns the bit index into the leading zero count
	   and the preloaded constant into the operand width. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		/* Swap the result into memory and the parked value back into the
		   borrowed register. */
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	/* On x86-64 a memory destination was computed in TMP_REG2. */
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
1280 
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1281 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1282 	sljit_s32 dst, sljit_sw dstw,
1283 	sljit_s32 src, sljit_sw srcw)
1284 {
1285 	sljit_u8* inst;
1286 	sljit_s32 update = 0;
1287 	sljit_s32 op_flags = GET_ALL_FLAGS(op);
1288 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1289 	sljit_s32 dst_is_ereg = 0;
1290 	sljit_s32 src_is_ereg = 0;
1291 #else
1292 #	define src_is_ereg 0
1293 #endif
1294 
1295 	CHECK_ERROR();
1296 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1297 	ADJUST_LOCAL_OFFSET(dst, dstw);
1298 	ADJUST_LOCAL_OFFSET(src, srcw);
1299 
1300 	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1301 	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
1302 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1303 	compiler->mode32 = op_flags & SLJIT_I32_OP;
1304 #endif
1305 
1306 	op = GET_OPCODE(op);
1307 	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1308 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1309 		compiler->mode32 = 0;
1310 #endif
1311 
1312 		if (op_flags & SLJIT_I32_OP) {
1313 			if (FAST_IS_REG(src) && src == dst) {
1314 				if (!TYPE_CAST_NEEDED(op))
1315 					return SLJIT_SUCCESS;
1316 			}
1317 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1318 			if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
1319 				op = SLJIT_MOV_U32;
1320 			if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
1321 				op = SLJIT_MOVU_U32;
1322 			if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
1323 				op = SLJIT_MOV_S32;
1324 			if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
1325 				op = SLJIT_MOVU_S32;
1326 #endif
1327 		}
1328 
1329 		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
1330 		if (op >= SLJIT_MOVU) {
1331 			update = 1;
1332 			op -= 8;
1333 		}
1334 
1335 		if (src & SLJIT_IMM) {
1336 			switch (op) {
1337 			case SLJIT_MOV_U8:
1338 				srcw = (sljit_u8)srcw;
1339 				break;
1340 			case SLJIT_MOV_S8:
1341 				srcw = (sljit_s8)srcw;
1342 				break;
1343 			case SLJIT_MOV_U16:
1344 				srcw = (sljit_u16)srcw;
1345 				break;
1346 			case SLJIT_MOV_S16:
1347 				srcw = (sljit_s16)srcw;
1348 				break;
1349 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1350 			case SLJIT_MOV_U32:
1351 				srcw = (sljit_u32)srcw;
1352 				break;
1353 			case SLJIT_MOV_S32:
1354 				srcw = (sljit_s32)srcw;
1355 				break;
1356 #endif
1357 			}
1358 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1359 			if (SLJIT_UNLIKELY(dst_is_ereg))
1360 				return emit_mov(compiler, dst, dstw, src, srcw);
1361 #endif
1362 		}
1363 
1364 		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
1365 			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
1366 			FAIL_IF(!inst);
1367 			*inst = LEA_r_m;
1368 			src &= SLJIT_MEM | 0xf;
1369 			srcw = 0;
1370 		}
1371 
1372 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1373 		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1374 			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1375 			dst = TMP_REG1;
1376 		}
1377 #endif
1378 
1379 		switch (op) {
1380 		case SLJIT_MOV:
1381 		case SLJIT_MOV_P:
1382 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1383 		case SLJIT_MOV_U32:
1384 		case SLJIT_MOV_S32:
1385 #endif
1386 			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1387 			break;
1388 		case SLJIT_MOV_U8:
1389 			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1390 			break;
1391 		case SLJIT_MOV_S8:
1392 			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1393 			break;
1394 		case SLJIT_MOV_U16:
1395 			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1396 			break;
1397 		case SLJIT_MOV_S16:
1398 			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1399 			break;
1400 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1401 		case SLJIT_MOV_U32:
1402 			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1403 			break;
1404 		case SLJIT_MOV_S32:
1405 			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1406 			break;
1407 #endif
1408 		}
1409 
1410 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1411 		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1412 			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1413 #endif
1414 
1415 		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
1416 			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
1417 			FAIL_IF(!inst);
1418 			*inst = LEA_r_m;
1419 		}
1420 		return SLJIT_SUCCESS;
1421 	}
1422 
1423 	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
1424 		compiler->flags_saved = 0;
1425 
1426 	switch (op) {
1427 	case SLJIT_NOT:
1428 		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
1429 			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1430 		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1431 
1432 	case SLJIT_NEG:
1433 		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1434 			FAIL_IF(emit_save_flags(compiler));
1435 		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1436 
1437 	case SLJIT_CLZ:
1438 		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1439 			FAIL_IF(emit_save_flags(compiler));
1440 		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1441 	}
1442 
1443 	return SLJIT_SUCCESS;
1444 
1445 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1446 #	undef src_is_ereg
1447 #endif
1448 }
1449 
/* Helper macros for binary operations with an immediate operand.
   Both macros are statement sequences (not expressions) and rely on the
   names `compiler` and, for BINARY_IMM, `inst` being declared in the
   enclosing function; they check for failures with FAIL_IF. */

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* arg = arg <op> immw. Immediates accepted by IS_HALFWORD (or any immediate
   in mode32) are encoded in the instruction and op_imm is OR-ed into its
   second byte; other immediates are first loaded into TMP_REG2 with
   emit_load_imm64 and combined using the op_mr opcode. */
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

/* Emits the op_eax_imm opcode followed by a 32 bit immediate; a REX_W
   prefix is passed unless mode32 is set. */
#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

/* arg = arg <op> immw. The immediate is always encoded in the instruction;
   op_mr is accepted for symmetry with the 64 bit variant and is unused. */
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

/* Emits the op_eax_imm opcode followed by the immediate. */
#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
1479 
/* Emits "dst = dst <op> src" where dst already holds one of the operands.
   Shared by both in-place cases of emit_cum_binary. */
static sljit_s32 emit_cum_binary_in_place(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (src & SLJIT_IMM) {
		/* Immediates outside the signed 8 bit range use the dedicated
		   accumulator opcode when the destination is SLJIT_R0. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((dst == SLJIT_R0) && (srcw > 127 || srcw < -128) && (compiler->mode32 || IS_HALFWORD(srcw))) {
#else
		if ((dst == SLJIT_R0) && (srcw > 127 || srcw < -128)) {
#endif
			BINARY_EAX_IMM(op_eax_imm, srcw);
		}
		else {
			BINARY_IMM(op_imm, op_mr, srcw, dst, dstw);
		}
	}
	else if (FAST_IS_REG(dst)) {
		/* Register destination: reg <op>= reg/mem. */
		inst = emit_x86_instruction(compiler, 1, dst, dstw, src, srcw);
		FAIL_IF(!inst);
		*inst = op_rm;
	}
	else if (FAST_IS_REG(src)) {
		/* Memory destination, register source: mem <op>= reg.
		   Special exception for sljit_emit_op_flags. */
		inst = emit_x86_instruction(compiler, 1, src, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = op_mr;
	}
	else {
		/* Memory to memory: route the source through TMP_REG1. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = op_mr;
	}
	return SLJIT_SUCCESS;
}

/* Emits a cumulative (operand order independent) binary operation:
   dst = src1 <op> src2.
   - op_rm / op_mr: opcodes for the "reg, reg/mem" and "reg/mem, reg" forms.
   - op_imm: opcode extension used with an encoded immediate.
   - op_eax_imm: opcode of the accumulator-with-immediate form.
   When dst equals either source, the operation is applied in place.
   Otherwise src1 is copied into a work register (dst itself when it is a
   register, TMP_REG1 otherwise), combined with src2, and stored to dst if
   dst is a memory operand. With dst == SLJIT_UNUSED the result stays in
   TMP_REG1.
   Returns SLJIT_SUCCESS, or leaves through FAIL_IF / EMIT_MOV on failure. */
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 work_r = TMP_REG1;
	sljit_s32 store_back = 0;

	if (dst != SLJIT_UNUSED) {
		if (dst == src1 && dstw == src1w)
			return emit_cum_binary_in_place(compiler, op_rm, op_mr, op_imm, op_eax_imm,
				dst, dstw, src2, src2w);

		/* Only for cumulative operations: the operands may be swapped. */
		if (dst == src2 && dstw == src2w)
			return emit_cum_binary_in_place(compiler, op_rm, op_mr, op_imm, op_eax_imm,
				dst, dstw, src1, src1w);

		/* General version. Computing in a register and storing once
		   requires less memory writing for a memory destination. */
		if (FAST_IS_REG(dst))
			work_r = dst;
		else
			store_back = 1;
	}

	EMIT_MOV(compiler, work_r, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
		BINARY_IMM(op_imm, op_mr, src2w, work_r, 0);
	}
	else {
		inst = emit_x86_instruction(compiler, 1, work_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = op_rm;
	}

	if (store_back)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
1595 
1596 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
1597 	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
1598 	sljit_s32 dst, sljit_sw dstw,
1599 	sljit_s32 src1, sljit_sw src1w,
1600 	sljit_s32 src2, sljit_sw src2w)
1601 {
1602 	sljit_u8* inst;
1603 
1604 	if (dst == SLJIT_UNUSED) {
1605 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1606 		if (src2 & SLJIT_IMM) {
1607 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1608 		}
1609 		else {
1610 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1611 			FAIL_IF(!inst);
1612 			*inst = op_rm;
1613 		}
1614 		return SLJIT_SUCCESS;
1615 	}
1616 
1617 	if (dst == src1 && dstw == src1w) {
1618 		if (src2 & SLJIT_IMM) {
1619 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1620 			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1621 #else
1622 			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1623 #endif
1624 				BINARY_EAX_IMM(op_eax_imm, src2w);
1625 			}
1626 			else {
1627 				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1628 			}
1629 		}
1630 		else if (FAST_IS_REG(dst)) {
1631 			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1632 			FAIL_IF(!inst);
1633 			*inst = op_rm;
1634 		}
1635 		else if (FAST_IS_REG(src2)) {
1636 			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1637 			FAIL_IF(!inst);
1638 			*inst = op_mr;
1639 		}
1640 		else {
1641 			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1642 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1643 			FAIL_IF(!inst);
1644 			*inst = op_mr;
1645 		}
1646 		return SLJIT_SUCCESS;
1647 	}
1648 
1649 	/* General version. */
1650 	if (FAST_IS_REG(dst) && dst != src2) {
1651 		EMIT_MOV(compiler, dst, 0, src1, src1w);
1652 		if (src2 & SLJIT_IMM) {
1653 			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1654 		}
1655 		else {
1656 			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1657 			FAIL_IF(!inst);
1658 			*inst = op_rm;
1659 		}
1660 	}
1661 	else {
1662 		/* This version requires less memory writing. */
1663 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1664 		if (src2 & SLJIT_IMM) {
1665 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1666 		}
1667 		else {
1668 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1669 			FAIL_IF(!inst);
1670 			*inst = op_rm;
1671 		}
1672 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1673 	}
1674 
1675 	return SLJIT_SUCCESS;
1676 }
1677 
/* Emits a multiplication: dst = src1 * src2, using the IMUL forms.
   - compiler: code generator state.
   - dst/dstw: destination operand; when it is not a register the product
     is computed in TMP_REG1 and stored to dst at the end.
   - src1/src1w, src2/src2w: the factors; either (or both) may be immediate.
   Returns SLJIT_SUCCESS, or leaves through FAIL_IF / EMIT_MOV on failure. */
static sljit_s32 emit_mul(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		/* dst_r already holds src1: imul dst_r, src2 */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		/* dst_r already holds src2: imul dst_r, src1 */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		/* Both factors immediate: load src2 into dst_r so that only one
		   immediate remains. */
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			/* Three operand form with an 8 bit immediate; the immediate
			   byte is appended after the instruction. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_s8)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			/* Three operand form with a word sized immediate appended. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src1w);
		}
#else
		else if (IS_HALFWORD(src1w)) {
			/* Three operand form with a 32 bit immediate appended. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
		}
		else {
			/* The immediate is not accepted by IS_HALFWORD: load it into
			   TMP_REG2 and use the two operand form. */
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. Mirror of the branch above with the
		   roles of src1 and src2 exchanged. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_s8)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src2w);
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. dst_r is overwritten with src1
		   before src2 is read, so it must not take part in the address of
		   src2; fall back to TMP_REG1 in that case. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	/* The product was computed in the scratch register: store it to dst. */
	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
1807 
1808 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 keep_flags,
1809 	sljit_s32 dst, sljit_sw dstw,
1810 	sljit_s32 src1, sljit_sw src1w,
1811 	sljit_s32 src2, sljit_sw src2w)
1812 {
1813 	sljit_u8* inst;
1814 	sljit_s32 dst_r, done = 0;
1815 
1816 	/* These cases better be left to handled by normal way. */
1817 	if (!keep_flags) {
1818 		if (dst == src1 && dstw == src1w)
1819 			return SLJIT_ERR_UNSUPPORTED;
1820 		if (dst == src2 && dstw == src2w)
1821 			return SLJIT_ERR_UNSUPPORTED;
1822 	}
1823 
1824 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1825 
1826 	if (FAST_IS_REG(src1)) {
1827 		if (FAST_IS_REG(src2)) {
1828 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1829 			FAIL_IF(!inst);
1830 			*inst = LEA_r_m;
1831 			done = 1;
1832 		}
1833 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1834 		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1835 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
1836 #else
1837 		if (src2 & SLJIT_IMM) {
1838 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1839 #endif
1840 			FAIL_IF(!inst);
1841 			*inst = LEA_r_m;
1842 			done = 1;
1843 		}
1844 	}
1845 	else if (FAST_IS_REG(src2)) {
1846 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1847 		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1848 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
1849 #else
1850 		if (src1 & SLJIT_IMM) {
1851 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1852 #endif
1853 			FAIL_IF(!inst);
1854 			*inst = LEA_r_m;
1855 			done = 1;
1856 		}
1857 	}
1858 
1859 	if (done) {
1860 		if (dst_r == TMP_REG1)
1861 			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1862 		return SLJIT_SUCCESS;
1863 	}
1864 	return SLJIT_ERR_UNSUPPORTED;
1865 }
1866 
1867 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1868 	sljit_s32 src1, sljit_sw src1w,
1869 	sljit_s32 src2, sljit_sw src2w)
1870 {
1871 	sljit_u8* inst;
1872 
1873 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1874 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1875 #else
1876 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1877 #endif
1878 		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1879 		return SLJIT_SUCCESS;
1880 	}
1881 
1882 	if (FAST_IS_REG(src1)) {
1883 		if (src2 & SLJIT_IMM) {
1884 			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1885 		}
1886 		else {
1887 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1888 			FAIL_IF(!inst);
1889 			*inst = CMP_r_rm;
1890 		}
1891 		return SLJIT_SUCCESS;
1892 	}
1893 
1894 	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1895 		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1896 		FAIL_IF(!inst);
1897 		*inst = CMP_rm_r;
1898 		return SLJIT_SUCCESS;
1899 	}
1900 
1901 	if (src2 & SLJIT_IMM) {
1902 		if (src1 & SLJIT_IMM) {
1903 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1904 			src1 = TMP_REG1;
1905 			src1w = 0;
1906 		}
1907 		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1908 	}
1909 	else {
1910 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1911 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1912 		FAIL_IF(!inst);
1913 		*inst = CMP_r_rm;
1914 	}
1915 	return SLJIT_SUCCESS;
1916 }
1917 
1918 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
1919 	sljit_s32 src1, sljit_sw src1w,
1920 	sljit_s32 src2, sljit_sw src2w)
1921 {
1922 	sljit_u8* inst;
1923 
1924 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1925 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1926 #else
1927 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1928 #endif
1929 		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1930 		return SLJIT_SUCCESS;
1931 	}
1932 
1933 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1934 	if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1935 #else
1936 	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1937 #endif
1938 		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1939 		return SLJIT_SUCCESS;
1940 	}
1941 
1942 	if (!(src1 & SLJIT_IMM)) {
1943 		if (src2 & SLJIT_IMM) {
1944 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1945 			if (IS_HALFWORD(src2w) || compiler->mode32) {
1946 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1947 				FAIL_IF(!inst);
1948 				*inst = GROUP_F7;
1949 			}
1950 			else {
1951 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1952 				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
1953 				FAIL_IF(!inst);
1954 				*inst = TEST_rm_r;
1955 			}
1956 #else
1957 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1958 			FAIL_IF(!inst);
1959 			*inst = GROUP_F7;
1960 #endif
1961 			return SLJIT_SUCCESS;
1962 		}
1963 		else if (FAST_IS_REG(src1)) {
1964 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1965 			FAIL_IF(!inst);
1966 			*inst = TEST_rm_r;
1967 			return SLJIT_SUCCESS;
1968 		}
1969 	}
1970 
1971 	if (!(src2 & SLJIT_IMM)) {
1972 		if (src1 & SLJIT_IMM) {
1973 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1974 			if (IS_HALFWORD(src1w) || compiler->mode32) {
1975 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
1976 				FAIL_IF(!inst);
1977 				*inst = GROUP_F7;
1978 			}
1979 			else {
1980 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1981 				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
1982 				FAIL_IF(!inst);
1983 				*inst = TEST_rm_r;
1984 			}
1985 #else
1986 			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
1987 			FAIL_IF(!inst);
1988 			*inst = GROUP_F7;
1989 #endif
1990 			return SLJIT_SUCCESS;
1991 		}
1992 		else if (FAST_IS_REG(src2)) {
1993 			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1994 			FAIL_IF(!inst);
1995 			*inst = TEST_rm_r;
1996 			return SLJIT_SUCCESS;
1997 		}
1998 	}
1999 
2000 	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2001 	if (src2 & SLJIT_IMM) {
2002 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2003 		if (IS_HALFWORD(src2w) || compiler->mode32) {
2004 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2005 			FAIL_IF(!inst);
2006 			*inst = GROUP_F7;
2007 		}
2008 		else {
2009 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2010 			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
2011 			FAIL_IF(!inst);
2012 			*inst = TEST_rm_r;
2013 		}
2014 #else
2015 		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2016 		FAIL_IF(!inst);
2017 		*inst = GROUP_F7;
2018 #endif
2019 	}
2020 	else {
2021 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2022 		FAIL_IF(!inst);
2023 		*inst = TEST_rm_r;
2024 	}
2025 	return SLJIT_SUCCESS;
2026 }
2027 
/*
 * Emits dst = src1 <shift> src2. `mode` selects the shift kind and is OR-ed
 * into the instruction byte that emit_x86_instruction() returns a pointer to.
 *
 * The count is either an immediate or has to be in SLJIT_PREF_SHIFT_REG
 * (ecx, per the comment in the last branch). Most of this function is about
 * getting the count into that register without losing the caller's value
 * in it, and without destroying an operand that is still needed.
 *
 * No attention is paid to CPU flags here; emit_shift_with_flags() wraps
 * this function when flags matter.
 *
 * Returns SLJIT_SUCCESS; FAIL_IF / EMIT_MOV leave early when emission fails.
 */
static sljit_s32 emit_shift(struct sljit_compiler *compiler,
	sljit_u8 mode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	/* Easy half: the count is already usable as is. */
	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			/* In-place shift of the destination. */
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			/* Result is discarded: shift a temporary copy. */
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* dst doubles as the count register: the shift is done in
			   TMP_REG1 and the result is copied over the count afterwards. */
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			/* Load src1 into the destination register and shift it there. */
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		/* Remaining destinations: shift in TMP_REG1, then store. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	/* Hard half: the count lives somewhere else and must be moved into
	   SLJIT_PREF_SHIFT_REG. */
	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* The count register is overwritten by the result anyway, so its
		   old value is not preserved. src1 is read before the count
		   register is loaded. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		/* dst is a register that src2 does not depend on, so dst may be
		   written before src2 is read. The old count register value is
		   parked in TMP_REG1 and restored after the shift. */
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may be used for
		   addressing, and we must ensure to work even in that case. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		/* TMP_REG1 holds the value being shifted, so the old count register
		   needs another home: TMP_REG2 on x86-64, a stack slot on x86-32. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp+0] contains the flags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		/* Restore the count register before dst is written, since dst may
		   be addressed through it. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
2116 
2117 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2118 	sljit_u8 mode, sljit_s32 set_flags,
2119 	sljit_s32 dst, sljit_sw dstw,
2120 	sljit_s32 src1, sljit_sw src1w,
2121 	sljit_s32 src2, sljit_sw src2w)
2122 {
2123 	/* The CPU does not set flags if the shift count is 0. */
2124 	if (src2 & SLJIT_IMM) {
2125 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2126 		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2127 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2128 #else
2129 		if ((src2w & 0x1f) != 0)
2130 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2131 #endif
2132 		if (!set_flags)
2133 			return emit_mov(compiler, dst, dstw, src1, src1w);
2134 		/* OR dst, src, 0 */
2135 		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2136 			dst, dstw, src1, src1w, SLJIT_IMM, 0);
2137 	}
2138 
2139 	if (!set_flags)
2140 		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2141 
2142 	if (!FAST_IS_REG(dst))
2143 		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2144 
2145 	FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
2146 
2147 	if (FAST_IS_REG(dst))
2148 		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2149 	return SLJIT_SUCCESS;
2150 }
2151 
/*
 * Public entry point for two-operand integer operations:
 * dst = src1 <op> src2.
 *
 * After argument checking and operand adjustment, the opcode selects one of
 * the emitters in this file. Besides emitting the operation, this function
 * maintains compiler->flags_saved: an operation that sets flags clears it,
 * while SLJIT_KEEP_FLAGS triggers emit_save_flags() unless the flags are
 * already saved.
 *
 * Returns SLJIT_SUCCESS or the error reported by the selected emitter. An
 * opcode not listed in the switch emits nothing and returns SLJIT_SUCCESS.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* 32 bit operand size is requested through SLJIT_I32_OP. */
	compiler->mode32 = op & SLJIT_I32_OP;
#endif

	/* Common flag bookkeeping for opcodes from SLJIT_MUL upwards. The ADD,
	   ADDC, SUB and SUBC cases below do their own bookkeeping (presumably
	   those opcodes are numbered below SLJIT_MUL). */
	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			/* No flags needed: try emit_lea_binary() first. Any result other
			   than SLJIT_ERR_UNSUPPORTED ends the operation here. */
			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		/* ADC consumes the carry, so saved flags are brought back first. */
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			/* Subtracting an immediate without flags: tried as an LEA-style
			   addition of the negated immediate. */
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		/* With no destination only the flags matter: emit a compare. */
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		/* Same carry handling as SLJIT_ADDC. */
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		/* With no destination only the flags matter: emit a test. */
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
2246 
2247 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2248 {
2249 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2250 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2251 	if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
2252 		return -1;
2253 #endif
2254 	return reg_map[reg];
2255 }
2256 
2257 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2258 {
2259 	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2260 	return reg;
2261 }
2262 
2263 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2264 	void *instruction, sljit_s32 size)
2265 {
2266 	sljit_u8 *inst;
2267 
2268 	CHECK_ERROR();
2269 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2270 
2271 	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
2272 	FAIL_IF(!inst);
2273 	INC_SIZE(size);
2274 	SLJIT_MEMCPY(inst, instruction, size);
2275 	return SLJIT_SUCCESS;
2276 }
2277 
2278 /* --------------------------------------------------------------------- */
2279 /*  Floating point operators                                             */
2280 /* --------------------------------------------------------------------- */
2281 
/* Backing store for the SSE2 mask constants written by init_compiler():
   3 extra elements of slack so that a 16 byte aligned address can be found
   inside the array, plus (4 + 4) * 2 elements for the constants themselves
   (four 16 byte slots, assuming sljit_s32 is 4 bytes wide). */
static sljit_s32 sse2_data[3 + (4 + 4) * 2];
/* Aligned pointer into sse2_data; assigned by init_compiler(). */
static sljit_s32 *sse2_buffer;
2285 
2286 static void init_compiler(void)
2287 {
2288 	sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
2289 	/* Single precision constants. */
2290 	sse2_buffer[0] = 0x80000000;
2291 	sse2_buffer[4] = 0x7fffffff;
2292 	/* Double precision constants. */
2293 	sse2_buffer[8] = 0;
2294 	sse2_buffer[9] = 0x80000000;
2295 	sse2_buffer[12] = 0xffffffff;
2296 	sse2_buffer[13] = 0x7fffffff;
2297 }
2298 
2299 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
2300 {
2301 #ifdef SLJIT_IS_FPU_AVAILABLE
2302 	return SLJIT_IS_FPU_AVAILABLE;
2303 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2304 	if (cpu_has_sse2 == -1)
2305 		get_cpu_features();
2306 	return cpu_has_sse2;
2307 #else /* SLJIT_DETECT_SSE2 */
2308 	return 1;
2309 #endif /* SLJIT_DETECT_SSE2 */
2310 }
2311 
2312 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
2313 	sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2314 {
2315 	sljit_u8 *inst;
2316 
2317 	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2318 	FAIL_IF(!inst);
2319 	*inst++ = GROUP_0F;
2320 	*inst = opcode;
2321 	return SLJIT_SUCCESS;
2322 }
2323 
2324 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
2325 	sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2326 {
2327 	sljit_u8 *inst;
2328 
2329 	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2330 	FAIL_IF(!inst);
2331 	*inst++ = GROUP_0F;
2332 	*inst = opcode;
2333 	return SLJIT_SUCCESS;
2334 }
2335 
2336 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
2337 	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
2338 {
2339 	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2340 }
2341 
2342 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
2343 	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
2344 {
2345 	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2346 }
2347 
2348 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2349 	sljit_s32 dst, sljit_sw dstw,
2350 	sljit_s32 src, sljit_sw srcw)
2351 {
2352 	sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2353 	sljit_u8 *inst;
2354 
2355 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2356 	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2357 		compiler->mode32 = 0;
2358 #endif
2359 
2360 	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2361 	FAIL_IF(!inst);
2362 	*inst++ = GROUP_0F;
2363 	*inst = CVTTSD2SI_r_xm;
2364 
2365 	if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
2366 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2367 	return SLJIT_SUCCESS;
2368 }
2369 
2370 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2371 	sljit_s32 dst, sljit_sw dstw,
2372 	sljit_s32 src, sljit_sw srcw)
2373 {
2374 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2375 	sljit_u8 *inst;
2376 
2377 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2378 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2379 		compiler->mode32 = 0;
2380 #endif
2381 
2382 	if (src & SLJIT_IMM) {
2383 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2384 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2385 			srcw = (sljit_s32)srcw;
2386 #endif
2387 		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2388 		src = TMP_REG1;
2389 		srcw = 0;
2390 	}
2391 
2392 	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2393 	FAIL_IF(!inst);
2394 	*inst++ = GROUP_0F;
2395 	*inst = CVTSI2SD_x_rm;
2396 
2397 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2398 	compiler->mode32 = 1;
2399 #endif
2400 	if (dst_r == TMP_FREG)
2401 		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2402 	return SLJIT_SUCCESS;
2403 }
2404 
2405 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2406 	sljit_s32 src1, sljit_sw src1w,
2407 	sljit_s32 src2, sljit_sw src2w)
2408 {
2409 	compiler->flags_saved = 0;
2410 	if (!FAST_IS_REG(src1)) {
2411 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2412 		src1 = TMP_FREG;
2413 	}
2414 	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
2415 }
2416 
2417 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2418 	sljit_s32 dst, sljit_sw dstw,
2419 	sljit_s32 src, sljit_sw srcw)
2420 {
2421 	sljit_s32 dst_r;
2422 
2423 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2424 	compiler->mode32 = 1;
2425 #endif
2426 
2427 	CHECK_ERROR();
2428 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2429 
2430 	if (GET_OPCODE(op) == SLJIT_MOV_F64) {
2431 		if (FAST_IS_REG(dst))
2432 			return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
2433 		if (FAST_IS_REG(src))
2434 			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
2435 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
2436 		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2437 	}
2438 
2439 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
2440 		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2441 		if (FAST_IS_REG(src)) {
2442 			/* We overwrite the high bits of source. From SLJIT point of view,
2443 			   this is not an issue.
2444 			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
2445 			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
2446 		}
2447 		else {
2448 			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
2449 			src = TMP_FREG;
2450 		}
2451 
2452 		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
2453 		if (dst_r == TMP_FREG)
2454 			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2455 		return SLJIT_SUCCESS;
2456 	}
2457 
2458 	if (SLOW_IS_REG(dst)) {
2459 		dst_r = dst;
2460 		if (dst != src)
2461 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2462 	}
2463 	else {
2464 		dst_r = TMP_FREG;
2465 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2466 	}
2467 
2468 	switch (GET_OPCODE(op)) {
2469 	case SLJIT_NEG_F64:
2470 		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
2471 		break;
2472 
2473 	case SLJIT_ABS_F64:
2474 		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2475 		break;
2476 	}
2477 
2478 	if (dst_r == TMP_FREG)
2479 		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2480 	return SLJIT_SUCCESS;
2481 }
2482 
2483 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2484 	sljit_s32 dst, sljit_sw dstw,
2485 	sljit_s32 src1, sljit_sw src1w,
2486 	sljit_s32 src2, sljit_sw src2w)
2487 {
2488 	sljit_s32 dst_r;
2489 
2490 	CHECK_ERROR();
2491 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2492 	ADJUST_LOCAL_OFFSET(dst, dstw);
2493 	ADJUST_LOCAL_OFFSET(src1, src1w);
2494 	ADJUST_LOCAL_OFFSET(src2, src2w);
2495 
2496 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2497 	compiler->mode32 = 1;
2498 #endif
2499 
2500 	if (FAST_IS_REG(dst)) {
2501 		dst_r = dst;
2502 		if (dst == src1)
2503 			; /* Do nothing here. */
2504 		else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
2505 			/* Swap arguments. */
2506 			src2 = src1;
2507 			src2w = src1w;
2508 		}
2509 		else if (dst != src2)
2510 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
2511 		else {
2512 			dst_r = TMP_FREG;
2513 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2514 		}
2515 	}
2516 	else {
2517 		dst_r = TMP_FREG;
2518 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2519 	}
2520 
2521 	switch (GET_OPCODE(op)) {
2522 	case SLJIT_ADD_F64:
2523 		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2524 		break;
2525 
2526 	case SLJIT_SUB_F64:
2527 		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2528 		break;
2529 
2530 	case SLJIT_MUL_F64:
2531 		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2532 		break;
2533 
2534 	case SLJIT_DIV_F64:
2535 		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2536 		break;
2537 	}
2538 
2539 	if (dst_r == TMP_FREG)
2540 		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2541 	return SLJIT_SUCCESS;
2542 }
2543 
2544 /* --------------------------------------------------------------------- */
2545 /*  Conditional instructions                                             */
2546 /* --------------------------------------------------------------------- */
2547 
2548 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2549 {
2550 	sljit_u8 *inst;
2551 	struct sljit_label *label;
2552 
2553 	CHECK_ERROR_PTR();
2554 	CHECK_PTR(check_sljit_emit_label(compiler));
2555 
2556 	/* We should restore the flags before the label,
2557 	   since other taken jumps has their own flags as well. */
2558 	if (SLJIT_UNLIKELY(compiler->flags_saved))
2559 		PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2560 
2561 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2562 		return compiler->last_label;
2563 
2564 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2565 	PTR_FAIL_IF(!label);
2566 	set_label(label, compiler);
2567 
2568 	inst = (sljit_u8*)ensure_buf(compiler, 2);
2569 	PTR_FAIL_IF(!inst);
2570 
2571 	*inst++ = 0;
2572 	*inst++ = 0;
2573 
2574 	return label;
2575 }
2576 
2577 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2578 {
2579 	sljit_u8 *inst;
2580 	struct sljit_jump *jump;
2581 
2582 	CHECK_ERROR_PTR();
2583 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2584 
2585 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2586 		if ((type & 0xff) <= SLJIT_JUMP)
2587 			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2588 		compiler->flags_saved = 0;
2589 	}
2590 
2591 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2592 	PTR_FAIL_IF_NULL(jump);
2593 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2594 	type &= 0xff;
2595 
2596 	if (type >= SLJIT_CALL1)
2597 		PTR_FAIL_IF(call_with_args(compiler, type));
2598 
2599 	/* Worst case size. */
2600 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2601 	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2602 #else
2603 	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2604 #endif
2605 
2606 	inst = (sljit_u8*)ensure_buf(compiler, 2);
2607 	PTR_FAIL_IF_NULL(inst);
2608 
2609 	*inst++ = 0;
2610 	*inst++ = type + 4;
2611 	return jump;
2612 }
2613 
/*
 * Emits a jump or call whose target is given by an operand: either an
 * absolute address (src == SLJIT_IMM, address in srcw) or a register /
 * memory operand holding the address.
 *
 * Returns SLJIT_SUCCESS; the FAIL_IF / CHECK macros leave early on error.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	/* Saved flags are restored for types up to SLJIT_JUMP and are no longer
	   tracked afterwards in any case. */
	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
		/* On the configurations below, a target held in SLJIT_R2 is copied
		   to TMP_REG1 before call_with_args() runs (presumably because
		   argument setup overwrites that register - confirm against
		   call_with_args). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
		/* Stack relative target with SLJIT_CALL3 or above: the offset is
		   moved by one word (presumably call_with_args() pushes one word
		   in that case - confirm). */
		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		/* Absolute target: recorded as a jump descriptor with JUMP_ADDR,
		   in the same way as sljit_emit_jump() records its jumps. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		/* Zero first byte, then the jump type offset by 4. */
		*inst++ = 0;
		*inst++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		/* Indirect form: GROUP_FF with the call or jump selector OR-ed into
		   the following byte. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}
2682 
/*
 * Turns the condition `type` into a 0 / 1 value and either moves it into
 * dst (opcodes below SLJIT_ADD) or combines it with dst through
 * sljit_emit_op2().
 *
 * The value is produced with a setcc instruction, whose opcode is derived
 * from the matching jump opcode. setcc writes a byte register, so most of
 * this function deals with getting a byte-addressable register:
 *   - x86-64: any register works once a REX prefix is present.
 *   - x86-32: the low byte is accessible only for registers accepted by
 *     the reg_map[...] <= 4 checks below; other cases go through cmov
 *     (when available) or temporarily exchange a register with eax / ecx.
 *
 * Returns SLJIT_SUCCESS, or the error of a failing emitter.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_u8 *inst;
	sljit_u8 cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#else
	/* CHECK_EXTRA_REGS might overwrite these values. */
	sljit_s32 dst_save = dst;
	sljit_sw dstw_save = dstw;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	SLJIT_UNUSED_ARG(srcw);

	/* Nothing to produce without a destination. */
	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	/* The condition is read from the real CPU flags. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	type &= 0xff;
	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Fast path for "dst |= condition" without flag requests: setcc into
	   the low byte of TMP_REG1, then a byte sized OR into dst. */
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	/* A plain SLJIT_MOV into a register can build the value in place;
	   everything else builds it in TMP_REG1. */
	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	/* Zero extend the byte to the full register (movzx reg, reg8). */
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	/* Move variants: copy TMP_REG1 to dst, in 32 bit mode unless the
	   opcode is exactly SLJIT_MOV. */
	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}
	/* The nested sljit_emit_op2() call must not re-run argument checks. */
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
#else /* SLJIT_CONFIG_X86_64 */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			/* Zero extend the byte to the full register. */
			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			/* dst = 0, then conditionally replaced by the 1 in TMP_REG1. */
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		/* No cmov: exchange eax with TMP_REG1, setcc al, zero extend al
		   into dst, then exchange back. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	/* Fast path for "dst |= condition" without flag requests, for a dst
	   whose low byte is accessible. */
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
		if (dst != SLJIT_R0) {
			/* eax is borrowed (exchanged with TMP_REG1) to hold the bit. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			/* dst is eax itself, so ecx is borrowed instead. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	/* Exchange back: the bit ends up in TMP_REG1, eax is as before. */
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

	/* The nested sljit_emit_op2() call must not re-run argument checks. It
	   receives the destination as originally passed in, since it applies
	   ADJUST_LOCAL_OFFSET / CHECK_EXTRA_REGS itself. */
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}
2870 
2871 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
2872 {
2873 	CHECK_ERROR();
2874 	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
2875 	ADJUST_LOCAL_OFFSET(dst, dstw);
2876 
2877 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2878 
2879 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2880 	compiler->mode32 = 0;
2881 #endif
2882 
2883 	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2884 
2885 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2886 	if (NOT_HALFWORD(offset)) {
2887 		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2888 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2889 		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2890 		return compiler->error;
2891 #else
2892 		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2893 #endif
2894 	}
2895 #endif
2896 
2897 	if (offset != 0)
2898 		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2899 	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2900 }
2901 
2902 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2903 {
2904 	sljit_u8 *inst;
2905 	struct sljit_const *const_;
2906 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2907 	sljit_s32 reg;
2908 #endif
2909 
2910 	CHECK_ERROR_PTR();
2911 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2912 	ADJUST_LOCAL_OFFSET(dst, dstw);
2913 
2914 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2915 
2916 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2917 	PTR_FAIL_IF(!const_);
2918 	set_const(const_, compiler);
2919 
2920 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2921 	compiler->mode32 = 0;
2922 	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2923 
2924 	if (emit_load_imm64(compiler, reg, init_value))
2925 		return NULL;
2926 #else
2927 	if (dst == SLJIT_UNUSED)
2928 		dst = TMP_REG1;
2929 
2930 	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2931 		return NULL;
2932 #endif
2933 
2934 	inst = (sljit_u8*)ensure_buf(compiler, 2);
2935 	PTR_FAIL_IF(!inst);
2936 
2937 	*inst++ = 0;
2938 	*inst++ = 1;
2939 
2940 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2941 	if (dst & SLJIT_MEM)
2942 		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2943 			return NULL;
2944 #endif
2945 
2946 	return const_;
2947 }
2948 
2949 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2950 {
2951 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2952 	sljit_unaligned_store_sw((void*)addr, new_addr - (addr + 4));
2953 #else
2954 	sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_addr);
2955 #endif
2956 }
2957 
2958 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2959 {
2960 	sljit_unaligned_store_sw((void*)addr, new_constant);
2961 }
2962 
2963 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)
2964 {
2965 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2966 	if (cpu_has_sse2 == -1)
2967 		get_cpu_features();
2968 	return cpu_has_sse2;
2969 #else
2970 	return 1;
2971 #endif
2972 }
2973 
2974 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void)
2975 {
2976 	if (cpu_has_cmov == -1)
2977 		get_cpu_features();
2978 	return cpu_has_cmov;
2979 }
2980 
2981 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
2982 	sljit_s32 type,
2983 	sljit_s32 dst_reg,
2984 	sljit_s32 src, sljit_sw srcw)
2985 {
2986 	sljit_u8* inst;
2987 
2988 	CHECK_ERROR();
2989 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2990 	CHECK_ARGUMENT(sljit_x86_is_cmov_available());
2991 	CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
2992 	CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
2993 	CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
2994 	FUNCTION_CHECK_SRC(src, srcw);
2995 #endif
2996 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
2997 	if (SLJIT_UNLIKELY(!!compiler->verbose)) {
2998 		fprintf(compiler->verbose, "  x86_cmov%s %s%s, ",
2999 			!(dst_reg & SLJIT_I32_OP) ? "" : ".i",
3000 			jump_names[type & 0xff], JUMP_POSTFIX(type));
3001 		sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
3002 		fprintf(compiler->verbose, ", ");
3003 		sljit_verbose_param(compiler, src, srcw);
3004 		fprintf(compiler->verbose, "\n");
3005 	}
3006 #endif
3007 
3008 	ADJUST_LOCAL_OFFSET(src, srcw);
3009 	CHECK_EXTRA_REGS(src, srcw, (void)0);
3010 
3011 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3012 	compiler->mode32 = dst_reg & SLJIT_I32_OP;
3013 #endif
3014 	dst_reg &= ~SLJIT_I32_OP;
3015 
3016 	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
3017 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
3018 		src = TMP_REG1;
3019 		srcw = 0;
3020 	}
3021 
3022 	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
3023 	FAIL_IF(!inst);
3024 	*inst++ = GROUP_0F;
3025 	*inst = get_jump_code(type & 0xff) - 0x40;
3026 	return SLJIT_SUCCESS;
3027 }
3028