1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
sljit_get_platform_name(void)27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
28 {
29 	return "x86" SLJIT_CPUINFO;
30 }
31 
32 /*
33    32b register indexes:
34      0 - EAX
35      1 - ECX
36      2 - EDX
37      3 - EBX
38      4 - none
39      5 - EBP
40      6 - ESI
41      7 - EDI
42 */
43 
44 /*
45    64b register indexes:
46      0 - RAX
47      1 - RCX
48      2 - RDX
49      3 - RBX
50      4 - none
51      5 - RBP
52      6 - RSI
53      7 - RDI
54      8 - R8   - From now on REX prefix is required
55      9 - R9
56     10 - R10
57     11 - R11
58     12 - R12
59     13 - R13
60     14 - R14
61     15 - R15
62 */
63 
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)

/* Indexed by SLJIT register number; yields the x86 register index that is
   placed into instruction encodings (see the 32b register table above). */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

/* If operand p is one of SLJIT_R3 .. SLJIT_R6, rewrite the (p, w) pair into
   a stack-pointer-relative memory operand and then execute the "do"
   statement. Note: this expands to a bare if statement and evaluates p more
   than once, so it must be used as a full statement with side-effect free
   arguments. */
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
		w = FIXED_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing,
   therefore r12 is better for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
/* Indexed by SLJIT register number; yields the full (0-15) machine
   register index (see the 64b register table above). */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 7, 1
};
#else
/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
/* Indexed by SLJIT register number; yields the full (0-15) machine
   register index (see the 64b register table above). */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 3,  4,  5,  5, 6,  7,  7, 6, 3, 4, 2,  0, 1
};
#endif

/* REX prefix bytes: the base value 0x40 combined with the W, R, X or B bit. */
#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

/* Limits of a signed 32 bit value. The constants carry a long suffix on
   non-Windows targets and a long long suffix on Win64. */
#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

/* Range checks against the signed 32 bit limits above. Both macros evaluate
   their argument twice. */
#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

/* Expands to nothing in this configuration. */
#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */
130 
#define TMP_FREG	(0)

/* Size flags for emit_x86_instruction: */
/* These are or-ed to the instruction size argument of emit_x86_instruction
   (see e.g. "1 | EX86_BYTE_ARG | EX86_NO_REXW" in emit_mov_byte). */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400
#define EX86_PREF_F2		0x0800
#define EX86_PREF_F3		0x1000
#define EX86_SSE2_OP1		0x2000
#define EX86_SSE2_OP2		0x4000
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* Plain hexadecimal values are opcode bytes. Values written as (n << 3)
   are or-ed into an already emitted byte; the leading comment names the
   opcode group (BINARY, SHIFT, GROUP_F7, GROUP_FF) they belong to.
   Values marked GROUP_0F follow a GROUP_0F byte. */
#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CVTPD2PS_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

/* First opcode byte of the instruction groups referenced above. */
#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3

/* Mod field values, already shifted into the top two bits of the byte. */
#define MOD_REG		0xc0
#define MOD_DISP8	0x40

/* The macros below require an sljit_ub pointer called "inst" in scope;
   INC_SIZE also requires "compiler". */

/* Stores the record length s into the first byte at inst, advances inst,
   and adds s to compiler->size. */
#define INC_SIZE(s)			(*inst++ = (s), compiler->size += (s))

/* Each of these appends raw instruction bytes at inst and advances it. */
#define PUSH_REG(r)			(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)			(*inst++ = (POP_r + (r)))
#define RET()				(*inst++ = (RET_near))
/* The immediate is emitted as the byte n followed by a zero byte. */
#define RET_I16(n)			(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)		(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
266 
/* Cached CPU feature flags. Both start at -1 and are set to 0 or 1 by
   get_cpu_features() below.
   Multithreading does not affect these static variables, since they store
   built-in CPU features: if several threads detect the CPU features at the
   same time, each of them overwrites the variables with the same values. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;
275 #if defined(_MSC_VER) && _MSC_VER >= 1400
276 #include <intrin.h>
277 #endif
278 
/* Executes the cpuid instruction with eax = 1 and stores the feature bits
   returned in edx: bit 26 goes to cpu_has_sse2 (only when SLJIT_DETECT_SSE2
   is enabled) and bit 15 goes to cpu_has_cmov. Three compiler specific
   variants are provided: the __cpuid intrinsic, AT&T inline assembly and
   Intel syntax inline assembly. */
static void get_cpu_features(void)
{
	sljit_ui features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	/* CPUInfo[3] is stored into features, like edx in the variants below. */
	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_ui)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
		/* Clobber lists: on x86-32 ebx is saved and restored by hand
		   above, so it is not listed; on x86-64 rbx is listed instead. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}
329 
get_jump_code(sljit_si type)330 static sljit_ub get_jump_code(sljit_si type)
331 {
332 	switch (type) {
333 	case SLJIT_C_EQUAL:
334 	case SLJIT_C_FLOAT_EQUAL:
335 		return 0x84 /* je */;
336 
337 	case SLJIT_C_NOT_EQUAL:
338 	case SLJIT_C_FLOAT_NOT_EQUAL:
339 		return 0x85 /* jne */;
340 
341 	case SLJIT_C_LESS:
342 	case SLJIT_C_FLOAT_LESS:
343 		return 0x82 /* jc */;
344 
345 	case SLJIT_C_GREATER_EQUAL:
346 	case SLJIT_C_FLOAT_GREATER_EQUAL:
347 		return 0x83 /* jae */;
348 
349 	case SLJIT_C_GREATER:
350 	case SLJIT_C_FLOAT_GREATER:
351 		return 0x87 /* jnbe */;
352 
353 	case SLJIT_C_LESS_EQUAL:
354 	case SLJIT_C_FLOAT_LESS_EQUAL:
355 		return 0x86 /* jbe */;
356 
357 	case SLJIT_C_SIG_LESS:
358 		return 0x8c /* jl */;
359 
360 	case SLJIT_C_SIG_GREATER_EQUAL:
361 		return 0x8d /* jnl */;
362 
363 	case SLJIT_C_SIG_GREATER:
364 		return 0x8f /* jnle */;
365 
366 	case SLJIT_C_SIG_LESS_EQUAL:
367 		return 0x8e /* jle */;
368 
369 	case SLJIT_C_OVERFLOW:
370 	case SLJIT_C_MUL_OVERFLOW:
371 		return 0x80 /* jo */;
372 
373 	case SLJIT_C_NOT_OVERFLOW:
374 	case SLJIT_C_MUL_NOT_OVERFLOW:
375 		return 0x81 /* jno */;
376 
377 	case SLJIT_C_FLOAT_UNORDERED:
378 		return 0x8a /* jp */;
379 
380 	case SLJIT_C_FLOAT_ORDERED:
381 		return 0x8b /* jpo */;
382 	}
383 	return 0;
384 }
385 
/* Forward declarations of jump emitters used by generate_near_jump_code and
   sljit_generate_code below. Their definitions are not in this part of the
   file; presumably they live in the architecture specific source included
   further down. */
static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
#endif
391 
/* Emits the opcode byte(s) of a pc-relative jump or call at code_ptr and
   reserves room for its displacement, which is patched in later by
   sljit_generate_code (PATCH_MB: 8 bit, PATCH_MW: 32 bit displacement).

   jump      the jump record; jump->addr must hold the address of code_ptr
             on entry and is advanced to the displacement field.
   code_ptr  output position.
   code      start of the generated code (labels store offsets from it).
   type      SLJIT jump type.

   Returns the output position after the reserved displacement. On x86-64
   the work is passed to generate_far_jump_code if the target is outside
   the signed 32 bit range. */
static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
{
	sljit_uw target_addr;
	sljit_sw short_disp;
	sljit_si is_short;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_sw near_disp;
#endif

	target_addr = (jump->flags & JUMP_LABEL) ? (sljit_uw)(code + jump->u.label->size) : jump->u.target;

	/* Displacement measured from the end of a two byte short jump. */
	short_disp = (sljit_sw)(target_addr - (jump->addr + 2));
	is_short = (short_disp >= -128 && short_disp <= 127);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	near_disp = (sljit_sw)(target_addr - (jump->addr + 1));
	if (NOT_HALFWORD(near_disp))
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		*code_ptr++ = is_short ? JMP_i8 : JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		/* Calls are always emitted with a 32 bit displacement. */
		is_short = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (is_short) {
		/* The short conditional opcode is the long one minus 0x10. */
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (is_short) {
		jump->flags |= PATCH_MB;
		return code_ptr + sizeof(sljit_sb);
	}

	jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	return code_ptr + sizeof(sljit_sw);
#else
	return code_ptr + sizeof(sljit_si);
#endif
}
444 
sljit_generate_code(struct sljit_compiler * compiler)445 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
446 {
447 	struct sljit_memory_fragment *buf;
448 	sljit_ub *code;
449 	sljit_ub *code_ptr;
450 	sljit_ub *buf_ptr;
451 	sljit_ub *buf_end;
452 	sljit_ub len;
453 
454 	struct sljit_label *label;
455 	struct sljit_jump *jump;
456 	struct sljit_const *const_;
457 
458 	CHECK_ERROR_PTR();
459 	check_sljit_generate_code(compiler);
460 	reverse_buf(compiler);
461 
462 	/* Second code generation pass. */
463 	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
464 	PTR_FAIL_WITH_EXEC_IF(code);
465 	buf = compiler->buf;
466 
467 	code_ptr = code;
468 	label = compiler->labels;
469 	jump = compiler->jumps;
470 	const_ = compiler->consts;
471 	do {
472 		buf_ptr = buf->memory;
473 		buf_end = buf_ptr + buf->used_size;
474 		do {
475 			len = *buf_ptr++;
476 			if (len > 0) {
477 				/* The code is already generated. */
478 				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
479 				code_ptr += len;
480 				buf_ptr += len;
481 			}
482 			else {
483 				if (*buf_ptr >= 4) {
484 					jump->addr = (sljit_uw)code_ptr;
485 					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
486 						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
487 					else
488 						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
489 					jump = jump->next;
490 				}
491 				else if (*buf_ptr == 0) {
492 					label->addr = (sljit_uw)code_ptr;
493 					label->size = code_ptr - code;
494 					label = label->next;
495 				}
496 				else if (*buf_ptr == 1) {
497 					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
498 					const_ = const_->next;
499 				}
500 				else {
501 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
502 					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
503 					buf_ptr++;
504 					*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
505 					code_ptr += sizeof(sljit_sw);
506 					buf_ptr += sizeof(sljit_sw) - 1;
507 #else
508 					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
509 					buf_ptr += sizeof(sljit_sw);
510 #endif
511 				}
512 				buf_ptr++;
513 			}
514 		} while (buf_ptr < buf_end);
515 		SLJIT_ASSERT(buf_ptr == buf_end);
516 		buf = buf->next;
517 	} while (buf);
518 
519 	SLJIT_ASSERT(!label);
520 	SLJIT_ASSERT(!jump);
521 	SLJIT_ASSERT(!const_);
522 
523 	jump = compiler->jumps;
524 	while (jump) {
525 		if (jump->flags & PATCH_MB) {
526 			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
527 			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
528 		} else if (jump->flags & PATCH_MW) {
529 			if (jump->flags & JUMP_LABEL) {
530 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
531 				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
532 #else
533 				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
534 				*(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
535 #endif
536 			}
537 			else {
538 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
539 				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
540 #else
541 				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
542 				*(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
543 #endif
544 			}
545 		}
546 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
547 		else if (jump->flags & PATCH_MD)
548 			*(sljit_sw*)jump->addr = jump->u.label->addr;
549 #endif
550 
551 		jump = jump->next;
552 	}
553 
554 	/* Maybe we waste some space because of short jumps. */
555 	SLJIT_ASSERT(code_ptr <= code + compiler->size);
556 	compiler->error = SLJIT_ERR_COMPILED;
557 	compiler->executable_size = code_ptr - code;
558 	return (void*)code;
559 }
560 
561 /* --------------------------------------------------------------------- */
562 /*  Operators                                                            */
563 /* --------------------------------------------------------------------- */
564 
/* Forward declarations of operator emitters. emit_mov is defined later in
   this file; the two binary emitters are defined outside the part of the
   file shown here. Judging by the parameter names, op_rm / op_mr / op_imm /
   op_eax_imm select the opcode for each operand form - TODO confirm
   against the definitions. */
static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw);
580 
emit_save_flags(struct sljit_compiler * compiler)581 static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
582 {
583 	sljit_ub *inst;
584 
585 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
586 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
587 	FAIL_IF(!inst);
588 	INC_SIZE(5);
589 #else
590 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
591 	FAIL_IF(!inst);
592 	INC_SIZE(6);
593 	*inst++ = REX_W;
594 #endif
595 	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
596 	*inst++ = 0x64;
597 	*inst++ = 0x24;
598 	*inst++ = (sljit_ub)sizeof(sljit_sw);
599 	*inst++ = PUSHF;
600 	compiler->flags_saved = 1;
601 	return SLJIT_SUCCESS;
602 }
603 
emit_restore_flags(struct sljit_compiler * compiler,sljit_si keep_flags)604 static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
605 {
606 	sljit_ub *inst;
607 
608 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
609 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
610 	FAIL_IF(!inst);
611 	INC_SIZE(5);
612 	*inst++ = POPF;
613 #else
614 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
615 	FAIL_IF(!inst);
616 	INC_SIZE(6);
617 	*inst++ = POPF;
618 	*inst++ = REX_W;
619 #endif
620 	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
621 	*inst++ = 0x64;
622 	*inst++ = 0x24;
623 	*inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
624 	compiler->flags_saved = keep_flags;
625 	return SLJIT_SUCCESS;
626 }
627 
628 #ifdef _WIN32
629 #include <malloc.h>
630 
sljit_grow_stack(sljit_sw local_size)631 static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
632 {
633 	/* Workaround for calling the internal _chkstk() function on Windows.
634 	This function touches all 4k pages belongs to the requested stack space,
635 	which size is passed in local_size. This is necessary on Windows where
636 	the stack can only grow in 4k steps. However, this function just burn
637 	CPU cycles if the stack is large enough. However, you don't know it in
638 	advance, so it must always be called. I think this is a bad design in
639 	general even if it has some reasons. */
640 	*(volatile sljit_si*)alloca(local_size) = 0;
641 }
642 
643 #endif
644 
645 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
646 #include "sljitNativeX86_32.c"
647 #else
648 #include "sljitNativeX86_64.c"
649 #endif
650 
/* Emits a machine word sized move from (src, srcw) to (dst, dstw).

   dst, dstw  destination operand; SLJIT_UNUSED means there is no
              destination.
   src, srcw  source operand: register, immediate (SLJIT_IMM) or memory.

   Returns SLJIT_SUCCESS, or the error propagated by FAIL_IF / the immediate
   loading helpers. */
static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* No destination, so the flags do not need to be set up.
		   A memory source is still loaded into TMP_REG1. */
		if (!(src & SLJIT_MEM))
			return SLJIT_SUCCESS;

		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src)) {
		/* Register source: mov r/m, r. */
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}

	if (!(src & SLJIT_IMM)) {
		/* Memory source. */
		if (!FAST_IS_REG(dst)) {
			/* Memory to memory move: two instructions are required,
			   the value travels through TMP_REG1. */
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOV_r_rm;
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}

		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Immediate source. */
	if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
		if (compiler->mode32)
			return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
		if (NOT_HALFWORD(srcw))
			return emit_load_imm64(compiler, dst, srcw);
		/* A 64 bit move of a value which fits into 32 bits is emitted
		   by the generic mov r/m, imm32 form below. */
#endif
	}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
		/* The immediate does not fit into 32 bits: load it into
		   TMP_REG2 first, then store the register. */
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
#endif
	inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_i32;
	return SLJIT_SUCCESS;
}
715 
/* Calls emit_mov and passes its result to FAIL_IF (defined elsewhere).
   Note: the expansion already ends with a semicolon, so "EMIT_MOV(...);"
   yields an extra empty statement; do not use it as the body of an if
   which is followed by an else. */
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
718 
/* Emits an operation which has no explicit operands.

   op  SLJIT_BREAKPOINT, SLJIT_NOP, SLJIT_UMUL, SLJIT_SMUL, SLJIT_UDIV or
       SLJIT_SDIV; on x86-64 the SLJIT_INT_OP bit selects 32 bit mode for
       the multiplications and divisions. Any other opcode emits nothing.

   The multiplications and divisions are encoded as GROUP_F7 instructions
   with a register operand, and they clear compiler->flags_saved.

   Returns SLJIT_SUCCESS, or the error propagated by FAIL_IF / EMIT_MOV. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
	sljit_ub *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si size;
#endif

	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		/* Single byte int3. */
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		/* Single byte nop. */
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_UMUL:
	case SLJIT_SMUL:
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* The code below relies on these register assignments: R0 is
		   machine register 0 (rax); machine register 2 (rdx) is R1 on
		   Win64 and TMP_REG1 otherwise. */
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_UDIV) {
			/* Unsigned division: xor a register with itself to zero it.
			   On x86-32 / Win64 the divisor in R1 is copied to TMP_REG1
			   first and R1 is zeroed; otherwise TMP_REG1 is zeroed. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if (op == SLJIT_SDIV) {
			/* Signed division: emit cdq, with a REX_W prefix in 64 bit
			   mode. On x86-32 / Win64 the divisor in R1 is copied to
			   TMP_REG1 first. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

		/* Emit the GROUP_F7 instruction with a register operand. The
		   operation itself is or-ed into the last byte by the switch
		   below, so inst must be left pointing at that byte. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		/* Divisions use the copy of the divisor stored in TMP_REG1. */
		*inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
		/* A prefix byte is needed for 64 bit operations, and on Win64
		   for divisions as well, since their operand is TMP_REG1 whose
		   machine register index is above 7 (REX_B). */
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
		else if (op >= SLJIT_UDIV)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_UMUL:
			*inst |= MUL;
			break;
		case SLJIT_SMUL:
			*inst |= IMUL;
			break;
		case SLJIT_UDIV:
			*inst |= DIV;
			break;
		case SLJIT_SDIV:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		/* Here TMP_REG1 is machine register 2 (see the assert above);
		   its content is copied to R1 after the operation. */
		EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}
853 
/* Appends the single byte "prefix" to the instruction stream as a record of
   its own. Requires "inst" and "compiler" in scope; FAIL_IF handles an
   ensure_buf failure. */
#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)
861 
/* Emits a byte sized move from (src, srcw) to (dst, dstw).

   sign       non-zero: the loaded byte is sign extended (movsx),
              zero: it is zero extended (movzx).
   dst, dstw  destination operand.
   src, srcw  source operand: register, immediate (SLJIT_IMM) or memory.

   On x86-32 the code treats only registers whose reg_map value is below 4
   as usable byte operands, hence the extra register shuffling in the
   x86-32 specific branches.

   Returns SLJIT_SUCCESS, or the error propagated by FAIL_IF / EMIT_MOV. */
static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			/* Register destination: the whole register is loaded
			   with the immediate. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		/* Memory destination: mov r/m8, imm8. */
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	/* dst_r is the register which holds the value before an optional
	   store to memory. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
		/* Register to memory: no extension is needed, the low byte of
		   the source is stored below. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			/* Copy the source into TMP_REG1; the store below goes
			   through a register whose reg_map value is below 4. */
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			/* Copy the full register first, then extend dst from its
			   own low byte. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			/* Neither register is used as a byte operand here, so the
			   extension is done with shifts or with a mask. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				/* and reg, 0xff */
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
		/* Store the low byte of dst_r to the memory destination. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find a non-used register, whose reg_map[src] < 4. */
			/* work_r is chosen from R0 / R1 / R2 so that it differs
			   from the base and the offset register of dst. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			/* Exchange work_r and TMP_REG1, so the value is in work_r
			   (the one byte xchg form is used when work_r is R0). */
			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			/* Exchange again to restore the original content of work_r. */
			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}
1006 
/* Emits a half-word move from (src, srcw) to (dst, dstw).
   sign selects the load opcode: MOVSX_r_rm16 when non-zero, MOVZX_r_rm16
   otherwise. Returns SLJIT_SUCCESS, or leaves early through FAIL_IF when
   emit_x86_instruction cannot provide an instruction buffer. */
static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si value_reg;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	/* Without a destination, only a memory source still produces code. */
	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS;

	if (src & SLJIT_IMM) {
		if (!FAST_IS_REG(dst)) {
			/* Immediate to a non-register destination, emitted with the
			   half argument / 0x66 prefix flags. */
			inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
		}

		/* Immediate to a register destination. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
#endif
	}

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
		/* Register to memory: the source register is stored as it is. */
		value_reg = src;
	}
	else {
		/* Load the extended value into dst itself or into TMP_REG1. */
		value_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
		inst = emit_x86_instruction(compiler, 2, value_reg, 0, src, srcw);
		FAIL_IF(!inst);
		inst[0] = GROUP_0F;
		inst[1] = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	/* Store value_reg to the memory destination. */
	inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, value_reg, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}
1057 
/* Emits a GROUP_F7 unary instruction (selected by opcode) that computes
   dst = op(src). The operation is applied in place when source and
   destination are the same operand, directly on dst when dst is a register,
   and through TMP_REG1 otherwise. With dst == SLJIT_UNUSED the result stays
   in TMP_REG1 and is not stored. */
static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si target = dst;
	sljit_sw targetw = dstw;
	sljit_si store_result = 0;

	if (dst == SLJIT_UNUSED) {
		/* Result is discarded: work on a copy of src in TMP_REG1. */
		target = TMP_REG1;
		targetw = 0;
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	}
	else if (dst == src && dstw == srcw) {
		/* Same input and output: nothing to copy. */
	}
	else if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
	}
	else {
		/* Compute in TMP_REG1, then write the result to dst. */
		target = TMP_REG1;
		targetw = 0;
		store_result = 1;
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	}

	inst = emit_x86_instruction(compiler, 1, 0, 0, target, targetw);
	FAIL_IF(!inst);
	inst[0] = GROUP_F7;
	inst[1] |= opcode;

	if (store_result) {
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}
	return SLJIT_SUCCESS;
}
1096 
/* Emits dst = ~src followed by an OR of the result register with itself.
   The extra OR is what distinguishes this from emit_unary(NOT_rm); the
   caller selects this variant when SLJIT_SET_E is requested. The work is
   done in dst when it is a register, otherwise in TMP_REG1 (and stored to
   dst afterwards unless dst is SLJIT_UNUSED). */
static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si work_reg = TMP_REG1;
	sljit_sw work_regw = 0;
	sljit_si direct;

	/* The SLJIT_UNUSED test must come before the register test. */
	direct = (dst != SLJIT_UNUSED) && FAST_IS_REG(dst);
	if (direct) {
		work_reg = dst;
		work_regw = dstw;
	}

	EMIT_MOV(compiler, work_reg, 0, src, srcw);

	/* NOT work_reg */
	inst = emit_x86_instruction(compiler, 1, 0, 0, work_reg, work_regw);
	FAIL_IF(!inst);
	inst[0] = GROUP_F7;
	inst[1] |= NOT_rm;

	/* OR work_reg, work_reg */
	inst = emit_x86_instruction(compiler, 1, work_reg, 0, work_reg, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;

	if (!direct && dst != SLJIT_UNUSED) {
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}
	return SLJIT_SUCCESS;
}
1136 
/* Emits a count-leading-zeros sequence for (src, srcw) into (dst, dstw).
   The sequence is: BSR into TMP_REG1, preload the result register with a
   default constant, replace it with the BSR result unless the zero flag is
   set (CMOVNE, or a hand-encoded JE + MOV when CMOV is unavailable), and
   finally XOR with 31 (63 for non-INT operations on x86-64).
   op_flags is only read on x86-64 (SLJIT_INT_OP selects the 32 bit form). */
static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		/* TMP_REG1 = ~src, then shifted right so that only the (inverted)
		   top bit of the source remains. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	/* An immediate source is first materialized in TMP_REG1. */
	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	/* BSR TMP_REG1, src */
	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		/* Picks R0, R1 or R2, whichever is not referenced by the REG_MASK
		   or OFFS_REG_MASK fields of dst. Its current value is written to
		   dst here; the XCHG at the end of the function swaps the result
		   into dst and this saved value back into the register. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	/* Default value, kept when the zero flag is set: (32 + 31) ^ 31 == 32. */
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	/* The default constant is loaded with mode32 cleared; mode32 is then
	   restored from op_flags. (64 + 63) ^ 63 == 64 and (32 + 31) ^ 31 == 32. */
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	/* -1 means the CPU features have not been queried yet. */
	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		/* CMOVNE dst_r, TMP_REG1 */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
		/* No CMOV: hand-encoded "JE over the MOV; MOV dst_r, TMP_REG1".
		   The JE displacement equals the byte length of the MOV below. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

	/* XOR dst_r with 31 (or 63). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		/* XCHG dst_r with the memory destination (see the register
		   selection above). */
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	/* For a memory destination the result was computed in TMP_REG2. */
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
1245 
/* Public entry point for single-operand operations.
   op carries the opcode (GET_OPCODE) and the flag bits (GET_ALL_FLAGS).
   Opcodes in the range SLJIT_MOV..SLJIT_MOVU_P are moves and are dispatched
   to emit_mov / emit_mov_byte / emit_mov_half / emit_mov_int; SLJIT_NOT,
   SLJIT_NEG and SLJIT_CLZ are dispatched to their own emitters.
   Returns SLJIT_SUCCESS or leaves early through CHECK_ERROR / FAIL_IF. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	/* Set for the SLJIT_MOVU* opcodes handled below. */
	sljit_si update = 0;
	sljit_si op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* Assigned through CHECK_EXTRA_REGS below (macro defined elsewhere). */
	sljit_si dst_is_ereg = 0;
	sljit_si src_is_ereg = 0;
#else
	/* Outside x86-32 src_is_ereg is a constant 0; undefined again at the end. */
#	define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Moves are emitted with mode32 cleared. */
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_INT_OP) {
			/* Register moved onto itself with no type cast needed: no code. */
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			/* With SLJIT_INT_OP, signed int moves from memory are remapped to
			   the unsigned opcode and unsigned int moves from an immediate
			   to the signed opcode. */
			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
				op = SLJIT_MOV_UI;
			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
				op = SLJIT_MOVU_UI;
			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
				op = SLJIT_MOV_SI;
			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
				op = SLJIT_MOVU_SI;
#endif
		}

		/* Every MOVU opcode is its MOV counterpart plus 8; fold it back and
		   remember that the address update is required. */
		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			/* Convert the immediate to the operand type of the move. */
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (sljit_ub)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (sljit_sb)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (sljit_uh)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (sljit_sh)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (sljit_ui)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (sljit_si)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			/* The converted immediate is written with a plain emit_mov when
			   dst_is_ereg is set. */
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		/* Update form with a memory source that has a displacement or an
		   offset register: LEA the address into the register selected by
		   src & REG_MASK first, then load through that register alone. */
		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* With dst_is_ereg set, moves other than the word/int/pointer ones,
		   and moves from memory, go through TMP_REG1; the store to
		   SLJIT_MEM1(SLJIT_SP) is emitted after the switch. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Complete the move that was redirected to TMP_REG1 above. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif

		/* Update form with a memory destination: after the store, LEA the
		   destination address into the register selected by dst & REG_MASK. */
		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	/* An operation that sets flags clears the flags_saved marker. */
	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		/* Save the flags first when they must be kept and are not saved yet. */
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#	undef src_is_ereg
#endif
}
1414 
/* Helper macros for binary operations with an immediate operand.
   Both expect the identifiers `compiler` and (for BINARY_IMM) `inst` to be
   in scope at the point of use. BINARY_IMM expands to bare statements, not a
   do { } while (0) block, so it has to be used inside braces. */

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* x86-64: when immw passes IS_HALFWORD, or mode32 is set, the immediate form
   is emitted and op_imm is OR-ed into the second instruction byte. Otherwise
   immw is loaded into TMP_REG2 with emit_load_imm64 and the op_mr opcode is
   emitted with TMP_REG2 as the register operand. */
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

/* Emits op_eax_imm followed by a 32 bit immediate; REX_W is passed unless
   mode32 is set. */
#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

/* x86-32: the immediate form is always used; op_mr is ignored. */
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

/* Emits op_eax_imm followed by the immediate through emit_do_imm. */
#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
1444 
/* Emits dst = src1 op src2 for an operation whose operands may be swapped
   (the original marks the dst == src2 shortcut "only for cumulative
   operations").
   op_rm      opcode for "reg op= reg/mem"
   op_mr      opcode for "reg/mem op= reg"
   op_imm     value OR-ed into the immediate form by BINARY_IMM
   op_eax_imm opcode of the short form used when dst is SLJIT_R0 and the
              immediate does not fit in a signed byte
   When dst is one of the sources the operation is done in place; otherwise
   src1 is copied into dst (register) or TMP_REG1 and combined with src2. */
static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si other = 0;
	sljit_sw otherw = 0;
	sljit_si in_place = 0;
	sljit_si short_eax_form;
	sljit_si via_tmp;
	sljit_si acc;

	if (dst != SLJIT_UNUSED) {
		/* Pick the operand that is not the destination. src1 is checked
		   first, so it wins when both sources equal dst. */
		if (dst == src1 && dstw == src1w) {
			other = src2;
			otherw = src2w;
			in_place = 1;
		}
		else if (dst == src2 && dstw == src2w) {
			other = src1;
			otherw = src1w;
			in_place = 1;
		}
	}

	if (in_place) {
		if (!(other & SLJIT_IMM)) {
			if (FAST_IS_REG(dst)) {
				inst = emit_x86_instruction(compiler, 1, dst, dstw, other, otherw);
				FAIL_IF(!inst);
				*inst = op_rm;
			}
			else if (FAST_IS_REG(other)) {
				/* Memory destination, register operand. The original notes
				   this as a special exception for sljit_emit_op_flags. */
				inst = emit_x86_instruction(compiler, 1, other, otherw, dst, dstw);
				FAIL_IF(!inst);
				*inst = op_mr;
			}
			else {
				/* Neither side is a register: go through TMP_REG1. */
				EMIT_MOV(compiler, TMP_REG1, 0, other, otherw);
				inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
				FAIL_IF(!inst);
				*inst = op_mr;
			}
			return SLJIT_SUCCESS;
		}

		short_eax_form = (dst == SLJIT_R0) && (otherw > 127 || otherw < -128);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		short_eax_form = short_eax_form && (compiler->mode32 || IS_HALFWORD(otherw));
#endif
		if (short_eax_form) {
			BINARY_EAX_IMM(op_eax_imm, otherw);
		}
		else {
			BINARY_IMM(op_imm, op_mr, otherw, dst, dstw);
		}
		return SLJIT_SUCCESS;
	}

	/* General version: accumulate in dst when it is a register, otherwise
	   in TMP_REG1 (which requires less memory writing). */
	via_tmp = (dst == SLJIT_UNUSED) || !FAST_IS_REG(dst);
	acc = via_tmp ? TMP_REG1 : dst;

	EMIT_MOV(compiler, acc, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
		BINARY_IMM(op_imm, op_mr, src2w, acc, 0);
	}
	else {
		inst = emit_x86_instruction(compiler, 1, acc, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = op_rm;
	}

	/* A discarded result (SLJIT_UNUSED) is left in TMP_REG1. */
	if (via_tmp && dst != SLJIT_UNUSED) {
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}
	return SLJIT_SUCCESS;
}
1560 
1561 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
1562 	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1563 	sljit_si dst, sljit_sw dstw,
1564 	sljit_si src1, sljit_sw src1w,
1565 	sljit_si src2, sljit_sw src2w)
1566 {
1567 	sljit_ub* inst;
1568 
1569 	if (dst == SLJIT_UNUSED) {
1570 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1571 		if (src2 & SLJIT_IMM) {
1572 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1573 		}
1574 		else {
1575 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1576 			FAIL_IF(!inst);
1577 			*inst = op_rm;
1578 		}
1579 		return SLJIT_SUCCESS;
1580 	}
1581 
1582 	if (dst == src1 && dstw == src1w) {
1583 		if (src2 & SLJIT_IMM) {
1584 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1585 			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1586 #else
1587 			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1588 #endif
1589 				BINARY_EAX_IMM(op_eax_imm, src2w);
1590 			}
1591 			else {
1592 				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1593 			}
1594 		}
1595 		else if (FAST_IS_REG(dst)) {
1596 			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1597 			FAIL_IF(!inst);
1598 			*inst = op_rm;
1599 		}
1600 		else if (FAST_IS_REG(src2)) {
1601 			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1602 			FAIL_IF(!inst);
1603 			*inst = op_mr;
1604 		}
1605 		else {
1606 			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1607 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1608 			FAIL_IF(!inst);
1609 			*inst = op_mr;
1610 		}
1611 		return SLJIT_SUCCESS;
1612 	}
1613 
1614 	/* General version. */
1615 	if (FAST_IS_REG(dst) && dst != src2) {
1616 		EMIT_MOV(compiler, dst, 0, src1, src1w);
1617 		if (src2 & SLJIT_IMM) {
1618 			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1619 		}
1620 		else {
1621 			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1622 			FAIL_IF(!inst);
1623 			*inst = op_rm;
1624 		}
1625 	}
1626 	else {
1627 		/* This version requires less memory writing. */
1628 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1629 		if (src2 & SLJIT_IMM) {
1630 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1631 		}
1632 		else {
1633 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1634 			FAIL_IF(!inst);
1635 			*inst = op_rm;
1636 		}
1637 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1638 	}
1639 
1640 	return SLJIT_SUCCESS;
1641 }
1642 
1643 static sljit_si emit_mul(struct sljit_compiler *compiler,
1644 	sljit_si dst, sljit_sw dstw,
1645 	sljit_si src1, sljit_sw src1w,
1646 	sljit_si src2, sljit_sw src2w)
1647 {
1648 	sljit_ub* inst;
1649 	sljit_si dst_r;
1650 
1651 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1652 
1653 	/* Register destination. */
1654 	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1655 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1656 		FAIL_IF(!inst);
1657 		*inst++ = GROUP_0F;
1658 		*inst = IMUL_r_rm;
1659 	}
1660 	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1661 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1662 		FAIL_IF(!inst);
1663 		*inst++ = GROUP_0F;
1664 		*inst = IMUL_r_rm;
1665 	}
1666 	else if (src1 & SLJIT_IMM) {
1667 		if (src2 & SLJIT_IMM) {
1668 			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1669 			src2 = dst_r;
1670 			src2w = 0;
1671 		}
1672 
1673 		if (src1w <= 127 && src1w >= -128) {
1674 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1675 			FAIL_IF(!inst);
1676 			*inst = IMUL_r_rm_i8;
1677 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1678 			FAIL_IF(!inst);
1679 			INC_SIZE(1);
1680 			*inst = (sljit_sb)src1w;
1681 		}
1682 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1683 		else {
1684 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1685 			FAIL_IF(!inst);
1686 			*inst = IMUL_r_rm_i32;
1687 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1688 			FAIL_IF(!inst);
1689 			INC_SIZE(4);
1690 			*(sljit_sw*)inst = src1w;
1691 		}
1692 #else
1693 		else if (IS_HALFWORD(src1w)) {
1694 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1695 			FAIL_IF(!inst);
1696 			*inst = IMUL_r_rm_i32;
1697 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1698 			FAIL_IF(!inst);
1699 			INC_SIZE(4);
1700 			*(sljit_si*)inst = (sljit_si)src1w;
1701 		}
1702 		else {
1703 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1704 			if (dst_r != src2)
1705 				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1706 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1707 			FAIL_IF(!inst);
1708 			*inst++ = GROUP_0F;
1709 			*inst = IMUL_r_rm;
1710 		}
1711 #endif
1712 	}
1713 	else if (src2 & SLJIT_IMM) {
1714 		/* Note: src1 is NOT immediate. */
1715 
1716 		if (src2w <= 127 && src2w >= -128) {
1717 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1718 			FAIL_IF(!inst);
1719 			*inst = IMUL_r_rm_i8;
1720 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1721 			FAIL_IF(!inst);
1722 			INC_SIZE(1);
1723 			*inst = (sljit_sb)src2w;
1724 		}
1725 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1726 		else {
1727 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1728 			FAIL_IF(!inst);
1729 			*inst = IMUL_r_rm_i32;
1730 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1731 			FAIL_IF(!inst);
1732 			INC_SIZE(4);
1733 			*(sljit_sw*)inst = src2w;
1734 		}
1735 #else
1736 		else if (IS_HALFWORD(src2w)) {
1737 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1738 			FAIL_IF(!inst);
1739 			*inst = IMUL_r_rm_i32;
1740 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1741 			FAIL_IF(!inst);
1742 			INC_SIZE(4);
1743 			*(sljit_si*)inst = (sljit_si)src2w;
1744 		}
1745 		else {
1746 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1747 			if (dst_r != src1)
1748 				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1749 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1750 			FAIL_IF(!inst);
1751 			*inst++ = GROUP_0F;
1752 			*inst = IMUL_r_rm;
1753 		}
1754 #endif
1755 	}
1756 	else {
1757 		/* Neither argument is immediate. */
1758 		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1759 			dst_r = TMP_REG1;
1760 		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1761 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1762 		FAIL_IF(!inst);
1763 		*inst++ = GROUP_0F;
1764 		*inst = IMUL_r_rm;
1765 	}
1766 
1767 	if (dst_r == TMP_REG1)
1768 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1769 
1770 	return SLJIT_SUCCESS;
1771 }
1772 
1773 static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
1774 	sljit_si dst, sljit_sw dstw,
1775 	sljit_si src1, sljit_sw src1w,
1776 	sljit_si src2, sljit_sw src2w)
1777 {
1778 	sljit_ub* inst;
1779 	sljit_si dst_r, done = 0;
1780 
1781 	/* These cases better be left to handled by normal way. */
1782 	if (!keep_flags) {
1783 		if (dst == src1 && dstw == src1w)
1784 			return SLJIT_ERR_UNSUPPORTED;
1785 		if (dst == src2 && dstw == src2w)
1786 			return SLJIT_ERR_UNSUPPORTED;
1787 	}
1788 
1789 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1790 
1791 	if (FAST_IS_REG(src1)) {
1792 		if (FAST_IS_REG(src2)) {
1793 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1794 			FAIL_IF(!inst);
1795 			*inst = LEA_r_m;
1796 			done = 1;
1797 		}
1798 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1799 		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1800 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
1801 #else
1802 		if (src2 & SLJIT_IMM) {
1803 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1804 #endif
1805 			FAIL_IF(!inst);
1806 			*inst = LEA_r_m;
1807 			done = 1;
1808 		}
1809 	}
1810 	else if (FAST_IS_REG(src2)) {
1811 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1812 		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1813 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
1814 #else
1815 		if (src1 & SLJIT_IMM) {
1816 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1817 #endif
1818 			FAIL_IF(!inst);
1819 			*inst = LEA_r_m;
1820 			done = 1;
1821 		}
1822 	}
1823 
1824 	if (done) {
1825 		if (dst_r == TMP_REG1)
1826 			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1827 		return SLJIT_SUCCESS;
1828 	}
1829 	return SLJIT_ERR_UNSUPPORTED;
1830 }
1831 
1832 static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
1833 	sljit_si src1, sljit_sw src1w,
1834 	sljit_si src2, sljit_sw src2w)
1835 {
1836 	sljit_ub* inst;
1837 
1838 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1839 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1840 #else
1841 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1842 #endif
1843 		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1844 		return SLJIT_SUCCESS;
1845 	}
1846 
1847 	if (FAST_IS_REG(src1)) {
1848 		if (src2 & SLJIT_IMM) {
1849 			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1850 		}
1851 		else {
1852 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1853 			FAIL_IF(!inst);
1854 			*inst = CMP_r_rm;
1855 		}
1856 		return SLJIT_SUCCESS;
1857 	}
1858 
1859 	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1860 		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1861 		FAIL_IF(!inst);
1862 		*inst = CMP_rm_r;
1863 		return SLJIT_SUCCESS;
1864 	}
1865 
1866 	if (src2 & SLJIT_IMM) {
1867 		if (src1 & SLJIT_IMM) {
1868 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1869 			src1 = TMP_REG1;
1870 			src1w = 0;
1871 		}
1872 		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1873 	}
1874 	else {
1875 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1876 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1877 		FAIL_IF(!inst);
1878 		*inst = CMP_r_rm;
1879 	}
1880 	return SLJIT_SUCCESS;
1881 }
1882 
1883 static sljit_si emit_test_binary(struct sljit_compiler *compiler,
1884 	sljit_si src1, sljit_sw src1w,
1885 	sljit_si src2, sljit_sw src2w)
1886 {
1887 	sljit_ub* inst;
1888 
1889 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1890 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1891 #else
1892 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1893 #endif
1894 		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1895 		return SLJIT_SUCCESS;
1896 	}
1897 
1898 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1899 	if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1900 #else
1901 	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1902 #endif
1903 		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1904 		return SLJIT_SUCCESS;
1905 	}
1906 
1907 	if (FAST_IS_REG(src1)) {
1908 		if (src2 & SLJIT_IMM) {
1909 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1910 			if (IS_HALFWORD(src2w) || compiler->mode32) {
1911 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1912 				FAIL_IF(!inst);
1913 				*inst = GROUP_F7;
1914 			}
1915 			else {
1916 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1917 				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1918 				FAIL_IF(!inst);
1919 				*inst = TEST_rm_r;
1920 			}
1921 #else
1922 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1923 			FAIL_IF(!inst);
1924 			*inst = GROUP_F7;
1925 #endif
1926 		}
1927 		else {
1928 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1929 			FAIL_IF(!inst);
1930 			*inst = TEST_rm_r;
1931 		}
1932 		return SLJIT_SUCCESS;
1933 	}
1934 
1935 	if (FAST_IS_REG(src2)) {
1936 		if (src1 & SLJIT_IMM) {
1937 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1938 			if (IS_HALFWORD(src1w) || compiler->mode32) {
1939 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1940 				FAIL_IF(!inst);
1941 				*inst = GROUP_F7;
1942 			}
1943 			else {
1944 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1945 				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1946 				FAIL_IF(!inst);
1947 				*inst = TEST_rm_r;
1948 			}
1949 #else
1950 			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1951 			FAIL_IF(!inst);
1952 			*inst = GROUP_F7;
1953 #endif
1954 		}
1955 		else {
1956 			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1957 			FAIL_IF(!inst);
1958 			*inst = TEST_rm_r;
1959 		}
1960 		return SLJIT_SUCCESS;
1961 	}
1962 
1963 	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1964 	if (src2 & SLJIT_IMM) {
1965 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1966 		if (IS_HALFWORD(src2w) || compiler->mode32) {
1967 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1968 			FAIL_IF(!inst);
1969 			*inst = GROUP_F7;
1970 		}
1971 		else {
1972 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1973 			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1974 			FAIL_IF(!inst);
1975 			*inst = TEST_rm_r;
1976 		}
1977 #else
1978 		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1979 		FAIL_IF(!inst);
1980 		*inst = GROUP_F7;
1981 #endif
1982 	}
1983 	else {
1984 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1985 		FAIL_IF(!inst);
1986 		*inst = TEST_rm_r;
1987 	}
1988 	return SLJIT_SUCCESS;
1989 }
1990 
1991 static sljit_si emit_shift(struct sljit_compiler *compiler,
1992 	sljit_ub mode,
1993 	sljit_si dst, sljit_sw dstw,
1994 	sljit_si src1, sljit_sw src1w,
1995 	sljit_si src2, sljit_sw src2w)
1996 {
1997 	sljit_ub* inst;
1998 
1999 	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2000 		if (dst == src1 && dstw == src1w) {
2001 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2002 			FAIL_IF(!inst);
2003 			*inst |= mode;
2004 			return SLJIT_SUCCESS;
2005 		}
2006 		if (dst == SLJIT_UNUSED) {
2007 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2008 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2009 			FAIL_IF(!inst);
2010 			*inst |= mode;
2011 			return SLJIT_SUCCESS;
2012 		}
2013 		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2014 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2015 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2016 			FAIL_IF(!inst);
2017 			*inst |= mode;
2018 			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2019 			return SLJIT_SUCCESS;
2020 		}
2021 		if (FAST_IS_REG(dst)) {
2022 			EMIT_MOV(compiler, dst, 0, src1, src1w);
2023 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2024 			FAIL_IF(!inst);
2025 			*inst |= mode;
2026 			return SLJIT_SUCCESS;
2027 		}
2028 
2029 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2030 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2031 		FAIL_IF(!inst);
2032 		*inst |= mode;
2033 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2034 		return SLJIT_SUCCESS;
2035 	}
2036 
2037 	if (dst == SLJIT_PREF_SHIFT_REG) {
2038 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2039 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2040 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2041 		FAIL_IF(!inst);
2042 		*inst |= mode;
2043 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2044 	}
2045 	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2046 		if (src1 != dst)
2047 			EMIT_MOV(compiler, dst, 0, src1, src1w);
2048 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2049 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2050 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2051 		FAIL_IF(!inst);
2052 		*inst |= mode;
2053 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2054 	}
2055 	else {
2056 		/* This case is really difficult, since ecx itself may used for
2057 		   addressing, and we must ensure to work even in that case. */
2058 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2059 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2060 		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2061 #else
2062 		/* [esp+0] contains the flags. */
2063 		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
2064 #endif
2065 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2066 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2067 		FAIL_IF(!inst);
2068 		*inst |= mode;
2069 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2070 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2071 #else
2072 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
2073 #endif
2074 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2075 	}
2076 
2077 	return SLJIT_SUCCESS;
2078 }
2079 
2080 static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
2081 	sljit_ub mode, sljit_si set_flags,
2082 	sljit_si dst, sljit_sw dstw,
2083 	sljit_si src1, sljit_sw src1w,
2084 	sljit_si src2, sljit_sw src2w)
2085 {
2086 	/* The CPU does not set flags if the shift count is 0. */
2087 	if (src2 & SLJIT_IMM) {
2088 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2089 		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2090 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2091 #else
2092 		if ((src2w & 0x1f) != 0)
2093 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2094 #endif
2095 		if (!set_flags)
2096 			return emit_mov(compiler, dst, dstw, src1, src1w);
2097 		/* OR dst, src, 0 */
2098 		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2099 			dst, dstw, src1, src1w, SLJIT_IMM, 0);
2100 	}
2101 
2102 	if (!set_flags)
2103 		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2104 
2105 	if (!FAST_IS_REG(dst))
2106 		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2107 
2108 	FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
2109 
2110 	if (FAST_IS_REG(dst))
2111 		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2112 	return SLJIT_SUCCESS;
2113 }
2114 
2115 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
2116 	sljit_si dst, sljit_sw dstw,
2117 	sljit_si src1, sljit_sw src1w,
2118 	sljit_si src2, sljit_sw src2w)
2119 {
2120 	CHECK_ERROR();
2121 	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2122 	ADJUST_LOCAL_OFFSET(dst, dstw);
2123 	ADJUST_LOCAL_OFFSET(src1, src1w);
2124 	ADJUST_LOCAL_OFFSET(src2, src2w);
2125 
2126 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2127 	CHECK_EXTRA_REGS(src1, src1w, (void)0);
2128 	CHECK_EXTRA_REGS(src2, src2w, (void)0);
2129 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2130 	compiler->mode32 = op & SLJIT_INT_OP;
2131 #endif
2132 
2133 	if (GET_OPCODE(op) >= SLJIT_MUL) {
2134 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2135 			compiler->flags_saved = 0;
2136 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2137 			FAIL_IF(emit_save_flags(compiler));
2138 	}
2139 
2140 	switch (GET_OPCODE(op)) {
2141 	case SLJIT_ADD:
2142 		if (!GET_FLAGS(op)) {
2143 			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2144 				return compiler->error;
2145 		}
2146 		else
2147 			compiler->flags_saved = 0;
2148 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2149 			FAIL_IF(emit_save_flags(compiler));
2150 		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
2151 			dst, dstw, src1, src1w, src2, src2w);
2152 	case SLJIT_ADDC:
2153 		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2154 			FAIL_IF(emit_restore_flags(compiler, 1));
2155 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2156 			FAIL_IF(emit_save_flags(compiler));
2157 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2158 			compiler->flags_saved = 0;
2159 		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
2160 			dst, dstw, src1, src1w, src2, src2w);
2161 	case SLJIT_SUB:
2162 		if (!GET_FLAGS(op)) {
2163 			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2164 				return compiler->error;
2165 		}
2166 		else
2167 			compiler->flags_saved = 0;
2168 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2169 			FAIL_IF(emit_save_flags(compiler));
2170 		if (dst == SLJIT_UNUSED)
2171 			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2172 		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
2173 			dst, dstw, src1, src1w, src2, src2w);
2174 	case SLJIT_SUBC:
2175 		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2176 			FAIL_IF(emit_restore_flags(compiler, 1));
2177 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2178 			FAIL_IF(emit_save_flags(compiler));
2179 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2180 			compiler->flags_saved = 0;
2181 		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
2182 			dst, dstw, src1, src1w, src2, src2w);
2183 	case SLJIT_MUL:
2184 		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2185 	case SLJIT_AND:
2186 		if (dst == SLJIT_UNUSED)
2187 			return emit_test_binary(compiler, src1, src1w, src2, src2w);
2188 		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
2189 			dst, dstw, src1, src1w, src2, src2w);
2190 	case SLJIT_OR:
2191 		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2192 			dst, dstw, src1, src1w, src2, src2w);
2193 	case SLJIT_XOR:
2194 		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
2195 			dst, dstw, src1, src1w, src2, src2w);
2196 	case SLJIT_SHL:
2197 		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
2198 			dst, dstw, src1, src1w, src2, src2w);
2199 	case SLJIT_LSHR:
2200 		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
2201 			dst, dstw, src1, src1w, src2, src2w);
2202 	case SLJIT_ASHR:
2203 		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
2204 			dst, dstw, src1, src1w, src2, src2w);
2205 	}
2206 
2207 	return SLJIT_SUCCESS;
2208 }
2209 
2210 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
2211 {
2212 	check_sljit_get_register_index(reg);
2213 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2214 	if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
2215 		return -1;
2216 #endif
2217 	return reg_map[reg];
2218 }
2219 
2220 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
2221 {
2222 	check_sljit_get_float_register_index(reg);
2223 	return reg;
2224 }
2225 
2226 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
2227 	void *instruction, sljit_si size)
2228 {
2229 	sljit_ub *inst;
2230 
2231 	CHECK_ERROR();
2232 	check_sljit_emit_op_custom(compiler, instruction, size);
2233 	SLJIT_ASSERT(size > 0 && size < 16);
2234 
2235 	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
2236 	FAIL_IF(!inst);
2237 	INC_SIZE(size);
2238 	SLJIT_MEMMOVE(inst, instruction, size);
2239 	return SLJIT_SUCCESS;
2240 }
2241 
2242 /* --------------------------------------------------------------------- */
2243 /*  Floating point operators                                             */
2244 /* --------------------------------------------------------------------- */
2245 
2246 /* Alignment + 2 * 16 bytes. */
2247 static sljit_si sse2_data[3 + (4 + 4) * 2];
2248 static sljit_si *sse2_buffer;
2249 
2250 static void init_compiler(void)
2251 {
2252 	sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
2253 	/* Single precision constants. */
2254 	sse2_buffer[0] = 0x80000000;
2255 	sse2_buffer[4] = 0x7fffffff;
2256 	/* Double precision constants. */
2257 	sse2_buffer[8] = 0;
2258 	sse2_buffer[9] = 0x80000000;
2259 	sse2_buffer[12] = 0xffffffff;
2260 	sse2_buffer[13] = 0x7fffffff;
2261 }
2262 
2263 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
2264 {
2265 #ifdef SLJIT_IS_FPU_AVAILABLE
2266 	return SLJIT_IS_FPU_AVAILABLE;
2267 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2268 	if (cpu_has_sse2 == -1)
2269 		get_cpu_features();
2270 	return cpu_has_sse2;
2271 #else /* SLJIT_DETECT_SSE2 */
2272 	return 1;
2273 #endif /* SLJIT_DETECT_SSE2 */
2274 }
2275 
2276 static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2277 	sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2278 {
2279 	sljit_ub *inst;
2280 
2281 	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2282 	FAIL_IF(!inst);
2283 	*inst++ = GROUP_0F;
2284 	*inst = opcode;
2285 	return SLJIT_SUCCESS;
2286 }
2287 
2288 static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2289 	sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2290 {
2291 	sljit_ub *inst;
2292 
2293 	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2294 	FAIL_IF(!inst);
2295 	*inst++ = GROUP_0F;
2296 	*inst = opcode;
2297 	return SLJIT_SUCCESS;
2298 }
2299 
2300 static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
2301 	sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
2302 {
2303 	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2304 }
2305 
2306 static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
2307 	sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
2308 {
2309 	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2310 }
2311 
2312 static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
2313 	sljit_si dst, sljit_sw dstw,
2314 	sljit_si src, sljit_sw srcw)
2315 {
2316 	sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2317 	sljit_ub *inst;
2318 
2319 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2320 	if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
2321 		compiler->mode32 = 0;
2322 #endif
2323 
2324 	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2325 	FAIL_IF(!inst);
2326 	*inst++ = GROUP_0F;
2327 	*inst = CVTTSD2SI_r_xm;
2328 
2329 	if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
2330 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2331 	return SLJIT_SUCCESS;
2332 }
2333 
2334 static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
2335 	sljit_si dst, sljit_sw dstw,
2336 	sljit_si src, sljit_sw srcw)
2337 {
2338 	sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2339 	sljit_ub *inst;
2340 
2341 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2342 	if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
2343 		compiler->mode32 = 0;
2344 #endif
2345 
2346 	if (src & SLJIT_IMM) {
2347 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2348 		if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
2349 			srcw = (sljit_si)srcw;
2350 #endif
2351 		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2352 		src = TMP_REG1;
2353 		srcw = 0;
2354 	}
2355 
2356 	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2357 	FAIL_IF(!inst);
2358 	*inst++ = GROUP_0F;
2359 	*inst = CVTSI2SD_x_rm;
2360 
2361 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2362 	compiler->mode32 = 1;
2363 #endif
2364 	if (dst_r == TMP_FREG)
2365 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2366 	return SLJIT_SUCCESS;
2367 }
2368 
2369 static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
2370 	sljit_si src1, sljit_sw src1w,
2371 	sljit_si src2, sljit_sw src2w)
2372 {
2373 	compiler->flags_saved = 0;
2374 	if (!FAST_IS_REG(src1)) {
2375 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2376 		src1 = TMP_FREG;
2377 	}
2378 	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
2379 }
2380 
2381 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
2382 	sljit_si dst, sljit_sw dstw,
2383 	sljit_si src, sljit_sw srcw)
2384 {
2385 	sljit_si dst_r;
2386 
2387 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2388 	compiler->mode32 = 1;
2389 #endif
2390 
2391 	CHECK_ERROR();
2392 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2393 
2394 	if (GET_OPCODE(op) == SLJIT_MOVD) {
2395 		if (FAST_IS_REG(dst))
2396 			return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
2397 		if (FAST_IS_REG(src))
2398 			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
2399 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
2400 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2401 	}
2402 
2403 	if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
2404 		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2405 		if (FAST_IS_REG(src)) {
2406 			/* We overwrite the high bits of source. From SLJIT point of view,
2407 			   this is not an issue.
2408 			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
2409 			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
2410 		}
2411 		else {
2412 			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
2413 			src = TMP_FREG;
2414 		}
2415 
2416 		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
2417 		if (dst_r == TMP_FREG)
2418 			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2419 		return SLJIT_SUCCESS;
2420 	}
2421 
2422 	if (SLOW_IS_REG(dst)) {
2423 		dst_r = dst;
2424 		if (dst != src)
2425 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2426 	}
2427 	else {
2428 		dst_r = TMP_FREG;
2429 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2430 	}
2431 
2432 	switch (GET_OPCODE(op)) {
2433 	case SLJIT_NEGD:
2434 		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
2435 		break;
2436 
2437 	case SLJIT_ABSD:
2438 		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2439 		break;
2440 	}
2441 
2442 	if (dst_r == TMP_FREG)
2443 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2444 	return SLJIT_SUCCESS;
2445 }
2446 
2447 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
2448 	sljit_si dst, sljit_sw dstw,
2449 	sljit_si src1, sljit_sw src1w,
2450 	sljit_si src2, sljit_sw src2w)
2451 {
2452 	sljit_si dst_r;
2453 
2454 	CHECK_ERROR();
2455 	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2456 	ADJUST_LOCAL_OFFSET(dst, dstw);
2457 	ADJUST_LOCAL_OFFSET(src1, src1w);
2458 	ADJUST_LOCAL_OFFSET(src2, src2w);
2459 
2460 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2461 	compiler->mode32 = 1;
2462 #endif
2463 
2464 	if (FAST_IS_REG(dst)) {
2465 		dst_r = dst;
2466 		if (dst == src1)
2467 			; /* Do nothing here. */
2468 		else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) {
2469 			/* Swap arguments. */
2470 			src2 = src1;
2471 			src2w = src1w;
2472 		}
2473 		else if (dst != src2)
2474 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
2475 		else {
2476 			dst_r = TMP_FREG;
2477 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2478 		}
2479 	}
2480 	else {
2481 		dst_r = TMP_FREG;
2482 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2483 	}
2484 
2485 	switch (GET_OPCODE(op)) {
2486 	case SLJIT_ADDD:
2487 		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2488 		break;
2489 
2490 	case SLJIT_SUBD:
2491 		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2492 		break;
2493 
2494 	case SLJIT_MULD:
2495 		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2496 		break;
2497 
2498 	case SLJIT_DIVD:
2499 		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2500 		break;
2501 	}
2502 
2503 	if (dst_r == TMP_FREG)
2504 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2505 	return SLJIT_SUCCESS;
2506 }
2507 
2508 /* --------------------------------------------------------------------- */
2509 /*  Conditional instructions                                             */
2510 /* --------------------------------------------------------------------- */
2511 
2512 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2513 {
2514 	sljit_ub *inst;
2515 	struct sljit_label *label;
2516 
2517 	CHECK_ERROR_PTR();
2518 	check_sljit_emit_label(compiler);
2519 
2520 	/* We should restore the flags before the label,
2521 	   since other taken jumps has their own flags as well. */
2522 	if (SLJIT_UNLIKELY(compiler->flags_saved))
2523 		PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2524 
2525 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2526 		return compiler->last_label;
2527 
2528 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2529 	PTR_FAIL_IF(!label);
2530 	set_label(label, compiler);
2531 
2532 	inst = (sljit_ub*)ensure_buf(compiler, 2);
2533 	PTR_FAIL_IF(!inst);
2534 
2535 	*inst++ = 0;
2536 	*inst++ = 0;
2537 
2538 	return label;
2539 }
2540 
2541 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
2542 {
2543 	sljit_ub *inst;
2544 	struct sljit_jump *jump;
2545 
2546 	CHECK_ERROR_PTR();
2547 	check_sljit_emit_jump(compiler, type);
2548 
2549 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2550 		if ((type & 0xff) <= SLJIT_JUMP)
2551 			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2552 		compiler->flags_saved = 0;
2553 	}
2554 
2555 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2556 	PTR_FAIL_IF_NULL(jump);
2557 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2558 	type &= 0xff;
2559 
2560 	if (type >= SLJIT_CALL1)
2561 		PTR_FAIL_IF(call_with_args(compiler, type));
2562 
2563 	/* Worst case size. */
2564 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2565 	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2566 #else
2567 	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2568 #endif
2569 
2570 	inst = (sljit_ub*)ensure_buf(compiler, 2);
2571 	PTR_FAIL_IF_NULL(inst);
2572 
2573 	*inst++ = 0;
2574 	*inst++ = type + 4;
2575 	return jump;
2576 }
2577 
2578 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
2579 {
2580 	sljit_ub *inst;
2581 	struct sljit_jump *jump;
2582 
2583 	CHECK_ERROR();
2584 	check_sljit_emit_ijump(compiler, type, src, srcw);
2585 	ADJUST_LOCAL_OFFSET(src, srcw);
2586 
2587 	CHECK_EXTRA_REGS(src, srcw, (void)0);
2588 
2589 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2590 		if (type <= SLJIT_JUMP)
2591 			FAIL_IF(emit_restore_flags(compiler, 0));
2592 		compiler->flags_saved = 0;
2593 	}
2594 
2595 	if (type >= SLJIT_CALL1) {
2596 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2597 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
2598 		if (src == SLJIT_R2) {
2599 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2600 			src = TMP_REG1;
2601 		}
2602 		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
2603 			srcw += sizeof(sljit_sw);
2604 #endif
2605 #endif
2606 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2607 		if (src == SLJIT_R2) {
2608 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2609 			src = TMP_REG1;
2610 		}
2611 #endif
2612 		FAIL_IF(call_with_args(compiler, type));
2613 	}
2614 
2615 	if (src == SLJIT_IMM) {
2616 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2617 		FAIL_IF_NULL(jump);
2618 		set_jump(jump, compiler, JUMP_ADDR);
2619 		jump->u.target = srcw;
2620 
2621 		/* Worst case size. */
2622 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2623 		compiler->size += 5;
2624 #else
2625 		compiler->size += 10 + 3;
2626 #endif
2627 
2628 		inst = (sljit_ub*)ensure_buf(compiler, 2);
2629 		FAIL_IF_NULL(inst);
2630 
2631 		*inst++ = 0;
2632 		*inst++ = type + 4;
2633 	}
2634 	else {
2635 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2636 		/* REX_W is not necessary (src is not immediate). */
2637 		compiler->mode32 = 1;
2638 #endif
2639 		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2640 		FAIL_IF(!inst);
2641 		*inst++ = GROUP_FF;
2642 		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2643 	}
2644 	return SLJIT_SUCCESS;
2645 }
2646 
/* Materializes the condition `type` as a 0 / 1 value and either moves it
   to dst (opcodes below SLJIT_ADD) or combines it with dst through
   sljit_emit_op2 (dst = dst <op> value).

   Nothing is emitted when dst is SLJIT_UNUSED. Saved flags are restored
   before the condition is read. The value is produced with a setcc
   instruction (opcode = jump code + 0x10) followed by a zero extension,
   or by one of the special sequences below. The instruction bytes are
   written by hand, so every ensure_buf request is one byte larger than
   the INC_SIZE that follows it.

   Returns SLJIT_SUCCESS or the status of the final emit helper. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw,
	sljit_si type)
{
	sljit_ub *inst;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#else
	/* CHECK_EXTRA_REGS might overwrite these values. */
	sljit_si dst_save = dst;
	sljit_sw dstw_save = dstw;
#endif

	CHECK_ERROR();
	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);

	/* Without a destination there is nothing to compute. */
	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	/* The condition is evaluated on the real CPU flags, so saved flags
	   have to be restored first. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Fast path for "dst |= condition" on a register without flag
	   requests: setcc into the low byte of TMP_REG1 (4 bytes), then a
	   byte sized OR of that byte into dst (3 bytes). */
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	/* A plain SLJIT_MOV into a register is computed in place, everything
	   else goes through TMP_REG1. */
	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	/* setcc reg8 (4 bytes) followed by movzx reg, reg8 with REX_W (4 bytes). */
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	/* The value was produced directly in dst. */
	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		/* Move operation: copy TMP_REG1 to dst; only SLJIT_MOV itself
		   leaves mode32 cleared. */
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	/* dst = dst <op> TMP_REG1 */
	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
#else /* SLJIT_CONFIG_X86_64 */
	/* Move operation into a register. */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			/* movzx dst, dst8 */
			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			/* dst = 0, then conditionally replaced by the 1 in TMP_REG1. */
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		/* No cmov: swap eax with TMP_REG1, compute the value in al,
		   zero extend it into dst and swap back. */
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	/* Fast path for "dst |= condition" on a register with an accessible
	   low byte and without flag requests. */
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
		if (dst != SLJIT_R0) {
			/* eax is borrowed: swap it with TMP_REG1, setcc al,
			   or dst8, al, then swap back. */
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			/* dst is eax itself, so ecx is borrowed instead: swap it
			   with TMP_REG1, setcc cl, or al, cl, then swap back. */
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. The value is computed in eax while eax
	   and TMP_REG1 are swapped, so it ends up in TMP_REG1. */
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	/* Move operation: copy TMP_REG1 to dst. */
	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	/* dst = dst <op> TMP_REG1, using the operand values saved on entry
	   (before CHECK_EXTRA_REGS could change them). */
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}
2830 
2831 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
2832 {
2833 	CHECK_ERROR();
2834 	check_sljit_get_local_base(compiler, dst, dstw, offset);
2835 	ADJUST_LOCAL_OFFSET(dst, dstw);
2836 
2837 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2838 
2839 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2840 	compiler->mode32 = 0;
2841 #endif
2842 
2843 	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2844 
2845 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2846 	if (NOT_HALFWORD(offset)) {
2847 		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2848 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2849 		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2850 		return compiler->error;
2851 #else
2852 		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2853 #endif
2854 	}
2855 #endif
2856 
2857 	if (offset != 0)
2858 		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2859 	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2860 }
2861 
2862 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
2863 {
2864 	sljit_ub *inst;
2865 	struct sljit_const *const_;
2866 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2867 	sljit_si reg;
2868 #endif
2869 
2870 	CHECK_ERROR_PTR();
2871 	check_sljit_emit_const(compiler, dst, dstw, init_value);
2872 	ADJUST_LOCAL_OFFSET(dst, dstw);
2873 
2874 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2875 
2876 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2877 	PTR_FAIL_IF(!const_);
2878 	set_const(const_, compiler);
2879 
2880 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2881 	compiler->mode32 = 0;
2882 	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2883 
2884 	if (emit_load_imm64(compiler, reg, init_value))
2885 		return NULL;
2886 #else
2887 	if (dst == SLJIT_UNUSED)
2888 		dst = TMP_REG1;
2889 
2890 	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2891 		return NULL;
2892 #endif
2893 
2894 	inst = (sljit_ub*)ensure_buf(compiler, 2);
2895 	PTR_FAIL_IF(!inst);
2896 
2897 	*inst++ = 0;
2898 	*inst++ = 1;
2899 
2900 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2901 	if (dst & SLJIT_MEM)
2902 		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2903 			return NULL;
2904 #endif
2905 
2906 	return const_;
2907 }
2908 
2909 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2910 {
2911 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2912 	*(sljit_sw*)addr = new_addr - (addr + 4);
2913 #else
2914 	*(sljit_uw*)addr = new_addr;
2915 #endif
2916 }
2917 
2918 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2919 {
2920 	*(sljit_sw*)addr = new_constant;
2921 }
2922