/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - none
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - none
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)

static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

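/* Register-starved x86-32 cannot keep every SLJIT register in a hardware
   register: SLJIT_R3..SLJIT_R6 have zero entries in reg_map above and live
   in stack slots instead. The macro below rewrites such operands into
   SLJIT_MEM1(SLJIT_SP) accesses before the regular emitters see them. */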
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
		w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 7, 1
};
#else
/* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 3,  4,  5,  5, 6,  7,  7, 6, 3, 4, 2,  0, 1
};
#endif

#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40
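/* REX is the 0100WRXB prefix byte: W selects 64-bit operand size, while R,
   X and B supply the fourth (high) bit of the ModRM reg field, the SIB
   index, and the ModRM rm / SIB base, respectively. The values above are
   combined by OR, e.g. REX_W | REX_B == 0x49. */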
115  
116  #ifndef _WIN64
117  #define HALFWORD_MAX 0x7fffffffl
118  #define HALFWORD_MIN -0x80000000l
119  #else
120  #define HALFWORD_MAX 0x7fffffffll
121  #define HALFWORD_MIN -0x80000000ll
122  #endif
123  
124  #define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
125  #define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
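/* "Halfword" here means the low 32 bits of a 64-bit word: these macros test
   whether an immediate fits into the sign-extended 32-bit immediate field
   that most x86-64 instructions provide. Wider values must be loaded into a
   register first (see the emit_load_imm64 calls further below). */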

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define TMP_FREG	(0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400
#define EX86_PREF_F2		0x0800
#define EX86_PREF_F3		0x1000
#define EX86_SSE2_OP1		0x2000
#define EX86_SSE2_OP2		0x4000
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CVTPD2PS_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JNE_i8		0x75
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3

#define MOD_REG		0xc0
#define MOD_DISP8	0x40

#define INC_SIZE(s)			(*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)			(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)			(*inst++ = (POP_r + (r)))
#define RET()				(*inst++ = (RET_near))
#define RET_I16(n)			(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)		(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
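/* A minimal example of the macro above: MOV_RM(0x3, 0, 1) emits the bytes
   0x8b 0xc1, i.e. "mov eax, ecx" (mod 0x3 selects the register-direct form,
   reg 0 is EAX and rm 1 is ECX per the 32b index table near the top). */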

/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Several threads may write them if they detect the
   CPU features at the same time, but every writer stores the same values. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_s32 cpu_has_sse2 = -1;
#endif
static sljit_s32 cpu_has_cmov = -1;

#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

/******************************************************/
/*    Unaligned-store functions                       */
/******************************************************/

static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}
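
/* Why the memcpy form (an observation on the helpers above): storing through
   a cast pointer to a misaligned address would be undefined behavior in C,
   while a fixed-size SLJIT_MEMCPY stays well-defined and is typically
   compiled down to a single unaligned mov on x86. */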

/******************************************************/
/*    Utility functions                               */
/******************************************************/

static void get_cpu_features(void)
{
	sljit_u32 features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_u32)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */
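	/* CPUID leaf 1 returns its feature flags in EDX: bit 15 is CMOV and
	   bit 26 is SSE2, which is what the shifts below extract. */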

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}

static sljit_u8 get_jump_code(sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_EQUAL_F64:
		return 0x84 /* je */;

	case SLJIT_NOT_EQUAL:
	case SLJIT_NOT_EQUAL_F64:
		return 0x85 /* jne */;

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		return 0x82 /* jc */;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		return 0x83 /* jae */;

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		return 0x87 /* jnbe */;

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return 0x86 /* jbe */;

	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_UNORDERED_F64:
		return 0x8a /* jp */;

	case SLJIT_ORDERED_F64:
		return 0x8b /* jpo */;
	}
	return 0;
}
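
/* The values returned above are the second byte of the two-byte 0F 8x
   near-conditional-jump encodings; generate_near_jump_code below derives the
   one-byte short form (7x rel8) by subtracting 0x10 from the same code. */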

static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type);
#endif

static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type)
{
	sljit_s32 short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_s8);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_s32);
#endif
	}

	return code_ptr;
}
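
/* Note that only the opcode bytes are written here: the rel8/rel32
   displacement is skipped over and flagged with PATCH_MB or PATCH_MW, then
   filled in by the patch loop near the end of sljit_generate_code. */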

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_u8 *code;
	sljit_u8 *code_ptr;
	sljit_u8 *buf_ptr;
	sljit_u8 *buf_end;
	sljit_u8 len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMCPY(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
					buf_ptr++;
					sljit_unaligned_store_sw(code_ptr, *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw)));
					code_ptr += sizeof(sljit_sw);
					buf_ptr += sizeof(sljit_sw) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_sw);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) <= 127);
			*(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw))));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw))));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32))));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
#endif

		jump = jump->next;
	}

	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = code_ptr - code;
	return (void*)code;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw);

static SLJIT_INLINE sljit_s32 emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
#else
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_u8)sizeof(sljit_sw);
	*inst++ = PUSHF;
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}
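
/* Why LEA rather than ADD/SUB for the stack adjustment above and in
   emit_restore_flags below: LEA computes the new stack pointer without
   touching EFLAGS, which is exactly what is being saved or restored. The
   LEA/PUSHF (and POPF/LEA) pair leaves the stack pointer where it started,
   keeping the saved flags in the word at [esp/rsp]. */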

static SLJIT_INLINE sljit_s32 emit_restore_flags(struct sljit_compiler *compiler, sljit_s32 keep_flags)
{
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
	*inst++ = POPF;
#else
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = POPF;
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_u8)(-(sljit_s8)sizeof(sljit_sw));
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}

#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	This function touches all 4k pages that belong to the requested stack
	space, whose size is passed in local_size. This is necessary on Windows,
	where the stack can only grow in 4k steps. If the stack is already large
	enough, the call merely burns CPU cycles; but since that cannot be known
	in advance, it must always be made. I think this is a bad design in
	general, even if it has some reasons. */
	*(volatile sljit_s32*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		/* No destination, no need to set up flags. */
		if (src & SLJIT_MEM) {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOV_r_rm;
		}
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instructions. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_I32_OP;
#endif
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);

		op = GET_OPCODE(op);
		if ((op | 0x2) == SLJIT_DIV_UW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if ((op | 0x2) == SLJIT_DIV_SW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
		else if (op >= SLJIT_DIVMOD_UW)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_LMUL_UW:
			*inst |= MUL;
			break;
		case SLJIT_LMUL_SW:
			*inst |= IMUL;
			break;
		case SLJIT_DIVMOD_UW:
		case SLJIT_DIV_UW:
			*inst |= DIV;
			break;
		case SLJIT_DIVMOD_SW:
		case SLJIT_DIV_SW:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		if (op <= SLJIT_DIVMOD_SW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
		if (op >= SLJIT_DIV_UW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}

#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

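	/* Background for the reg_map[...] >= 4 checks below: without a REX
	   prefix, only encodings 0-3 (EAX, ECX, EDX, EBX) address their low
	   byte; encodings 4-7 select AH/CH/DH/BH instead, so on x86-32 the
	   registers mapped there cannot be used directly as byte operands. */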
	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src is either a memory operand or, on x86-32, a register with
		   reg_map[src] < 4. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is < 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif
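	/* How the CLZ is finished below: BSR leaves its destination undefined
	   and sets ZF when the source is zero, so dst_r is preloaded with
	   32 + 31 (64 + 63 in 64-bit mode) and replaced by the BSR result only
	   for non-zero inputs (via CMOVNE, or a JE over a MOV). The final XOR
	   with 31 (63) maps a bit index b to 31 - b, i.e. the leading-zero
	   count, and maps the preloaded value to 32 (64) for a zero input. */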

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 update = 0;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 dst_is_ereg = 0;
	sljit_s32 src_is_ereg = 0;
#else
#	define src_is_ereg 0
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_I32_OP) {
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOV_U32;
			if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOVU_U32;
			if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOV_S32;
			if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOVU_S32;
#endif
		}

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_U8:
				srcw = (sljit_u8)srcw;
				break;
			case SLJIT_MOV_S8:
				srcw = (sljit_s8)srcw;
				break;
			case SLJIT_MOV_U16:
				srcw = (sljit_u16)srcw;
				break;
			case SLJIT_MOV_S16:
				srcw = (sljit_s16)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_U32:
				srcw = (sljit_u32)srcw;
				break;
			case SLJIT_MOV_S32:
				srcw = (sljit_s32)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_U8:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S8:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_U16:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S16:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_U32:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S32:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif

		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#	undef src_is_ereg
#endif
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}
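
/* The two arms above reflect an x86-64 encoding limit: ALU instructions only
   accept sign-extended 32-bit immediates (MOV being the one 64-bit-immediate
   exception), so a wider constant is first materialized in TMP_REG2 and then
   applied with the register-to-r/m opcode form. */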

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif

static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
1595  
1596  static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
1597  	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
1598  	sljit_s32 dst, sljit_sw dstw,
1599  	sljit_s32 src1, sljit_sw src1w,
1600  	sljit_s32 src2, sljit_sw src2w)
1601  {
1602  	sljit_u8* inst;
1603  
1604  	if (dst == SLJIT_UNUSED) {
1605  		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1606  		if (src2 & SLJIT_IMM) {
1607  			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1608  		}
1609  		else {
1610  			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1611  			FAIL_IF(!inst);
1612  			*inst = op_rm;
1613  		}
1614  		return SLJIT_SUCCESS;
1615  	}
1616  
1617  	if (dst == src1 && dstw == src1w) {
1618  		if (src2 & SLJIT_IMM) {
1619  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1620  			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1621  #else
1622  			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1623  #endif
1624  				BINARY_EAX_IMM(op_eax_imm, src2w);
1625  			}
1626  			else {
1627  				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1628  			}
1629  		}
1630  		else if (FAST_IS_REG(dst)) {
1631  			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1632  			FAIL_IF(!inst);
1633  			*inst = op_rm;
1634  		}
1635  		else if (FAST_IS_REG(src2)) {
1636  			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1637  			FAIL_IF(!inst);
1638  			*inst = op_mr;
1639  		}
1640  		else {
1641  			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1642  			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1643  			FAIL_IF(!inst);
1644  			*inst = op_mr;
1645  		}
1646  		return SLJIT_SUCCESS;
1647  	}
1648  
1649  	/* General version. */
1650  	if (FAST_IS_REG(dst) && dst != src2) {
1651  		EMIT_MOV(compiler, dst, 0, src1, src1w);
1652  		if (src2 & SLJIT_IMM) {
1653  			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1654  		}
1655  		else {
1656  			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1657  			FAIL_IF(!inst);
1658  			*inst = op_rm;
1659  		}
1660  	}
1661  	else {
1662  		/* This version requires less memory writing. */
1663  		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1664  		if (src2 & SLJIT_IMM) {
1665  			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1666  		}
1667  		else {
1668  			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1669  			FAIL_IF(!inst);
1670  			*inst = op_rm;
1671  		}
1672  		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1673  	}
1674  
1675  	return SLJIT_SUCCESS;
1676  }
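/* Unlike the cumulative case, SUB and SBB cannot swap their operands, so
   the register-destination fast path above additionally requires
   dst != src2; otherwise the result is built in TMP_REG1 first. The SUB
   dispatch, copied from sljit_emit_op2() below: */
#if 0
	return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
		dst, dstw, src1, src1w, src2, src2w);
#endif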
1677  
1678  static sljit_s32 emit_mul(struct sljit_compiler *compiler,
1679  	sljit_s32 dst, sljit_sw dstw,
1680  	sljit_s32 src1, sljit_sw src1w,
1681  	sljit_s32 src2, sljit_sw src2w)
1682  {
1683  	sljit_u8* inst;
1684  	sljit_s32 dst_r;
1685  
1686  	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1687  
1688  	/* Register destination. */
1689  	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1690  		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1691  		FAIL_IF(!inst);
1692  		*inst++ = GROUP_0F;
1693  		*inst = IMUL_r_rm;
1694  	}
1695  	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1696  		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1697  		FAIL_IF(!inst);
1698  		*inst++ = GROUP_0F;
1699  		*inst = IMUL_r_rm;
1700  	}
1701  	else if (src1 & SLJIT_IMM) {
1702  		if (src2 & SLJIT_IMM) {
1703  			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1704  			src2 = dst_r;
1705  			src2w = 0;
1706  		}
1707  
1708  		if (src1w <= 127 && src1w >= -128) {
1709  			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1710  			FAIL_IF(!inst);
1711  			*inst = IMUL_r_rm_i8;
1712  			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1713  			FAIL_IF(!inst);
1714  			INC_SIZE(1);
1715  			*inst = (sljit_s8)src1w;
1716  		}
1717  #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1718  		else {
1719  			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1720  			FAIL_IF(!inst);
1721  			*inst = IMUL_r_rm_i32;
1722  			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1723  			FAIL_IF(!inst);
1724  			INC_SIZE(4);
1725  			sljit_unaligned_store_sw(inst, src1w);
1726  		}
1727  #else
1728  		else if (IS_HALFWORD(src1w)) {
1729  			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1730  			FAIL_IF(!inst);
1731  			*inst = IMUL_r_rm_i32;
1732  			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1733  			FAIL_IF(!inst);
1734  			INC_SIZE(4);
1735  			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
1736  		}
1737  		else {
1738  			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1739  			if (dst_r != src2)
1740  				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1741  			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1742  			FAIL_IF(!inst);
1743  			*inst++ = GROUP_0F;
1744  			*inst = IMUL_r_rm;
1745  		}
1746  #endif
1747  	}
1748  	else if (src2 & SLJIT_IMM) {
1749  		/* Note: src1 is NOT immediate. */
1750  
1751  		if (src2w <= 127 && src2w >= -128) {
1752  			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1753  			FAIL_IF(!inst);
1754  			*inst = IMUL_r_rm_i8;
1755  			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1756  			FAIL_IF(!inst);
1757  			INC_SIZE(1);
1758  			*inst = (sljit_s8)src2w;
1759  		}
1760  #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1761  		else {
1762  			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1763  			FAIL_IF(!inst);
1764  			*inst = IMUL_r_rm_i32;
1765  			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1766  			FAIL_IF(!inst);
1767  			INC_SIZE(4);
1768  			sljit_unaligned_store_sw(inst, src2w);
1769  		}
1770  #else
1771  		else if (IS_HALFWORD(src2w)) {
1772  			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1773  			FAIL_IF(!inst);
1774  			*inst = IMUL_r_rm_i32;
1775  			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1776  			FAIL_IF(!inst);
1777  			INC_SIZE(4);
1778  			sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
1779  		}
1780  		else {
1781  			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
1782  			if (dst_r != src1)
1783  				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1784  			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1785  			FAIL_IF(!inst);
1786  			*inst++ = GROUP_0F;
1787  			*inst = IMUL_r_rm;
1788  		}
1789  #endif
1790  	}
1791  	else {
1792  		/* Neither argument is immediate. */
1793  		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1794  			dst_r = TMP_REG1;
1795  		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1796  		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1797  		FAIL_IF(!inst);
1798  		*inst++ = GROUP_0F;
1799  		*inst = IMUL_r_rm;
1800  	}
1801  
1802  	if (dst_r == TMP_REG1)
1803  		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1804  
1805  	return SLJIT_SUCCESS;
1806  }
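/* Worked encodings for the three IMUL forms selected above (register
   choices are assumed for illustration; dst_r = eax, operand = ecx):

     imul eax, ecx        -> 0F AF C1           (IMUL_r_rm)
     imul eax, ecx, 100   -> 6B C1 64           (IMUL_r_rm_i8, imm8)
     imul eax, ecx, 1000  -> 69 C1 E8 03 00 00  (IMUL_r_rm_i32, imm32)

   hence the [-128, 127] tests: the sign-extended imm8 form saves three
   bytes per multiplication. */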
1807  
1808  static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 keep_flags,
1809  	sljit_s32 dst, sljit_sw dstw,
1810  	sljit_s32 src1, sljit_sw src1w,
1811  	sljit_s32 src2, sljit_sw src2w)
1812  {
1813  	sljit_u8* inst;
1814  	sljit_s32 dst_r, done = 0;
1815  
1816  	/* These cases are better left to be handled the normal way. */
1817  	if (!keep_flags) {
1818  		if (dst == src1 && dstw == src1w)
1819  			return SLJIT_ERR_UNSUPPORTED;
1820  		if (dst == src2 && dstw == src2w)
1821  			return SLJIT_ERR_UNSUPPORTED;
1822  	}
1823  
1824  	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1825  
1826  	if (FAST_IS_REG(src1)) {
1827  		if (FAST_IS_REG(src2)) {
1828  			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1829  			FAIL_IF(!inst);
1830  			*inst = LEA_r_m;
1831  			done = 1;
1832  		}
1833  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1834  		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1835  			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
1836  #else
1837  		if (src2 & SLJIT_IMM) {
1838  			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1839  #endif
1840  			FAIL_IF(!inst);
1841  			*inst = LEA_r_m;
1842  			done = 1;
1843  		}
1844  	}
1845  	else if (FAST_IS_REG(src2)) {
1846  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1847  		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1848  			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
1849  #else
1850  		if (src1 & SLJIT_IMM) {
1851  			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1852  #endif
1853  			FAIL_IF(!inst);
1854  			*inst = LEA_r_m;
1855  			done = 1;
1856  		}
1857  	}
1858  
1859  	if (done) {
1860  		if (dst_r == TMP_REG1)
1861  			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1862  		return SLJIT_SUCCESS;
1863  	}
1864  	return SLJIT_ERR_UNSUPPORTED;
1865  }
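/* LEA computes an address without touching EFLAGS and allows a three
   operand add, which is why the flag-less SLJIT_ADD/SLJIT_SUB cases in
   sljit_emit_op2() try this helper first. Hypothetical example: */
#if 0
	/* R0 = R1 + R2 becomes a single "lea dst, [src1 + src2]": */
	sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R1, 0, SLJIT_R2, 0);
#endif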
1866  
1867  static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1868  	sljit_s32 src1, sljit_sw src1w,
1869  	sljit_s32 src2, sljit_sw src2w)
1870  {
1871  	sljit_u8* inst;
1872  
1873  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1874  	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1875  #else
1876  	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1877  #endif
1878  		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1879  		return SLJIT_SUCCESS;
1880  	}
1881  
1882  	if (FAST_IS_REG(src1)) {
1883  		if (src2 & SLJIT_IMM) {
1884  			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1885  		}
1886  		else {
1887  			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1888  			FAIL_IF(!inst);
1889  			*inst = CMP_r_rm;
1890  		}
1891  		return SLJIT_SUCCESS;
1892  	}
1893  
1894  	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1895  		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1896  		FAIL_IF(!inst);
1897  		*inst = CMP_rm_r;
1898  		return SLJIT_SUCCESS;
1899  	}
1900  
1901  	if (src2 & SLJIT_IMM) {
1902  		if (src1 & SLJIT_IMM) {
1903  			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1904  			src1 = TMP_REG1;
1905  			src1w = 0;
1906  		}
1907  		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1908  	}
1909  	else {
1910  		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1911  		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1912  		FAIL_IF(!inst);
1913  		*inst = CMP_r_rm;
1914  	}
1915  	return SLJIT_SUCCESS;
1916  }
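/* The eax special case above exists because "cmp eax, imm32" has a
   dedicated short opcode. Worked encodings (illustrative):

     cmp eax, 1000 -> 3D E8 03 00 00     (CMP_EAX_i32, 5 bytes)
     cmp ecx, 1000 -> 81 F9 E8 03 00 00  (group-1 /7 form, 6 bytes)

   Immediates in [-128, 127] already use the sign-extended imm8 form, so
   the shortcut is only tested for the wider constants. */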
1917  
1918  static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
1919  	sljit_s32 src1, sljit_sw src1w,
1920  	sljit_s32 src2, sljit_sw src2w)
1921  {
1922  	sljit_u8* inst;
1923  
1924  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1925  	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1926  #else
1927  	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1928  #endif
1929  		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1930  		return SLJIT_SUCCESS;
1931  	}
1932  
1933  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1934  	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1935  #else
1936  	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1937  #endif
1938  		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1939  		return SLJIT_SUCCESS;
1940  	}
1941  
1942  	if (!(src1 & SLJIT_IMM)) {
1943  		if (src2 & SLJIT_IMM) {
1944  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1945  			if (IS_HALFWORD(src2w) || compiler->mode32) {
1946  				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1947  				FAIL_IF(!inst);
1948  				*inst = GROUP_F7;
1949  			}
1950  			else {
1951  				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1952  				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
1953  				FAIL_IF(!inst);
1954  				*inst = TEST_rm_r;
1955  			}
1956  #else
1957  			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1958  			FAIL_IF(!inst);
1959  			*inst = GROUP_F7;
1960  #endif
1961  			return SLJIT_SUCCESS;
1962  		}
1963  		else if (FAST_IS_REG(src1)) {
1964  			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1965  			FAIL_IF(!inst);
1966  			*inst = TEST_rm_r;
1967  			return SLJIT_SUCCESS;
1968  		}
1969  	}
1970  
1971  	if (!(src2 & SLJIT_IMM)) {
1972  		if (src1 & SLJIT_IMM) {
1973  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1974  			if (IS_HALFWORD(src1w) || compiler->mode32) {
1975  				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
1976  				FAIL_IF(!inst);
1977  				*inst = GROUP_F7;
1978  			}
1979  			else {
1980  				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1981  				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
1982  				FAIL_IF(!inst);
1983  				*inst = TEST_rm_r;
1984  			}
1985  #else
1986  			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
1987  			FAIL_IF(!inst);
1988  			*inst = GROUP_F7;
1989  #endif
1990  			return SLJIT_SUCCESS;
1991  		}
1992  		else if (FAST_IS_REG(src2)) {
1993  			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1994  			FAIL_IF(!inst);
1995  			*inst = TEST_rm_r;
1996  			return SLJIT_SUCCESS;
1997  		}
1998  	}
1999  
2000  	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2001  	if (src2 & SLJIT_IMM) {
2002  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2003  		if (IS_HALFWORD(src2w) || compiler->mode32) {
2004  			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2005  			FAIL_IF(!inst);
2006  			*inst = GROUP_F7;
2007  		}
2008  		else {
2009  			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2010  			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
2011  			FAIL_IF(!inst);
2012  			*inst = TEST_rm_r;
2013  		}
2014  #else
2015  		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2016  		FAIL_IF(!inst);
2017  		*inst = GROUP_F7;
2018  #endif
2019  	}
2020  	else {
2021  		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2022  		FAIL_IF(!inst);
2023  		*inst = TEST_rm_r;
2024  	}
2025  	return SLJIT_SUCCESS;
2026  }
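/* TEST performs the same AND without writing the result, which is why
   sljit_emit_op2() routes SLJIT_AND with dst == SLJIT_UNUSED here.
   Hypothetical sketch (the SLJIT_SET_E flag-request bit is an assumption
   of this API revision): */
#if 0
	/* Set the flags for "R0 & 0x10" without storing the value: */
	sljit_emit_op2(compiler, SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0,
		SLJIT_R0, 0, SLJIT_IMM, 0x10);
#endif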
2027  
2028  static sljit_s32 emit_shift(struct sljit_compiler *compiler,
2029  	sljit_u8 mode,
2030  	sljit_s32 dst, sljit_sw dstw,
2031  	sljit_s32 src1, sljit_sw src1w,
2032  	sljit_s32 src2, sljit_sw src2w)
2033  {
2034  	sljit_u8* inst;
2035  
2036  	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2037  		if (dst == src1 && dstw == src1w) {
2038  			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2039  			FAIL_IF(!inst);
2040  			*inst |= mode;
2041  			return SLJIT_SUCCESS;
2042  		}
2043  		if (dst == SLJIT_UNUSED) {
2044  			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2045  			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2046  			FAIL_IF(!inst);
2047  			*inst |= mode;
2048  			return SLJIT_SUCCESS;
2049  		}
2050  		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2051  			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2052  			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2053  			FAIL_IF(!inst);
2054  			*inst |= mode;
2055  			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2056  			return SLJIT_SUCCESS;
2057  		}
2058  		if (FAST_IS_REG(dst)) {
2059  			EMIT_MOV(compiler, dst, 0, src1, src1w);
2060  			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2061  			FAIL_IF(!inst);
2062  			*inst |= mode;
2063  			return SLJIT_SUCCESS;
2064  		}
2065  
2066  		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2067  		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2068  		FAIL_IF(!inst);
2069  		*inst |= mode;
2070  		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2071  		return SLJIT_SUCCESS;
2072  	}
2073  
2074  	if (dst == SLJIT_PREF_SHIFT_REG) {
2075  		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2076  		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2077  		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2078  		FAIL_IF(!inst);
2079  		*inst |= mode;
2080  		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2081  	}
2082  	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2083  		if (src1 != dst)
2084  			EMIT_MOV(compiler, dst, 0, src1, src1w);
2085  		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2086  		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2087  		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2088  		FAIL_IF(!inst);
2089  		*inst |= mode;
2090  		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2091  	}
2092  	else {
2093  		/* This case is really difficult, since ecx itself may be used for
2094  		   addressing, and we must ensure it works even in that case. */
2095  		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2096  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2097  		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2098  #else
2099  		/* [esp+0] holds the saved flags, so spill ecx to the next word. */
2100  		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
2101  #endif
2102  		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2103  		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2104  		FAIL_IF(!inst);
2105  		*inst |= mode;
2106  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2107  		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2108  #else
2109  		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
2110  #endif
2111  		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2112  	}
2113  
2114  	return SLJIT_SUCCESS;
2115  }
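/* Variable shift counts must live in cl on x86, so when src2 is neither
   an immediate nor SLJIT_PREF_SHIFT_REG (ecx on this target), the code
   above parks ecx in a temporary, performs the shift, then restores it.
   Hypothetical sketch: */
#if 0
	/* R0 = R0 << R1; R1 is not ecx, so ecx is spilled and restored: */
	sljit_emit_op2(compiler, SLJIT_SHL, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R1, 0);
#endif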
2116  
2117  static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2118  	sljit_u8 mode, sljit_s32 set_flags,
2119  	sljit_s32 dst, sljit_sw dstw,
2120  	sljit_s32 src1, sljit_sw src1w,
2121  	sljit_s32 src2, sljit_sw src2w)
2122  {
2123  	/* The CPU does not set flags if the shift count is 0. */
2124  	if (src2 & SLJIT_IMM) {
2125  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2126  		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2127  			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2128  #else
2129  		if ((src2w & 0x1f) != 0)
2130  			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2131  #endif
2132  		if (!set_flags)
2133  			return emit_mov(compiler, dst, dstw, src1, src1w);
2134  		/* OR dst, src, 0 */
2135  		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2136  			dst, dstw, src1, src1w, SLJIT_IMM, 0);
2137  	}
2138  
2139  	if (!set_flags)
2140  		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2141  
2142  	if (!FAST_IS_REG(dst))
2143  		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2144  
2145  	FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
2146  
2147  	if (FAST_IS_REG(dst))
2148  		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2149  	return SLJIT_SUCCESS;
2150  }
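/* A minimal sketch of the zero-count corner case handled above: the
   hardware leaves EFLAGS untouched for a shift by 0, so a flag-observing
   shift by immediate 0 is rewritten as "dst = src | 0", which both moves
   the value and sets the flags from it (SLJIT_SET_E is an assumed flag
   bit of this API revision): */
#if 0
	sljit_emit_op2(compiler, SLJIT_SHL | SLJIT_SET_E, SLJIT_R0, 0,
		SLJIT_R1, 0, SLJIT_IMM, 0);
#endif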
2151  
2152  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2153  	sljit_s32 dst, sljit_sw dstw,
2154  	sljit_s32 src1, sljit_sw src1w,
2155  	sljit_s32 src2, sljit_sw src2w)
2156  {
2157  	CHECK_ERROR();
2158  	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2159  	ADJUST_LOCAL_OFFSET(dst, dstw);
2160  	ADJUST_LOCAL_OFFSET(src1, src1w);
2161  	ADJUST_LOCAL_OFFSET(src2, src2w);
2162  
2163  	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2164  	CHECK_EXTRA_REGS(src1, src1w, (void)0);
2165  	CHECK_EXTRA_REGS(src2, src2w, (void)0);
2166  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2167  	compiler->mode32 = op & SLJIT_I32_OP;
2168  #endif
2169  
2170  	if (GET_OPCODE(op) >= SLJIT_MUL) {
2171  		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2172  			compiler->flags_saved = 0;
2173  		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2174  			FAIL_IF(emit_save_flags(compiler));
2175  	}
2176  
2177  	switch (GET_OPCODE(op)) {
2178  	case SLJIT_ADD:
2179  		if (!GET_FLAGS(op)) {
2180  			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2181  				return compiler->error;
2182  		}
2183  		else
2184  			compiler->flags_saved = 0;
2185  		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2186  			FAIL_IF(emit_save_flags(compiler));
2187  		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
2188  			dst, dstw, src1, src1w, src2, src2w);
2189  	case SLJIT_ADDC:
2190  		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2191  			FAIL_IF(emit_restore_flags(compiler, 1));
2192  		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2193  			FAIL_IF(emit_save_flags(compiler));
2194  		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2195  			compiler->flags_saved = 0;
2196  		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
2197  			dst, dstw, src1, src1w, src2, src2w);
2198  	case SLJIT_SUB:
2199  		if (!GET_FLAGS(op)) {
2200  			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2201  				return compiler->error;
2202  		}
2203  		else
2204  			compiler->flags_saved = 0;
2205  		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2206  			FAIL_IF(emit_save_flags(compiler));
2207  		if (dst == SLJIT_UNUSED)
2208  			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2209  		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
2210  			dst, dstw, src1, src1w, src2, src2w);
2211  	case SLJIT_SUBC:
2212  		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2213  			FAIL_IF(emit_restore_flags(compiler, 1));
2214  		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2215  			FAIL_IF(emit_save_flags(compiler));
2216  		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2217  			compiler->flags_saved = 0;
2218  		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
2219  			dst, dstw, src1, src1w, src2, src2w);
2220  	case SLJIT_MUL:
2221  		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2222  	case SLJIT_AND:
2223  		if (dst == SLJIT_UNUSED)
2224  			return emit_test_binary(compiler, src1, src1w, src2, src2w);
2225  		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
2226  			dst, dstw, src1, src1w, src2, src2w);
2227  	case SLJIT_OR:
2228  		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2229  			dst, dstw, src1, src1w, src2, src2w);
2230  	case SLJIT_XOR:
2231  		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
2232  			dst, dstw, src1, src1w, src2, src2w);
2233  	case SLJIT_SHL:
2234  		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
2235  			dst, dstw, src1, src1w, src2, src2w);
2236  	case SLJIT_LSHR:
2237  		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
2238  			dst, dstw, src1, src1w, src2, src2w);
2239  	case SLJIT_ASHR:
2240  		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
2241  			dst, dstw, src1, src1w, src2, src2w);
2242  	}
2243  
2244  	return SLJIT_SUCCESS;
2245  }
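/* End-to-end sketch (illustrative): computing R2 = (R0 + R1) & 0xff
   takes two op2 calls; the first qualifies for the LEA fast path (no
   flags requested, dst aliases neither source), the second goes through
   emit_cum_binary() with the imm8 form of AND: */
#if 0
	sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_R0, 0, SLJIT_R1, 0);
	sljit_emit_op2(compiler, SLJIT_AND, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 0xff);
#endif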
2246  
2247  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2248  {
2249  	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2250  #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2251  	if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
2252  		return -1;
2253  #endif
2254  	return reg_map[reg];
2255  }
2256  
2257  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2258  {
2259  	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2260  	return reg;
2261  }
2262  
2263  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2264  	void *instruction, sljit_s32 size)
2265  {
2266  	sljit_u8 *inst;
2267  
2268  	CHECK_ERROR();
2269  	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2270  
2271  	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
2272  	FAIL_IF(!inst);
2273  	INC_SIZE(size);
2274  	SLJIT_MEMCPY(inst, instruction, size);
2275  	return SLJIT_SUCCESS;
2276  }
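/* Escape hatch for instructions sljit does not model. Hypothetical
   example emitting RDTSC (0F 31); the caller is responsible for knowing
   that it clobbers edx:eax: */
#if 0
	sljit_u8 rdtsc_bytes[] = { 0x0f, 0x31 };
	sljit_emit_op_custom(compiler, rdtsc_bytes, 2);
#endif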
2277  
2278  /* --------------------------------------------------------------------- */
2279  /*  Floating point operators                                             */
2280  /* --------------------------------------------------------------------- */
2281  
2282  /* Up to 12 bytes of alignment padding + 2 * 16 bytes of constants. */
2283  static sljit_s32 sse2_data[3 + (4 + 4) * 2];
2284  static sljit_s32 *sse2_buffer;
2285  
2286  static void init_compiler(void)
2287  {
2288  	sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
2289  	/* Single precision constants. */
2290  	sse2_buffer[0] = 0x80000000;
2291  	sse2_buffer[4] = 0x7fffffff;
2292  	/* Double precision constants. */
2293  	sse2_buffer[8] = 0;
2294  	sse2_buffer[9] = 0x80000000;
2295  	sse2_buffer[12] = 0xffffffff;
2296  	sse2_buffer[13] = 0x7fffffff;
2297  }
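/* The buffer holds IEEE-754 sign and absolute-value masks, 16-byte
   aligned for the SSE2 logic instructions used below:

     sse2_buffer[0]      0x80000000           float sign mask  (XORPD: negate)
     sse2_buffer[4]      0x7fffffff           float abs mask   (ANDPD: fabs)
     sse2_buffer[8..9]   0x8000000000000000   double sign mask
     sse2_buffer[12..13] 0x7fffffffffffffff   double abs mask

   For example, "xorpd xmm0, [double sign mask]" flips only the sign bit
   of the value in xmm0; this is how SLJIT_NEG_F64 is emitted below. */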
2298  
2299  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
2300  {
2301  #ifdef SLJIT_IS_FPU_AVAILABLE
2302  	return SLJIT_IS_FPU_AVAILABLE;
2303  #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2304  	if (cpu_has_sse2 == -1)
2305  		get_cpu_features();
2306  	return cpu_has_sse2;
2307  #else /* SLJIT_DETECT_SSE2 */
2308  	return 1;
2309  #endif /* SLJIT_DETECT_SSE2 */
2310  }
2311  
2312  static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
2313  	sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2314  {
2315  	sljit_u8 *inst;
2316  
2317  	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2318  	FAIL_IF(!inst);
2319  	*inst++ = GROUP_0F;
2320  	*inst = opcode;
2321  	return SLJIT_SUCCESS;
2322  }
2323  
2324  static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
2325  	sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2326  {
2327  	sljit_u8 *inst;
2328  
2329  	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2330  	FAIL_IF(!inst);
2331  	*inst++ = GROUP_0F;
2332  	*inst = opcode;
2333  	return SLJIT_SUCCESS;
2334  }
2335  
2336  static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
2337  	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
2338  {
2339  	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2340  }
2341  
2342  static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
2343  	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
2344  {
2345  	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2346  }
2347  
2348  static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2349  	sljit_s32 dst, sljit_sw dstw,
2350  	sljit_s32 src, sljit_sw srcw)
2351  {
2352  	sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2353  	sljit_u8 *inst;
2354  
2355  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2356  	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2357  		compiler->mode32 = 0;
2358  #endif
2359  
2360  	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2361  	FAIL_IF(!inst);
2362  	*inst++ = GROUP_0F;
2363  	*inst = CVTTSD2SI_r_xm;
2364  
2365  	if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
2366  		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2367  	return SLJIT_SUCCESS;
2368  }
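/* Note: CVTTSD2SI/CVTTSS2SI convert with truncation toward zero, so
   -1.5 becomes -1 and 2.9 becomes 2; out-of-range inputs produce the
   "integer indefinite" value (0x80000000, or 0x8000000000000000 in
   64-bit mode). */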
2369  
2370  static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2371  	sljit_s32 dst, sljit_sw dstw,
2372  	sljit_s32 src, sljit_sw srcw)
2373  {
2374  	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2375  	sljit_u8 *inst;
2376  
2377  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2378  	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2379  		compiler->mode32 = 0;
2380  #endif
2381  
2382  	if (src & SLJIT_IMM) {
2383  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2384  		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2385  			srcw = (sljit_s32)srcw;
2386  #endif
2387  		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2388  		src = TMP_REG1;
2389  		srcw = 0;
2390  	}
2391  
2392  	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2393  	FAIL_IF(!inst);
2394  	*inst++ = GROUP_0F;
2395  	*inst = CVTSI2SD_x_rm;
2396  
2397  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2398  	compiler->mode32 = 1;
2399  #endif
2400  	if (dst_r == TMP_FREG)
2401  		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2402  	return SLJIT_SUCCESS;
2403  }
2404  
2405  static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2406  	sljit_s32 src1, sljit_sw src1w,
2407  	sljit_s32 src2, sljit_sw src2w)
2408  {
2409  	compiler->flags_saved = 0;
2410  	if (!FAST_IS_REG(src1)) {
2411  		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2412  		src1 = TMP_FREG;
2413  	}
2414  	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
2415  }
2416  
2417  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2418  	sljit_s32 dst, sljit_sw dstw,
2419  	sljit_s32 src, sljit_sw srcw)
2420  {
2421  	sljit_s32 dst_r;
2422  
2423  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2424  	compiler->mode32 = 1;
2425  #endif
2426  
2427  	CHECK_ERROR();
2428  	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2429  
2430  	if (GET_OPCODE(op) == SLJIT_MOV_F64) {
2431  		if (FAST_IS_REG(dst))
2432  			return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
2433  		if (FAST_IS_REG(src))
2434  			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
2435  		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
2436  		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2437  	}
2438  
2439  	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
2440  		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2441  		if (FAST_IS_REG(src)) {
2442  			/* We overwrite the high bits of the source register. From SLJIT's
2443  			   point of view, this is not an issue.
2444  			   Note: in SSE3, we could also use MOVDDUP or MOVSLDUP. */
2445  			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
2446  		}
2447  		else {
2448  			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
2449  			src = TMP_FREG;
2450  		}
2451  
2452  		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
2453  		if (dst_r == TMP_FREG)
2454  			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2455  		return SLJIT_SUCCESS;
2456  	}
2457  
2458  	if (SLOW_IS_REG(dst)) {
2459  		dst_r = dst;
2460  		if (dst != src)
2461  			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2462  	}
2463  	else {
2464  		dst_r = TMP_FREG;
2465  		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2466  	}
2467  
2468  	switch (GET_OPCODE(op)) {
2469  	case SLJIT_NEG_F64:
2470  		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
2471  		break;
2472  
2473  	case SLJIT_ABS_F64:
2474  		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2475  		break;
2476  	}
2477  
2478  	if (dst_r == TMP_FREG)
2479  		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2480  	return SLJIT_SUCCESS;
2481  }
2482  
2483  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2484  	sljit_s32 dst, sljit_sw dstw,
2485  	sljit_s32 src1, sljit_sw src1w,
2486  	sljit_s32 src2, sljit_sw src2w)
2487  {
2488  	sljit_s32 dst_r;
2489  
2490  	CHECK_ERROR();
2491  	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2492  	ADJUST_LOCAL_OFFSET(dst, dstw);
2493  	ADJUST_LOCAL_OFFSET(src1, src1w);
2494  	ADJUST_LOCAL_OFFSET(src2, src2w);
2495  
2496  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2497  	compiler->mode32 = 1;
2498  #endif
2499  
2500  	if (FAST_IS_REG(dst)) {
2501  		dst_r = dst;
2502  		if (dst == src1)
2503  			; /* Do nothing here. */
2504  		else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
2505  			/* Swap arguments. */
2506  			src2 = src1;
2507  			src2w = src1w;
2508  		}
2509  		else if (dst != src2)
2510  			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
2511  		else {
2512  			dst_r = TMP_FREG;
2513  			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2514  		}
2515  	}
2516  	else {
2517  		dst_r = TMP_FREG;
2518  		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2519  	}
2520  
2521  	switch (GET_OPCODE(op)) {
2522  	case SLJIT_ADD_F64:
2523  		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2524  		break;
2525  
2526  	case SLJIT_SUB_F64:
2527  		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2528  		break;
2529  
2530  	case SLJIT_MUL_F64:
2531  		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2532  		break;
2533  
2534  	case SLJIT_DIV_F64:
2535  		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2536  		break;
2537  	}
2538  
2539  	if (dst_r == TMP_FREG)
2540  		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2541  	return SLJIT_SUCCESS;
2542  }
2543  
2544  /* --------------------------------------------------------------------- */
2545  /*  Conditional instructions                                             */
2546  /* --------------------------------------------------------------------- */
2547  
2548  SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2549  {
2550  	sljit_u8 *inst;
2551  	struct sljit_label *label;
2552  
2553  	CHECK_ERROR_PTR();
2554  	CHECK_PTR(check_sljit_emit_label(compiler));
2555  
2556  	/* We should restore the flags before the label,
2557  	   since other taken jumps have their own flags as well. */
2558  	if (SLJIT_UNLIKELY(compiler->flags_saved))
2559  		PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2560  
2561  	if (compiler->last_label && compiler->last_label->size == compiler->size)
2562  		return compiler->last_label;
2563  
2564  	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2565  	PTR_FAIL_IF(!label);
2566  	set_label(label, compiler);
2567  
2568  	inst = (sljit_u8*)ensure_buf(compiler, 2);
2569  	PTR_FAIL_IF(!inst);
2570  
2571  	*inst++ = 0;
2572  	*inst++ = 0;
2573  
2574  	return label;
2575  }
2576  
2577  SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2578  {
2579  	sljit_u8 *inst;
2580  	struct sljit_jump *jump;
2581  
2582  	CHECK_ERROR_PTR();
2583  	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2584  
2585  	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2586  		if ((type & 0xff) <= SLJIT_JUMP)
2587  			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2588  		compiler->flags_saved = 0;
2589  	}
2590  
2591  	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2592  	PTR_FAIL_IF_NULL(jump);
2593  	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2594  	type &= 0xff;
2595  
2596  	if (type >= SLJIT_CALL1)
2597  		PTR_FAIL_IF(call_with_args(compiler, type));
2598  
2599  	/* Worst case size. */
2600  #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2601  	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2602  #else
2603  	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2604  #endif
2605  
2606  	inst = (sljit_u8*)ensure_buf(compiler, 2);
2607  	PTR_FAIL_IF_NULL(inst);
2608  
2609  	*inst++ = 0;
2610  	*inst++ = type + 4;
2611  	return jump;
2612  }
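/* Typical client usage (hypothetical sketch): a conditional jump is
   created here and later bound to a label with sljit_set_label(): */
#if 0
	struct sljit_jump *jump = sljit_emit_jump(compiler, SLJIT_EQUAL);
	/* ... code for the fall-through path ... */
	sljit_set_label(jump, sljit_emit_label(compiler));
#endif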
2613  
2614  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2615  {
2616  	sljit_u8 *inst;
2617  	struct sljit_jump *jump;
2618  
2619  	CHECK_ERROR();
2620  	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2621  	ADJUST_LOCAL_OFFSET(src, srcw);
2622  
2623  	CHECK_EXTRA_REGS(src, srcw, (void)0);
2624  
2625  	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2626  		if (type <= SLJIT_JUMP)
2627  			FAIL_IF(emit_restore_flags(compiler, 0));
2628  		compiler->flags_saved = 0;
2629  	}
2630  
2631  	if (type >= SLJIT_CALL1) {
2632  #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2633  #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
2634  		if (src == SLJIT_R2) {
2635  			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2636  			src = TMP_REG1;
2637  		}
2638  		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
2639  			srcw += sizeof(sljit_sw);
2640  #endif
2641  #endif
2642  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2643  		if (src == SLJIT_R2) {
2644  			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2645  			src = TMP_REG1;
2646  		}
2647  #endif
2648  		FAIL_IF(call_with_args(compiler, type));
2649  	}
2650  
2651  	if (src == SLJIT_IMM) {
2652  		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2653  		FAIL_IF_NULL(jump);
2654  		set_jump(jump, compiler, JUMP_ADDR);
2655  		jump->u.target = srcw;
2656  
2657  		/* Worst case size. */
2658  #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2659  		compiler->size += 5;
2660  #else
2661  		compiler->size += 10 + 3;
2662  #endif
2663  
2664  		inst = (sljit_u8*)ensure_buf(compiler, 2);
2665  		FAIL_IF_NULL(inst);
2666  
2667  		*inst++ = 0;
2668  		*inst++ = type + 4;
2669  	}
2670  	else {
2671  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2672  		/* REX_W is not necessary (src is not immediate). */
2673  		compiler->mode32 = 1;
2674  #endif
2675  		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2676  		FAIL_IF(!inst);
2677  		*inst++ = GROUP_FF;
2678  		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2679  	}
2680  	return SLJIT_SUCCESS;
2681  }
2682  
2683  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2684  	sljit_s32 dst, sljit_sw dstw,
2685  	sljit_s32 src, sljit_sw srcw,
2686  	sljit_s32 type)
2687  {
2688  	sljit_u8 *inst;
2689  	sljit_u8 cond_set = 0;
2690  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2691  	sljit_s32 reg;
2692  #else
2693  	/* CHECK_EXTRA_REGS might overwrite these values. */
2694  	sljit_s32 dst_save = dst;
2695  	sljit_sw dstw_save = dstw;
2696  #endif
2697  
2698  	CHECK_ERROR();
2699  	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
2700  	SLJIT_UNUSED_ARG(srcw);
2701  
2702  	if (dst == SLJIT_UNUSED)
2703  		return SLJIT_SUCCESS;
2704  
2705  	ADJUST_LOCAL_OFFSET(dst, dstw);
2706  	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2707  	if (SLJIT_UNLIKELY(compiler->flags_saved))
2708  		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
2709  
2710  	type &= 0xff;
2711  	/* setcc = jcc + 0x10. */
2712  	cond_set = get_jump_code(type) + 0x10;
2713  
2714  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2715  	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
2716  		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
2717  		FAIL_IF(!inst);
2718  		INC_SIZE(4 + 3);
2719  		/* Set low register to conditional flag. */
2720  		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2721  		*inst++ = GROUP_0F;
2722  		*inst++ = cond_set;
2723  		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
2724  		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2725  		*inst++ = OR_rm8_r8;
2726  		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2727  		return SLJIT_SUCCESS;
2728  	}
2729  
2730  	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2731  
2732  	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
2733  	FAIL_IF(!inst);
2734  	INC_SIZE(4 + 4);
2735  	/* Set low register to conditional flag. */
2736  	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2737  	*inst++ = GROUP_0F;
2738  	*inst++ = cond_set;
2739  	*inst++ = MOD_REG | reg_lmap[reg];
2740  	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2741  	*inst++ = GROUP_0F;
2742  	*inst++ = MOVZX_r_rm8;
2743  	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2744  
2745  	if (reg != TMP_REG1)
2746  		return SLJIT_SUCCESS;
2747  
2748  	if (GET_OPCODE(op) < SLJIT_ADD) {
2749  		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2750  		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2751  	}
2752  #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2753  		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2754  	compiler->skip_checks = 1;
2755  #endif
2756  	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
2757  #else /* SLJIT_CONFIG_X86_64 */
2758  	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2759  		if (reg_map[dst] <= 4) {
2760  			/* Low byte is accessible. */
2761  			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
2762  			FAIL_IF(!inst);
2763  			INC_SIZE(3 + 3);
2764  			/* Set low byte to conditional flag. */
2765  			*inst++ = GROUP_0F;
2766  			*inst++ = cond_set;
2767  			*inst++ = MOD_REG | reg_map[dst];
2768  
2769  			*inst++ = GROUP_0F;
2770  			*inst++ = MOVZX_r_rm8;
2771  			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2772  			return SLJIT_SUCCESS;
2773  		}
2774  
2775  		/* Low byte is not accessible. */
2776  		if (cpu_has_cmov == -1)
2777  			get_cpu_features();
2778  
2779  		if (cpu_has_cmov) {
2780  			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2781  			/* An xor reg, reg operation would overwrite the flags. */
2782  			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2783  
2784  			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
2785  			FAIL_IF(!inst);
2786  			INC_SIZE(3);
2787  
2788  			*inst++ = GROUP_0F;
2789  			/* cmovcc = setcc - 0x50. */
2790  			*inst++ = cond_set - 0x50;
2791  			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2792  			return SLJIT_SUCCESS;
2793  		}
2794  
2795  		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2796  		FAIL_IF(!inst);
2797  		INC_SIZE(1 + 3 + 3 + 1);
2798  		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2799  		/* Set al to conditional flag. */
2800  		*inst++ = GROUP_0F;
2801  		*inst++ = cond_set;
2802  		*inst++ = MOD_REG | 0 /* eax */;
2803  
2804  		*inst++ = GROUP_0F;
2805  		*inst++ = MOVZX_r_rm8;
2806  		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2807  		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2808  		return SLJIT_SUCCESS;
2809  	}
2810  
2811  	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
2812  		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
2813  		if (dst != SLJIT_R0) {
2814  			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2815  			FAIL_IF(!inst);
2816  			INC_SIZE(1 + 3 + 2 + 1);
2817  			/* Set low register to conditional flag. */
2818  			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2819  			*inst++ = GROUP_0F;
2820  			*inst++ = cond_set;
2821  			*inst++ = MOD_REG | 0 /* eax */;
2822  			*inst++ = OR_rm8_r8;
2823  			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2824  			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2825  		}
2826  		else {
2827  			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2828  			FAIL_IF(!inst);
2829  			INC_SIZE(2 + 3 + 2 + 2);
2830  			/* Set low register to conditional flag. */
2831  			*inst++ = XCHG_r_rm;
2832  			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2833  			*inst++ = GROUP_0F;
2834  			*inst++ = cond_set;
2835  			*inst++ = MOD_REG | 1 /* ecx */;
2836  			*inst++ = OR_rm8_r8;
2837  			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2838  			*inst++ = XCHG_r_rm;
2839  			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2840  		}
2841  		return SLJIT_SUCCESS;
2842  	}
2843  
2844  	/* Set TMP_REG1 to the condition bit. */
2845  	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2846  	FAIL_IF(!inst);
2847  	INC_SIZE(1 + 3 + 3 + 1);
2848  	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2849  	/* Set al to conditional flag. */
2850  	*inst++ = GROUP_0F;
2851  	*inst++ = cond_set;
2852  	*inst++ = MOD_REG | 0 /* eax */;
2853  
2854  	*inst++ = GROUP_0F;
2855  	*inst++ = MOVZX_r_rm8;
2856  	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2857  
2858  	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2859  
2860  	if (GET_OPCODE(op) < SLJIT_ADD)
2861  		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2862  
2863  #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2864  		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2865  	compiler->skip_checks = 1;
2866  #endif
2867  	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2868  #endif /* SLJIT_CONFIG_X86_64 */
2869  }
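/* The two opcode identities used above, spelled out for the EQUAL
   condition (illustrative):

     je   rel32  -> 0F 84 cd   (jcc, from get_jump_code())
     sete r/m8   -> 0F 94 /0   = jcc second byte + 0x10
     cmove r, rm -> 0F 44 /r   = setcc second byte - 0x50

   so the single get_jump_code() table serves jumps, setcc and cmovcc. */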
2870  
2871  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
2872  {
2873  	CHECK_ERROR();
2874  	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
2875  	ADJUST_LOCAL_OFFSET(dst, dstw);
2876  
2877  	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2878  
2879  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2880  	compiler->mode32 = 0;
2881  #endif
2882  
2883  	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2884  
2885  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2886  	if (NOT_HALFWORD(offset)) {
2887  		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2888  #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2889  		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2890  		return compiler->error;
2891  #else
2892  		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2893  #endif
2894  	}
2895  #endif
2896  
2897  	if (offset != 0)
2898  		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2899  	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2900  }
2901  
2902  SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2903  {
2904  	sljit_u8 *inst;
2905  	struct sljit_const *const_;
2906  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2907  	sljit_s32 reg;
2908  #endif
2909  
2910  	CHECK_ERROR_PTR();
2911  	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2912  	ADJUST_LOCAL_OFFSET(dst, dstw);
2913  
2914  	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2915  
2916  	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2917  	PTR_FAIL_IF(!const_);
2918  	set_const(const_, compiler);
2919  
2920  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2921  	compiler->mode32 = 0;
2922  	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2923  
2924  	if (emit_load_imm64(compiler, reg, init_value))
2925  		return NULL;
2926  #else
2927  	if (dst == SLJIT_UNUSED)
2928  		dst = TMP_REG1;
2929  
2930  	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2931  		return NULL;
2932  #endif
2933  
2934  	inst = (sljit_u8*)ensure_buf(compiler, 2);
2935  	PTR_FAIL_IF(!inst);
2936  
2937  	*inst++ = 0;
2938  	*inst++ = 1;
2939  
2940  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2941  	if (dst & SLJIT_MEM)
2942  		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2943  			return NULL;
2944  #endif
2945  
2946  	return const_;
2947  }
2948  
2949  SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2950  {
2951  #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2952  	sljit_unaligned_store_sw((void*)addr, new_addr - (addr + 4));
2953  #else
2954  	sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_addr);
2955  #endif
2956  }
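/* On x86-32 the patched word is the rel32 displacement of a jmp/call,
   measured from the end of the 4-byte field: patching addr = 0x1000 to
   reach new_addr = 0x1080 stores 0x1080 - (0x1000 + 4) = 0x7c (addresses
   made up for illustration). On x86-64 the word is the absolute 64-bit
   immediate of a mov, so new_addr is stored as-is. */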
2957  
2958  SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2959  {
2960  	sljit_unaligned_store_sw((void*)addr, new_constant);
2961  }
2962  
2963  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void)
2964  {
2965  #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2966  	if (cpu_has_sse2 == -1)
2967  		get_cpu_features();
2968  	return cpu_has_sse2;
2969  #else
2970  	return 1;
2971  #endif
2972  }
2973  
2974  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void)
2975  {
2976  	if (cpu_has_cmov == -1)
2977  		get_cpu_features();
2978  	return cpu_has_cmov;
2979  }
2980  
2981  SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler,
2982  	sljit_s32 type,
2983  	sljit_s32 dst_reg,
2984  	sljit_s32 src, sljit_sw srcw)
2985  {
2986  	sljit_u8* inst;
2987  
2988  	CHECK_ERROR();
2989  #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2990  	CHECK_ARGUMENT(sljit_x86_is_cmov_available());
2991  	CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP)));
2992  	CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64);
2993  	CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP));
2994  	FUNCTION_CHECK_SRC(src, srcw);
2995  #endif
2996  #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
2997  	if (SLJIT_UNLIKELY(!!compiler->verbose)) {
2998  		fprintf(compiler->verbose, "  x86_cmov%s %s%s, ",
2999  			!(dst_reg & SLJIT_I32_OP) ? "" : ".i",
3000  			jump_names[type & 0xff], JUMP_POSTFIX(type));
3001  		sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP);
3002  		fprintf(compiler->verbose, ", ");
3003  		sljit_verbose_param(compiler, src, srcw);
3004  		fprintf(compiler->verbose, "\n");
3005  	}
3006  #endif
3007  
3008  	ADJUST_LOCAL_OFFSET(src, srcw);
3009  	CHECK_EXTRA_REGS(src, srcw, (void)0);
3010  
3011  #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3012  	compiler->mode32 = dst_reg & SLJIT_I32_OP;
3013  #endif
3014  	dst_reg &= ~SLJIT_I32_OP;
3015  
3016  	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
3017  		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
3018  		src = TMP_REG1;
3019  		srcw = 0;
3020  	}
3021  
3022  	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
3023  	FAIL_IF(!inst);
3024  	*inst++ = GROUP_0F;
3025  	*inst = get_jump_code(type & 0xff) - 0x40;
3026  	return SLJIT_SUCCESS;
3027  }
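/* Hypothetical usage sketch: select a value without a branch. The
   flag-setting comparison must immediately precede the cmov, and
   SLJIT_SET_E is an assumed flag-bit name from this API revision: */
#if 0
	/* R0 = (R0 == 0) ? R1 : R0 */
	sljit_emit_op2(compiler, SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0,
		SLJIT_R0, 0, SLJIT_IMM, 0);
	sljit_x86_emit_cmov(compiler, SLJIT_EQUAL, SLJIT_R0, SLJIT_R1, 0);
#endif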
3028