1 /**************************************************************************
2  *
3  * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included
13  * in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  **************************************************************************/
24 
25 #ifndef _RTASM_X86SSE_H_
26 #define _RTASM_X86SSE_H_
27 
28 #include "pipe/p_compiler.h"
29 #include "pipe/p_config.h"
30 
31 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
32 
33 /* It is up to the caller to ensure that instructions issued are
34  * suitable for the host cpu.  There are no checks made in this module
35  * for mmx/sse/sse2 support on the cpu.
36  */
/**
 * Compact operand descriptor; the four bit-fields total 32 bits.
 *
 * file, idx and mod are sized to hold every value of enum x86_reg_file
 * (4 values), enum x86_reg_name (16 values) and enum x86_reg_mod
 * (4 values) respectively.
 */
struct x86_reg {
   unsigned file:2;
   unsigned idx:4;
   unsigned mod:2;		/* mod_REG if this is just a register */
   /* Declared "signed int" on purpose: whether a plain "int" bit-field is
    * signed is implementation-defined, and a displacement has to be able
    * to hold negative offsets.
    */
   signed int disp:24;		/* only +/- 23bits of offset - should be enough... */
};
43 
/* Instruction-set capability flags, one distinct bit each so they can be
 * combined with bitwise OR.
 * NOTE(review): presumably stored in x86_function::caps - confirm against
 * the users of this header.
 */
#define X86_MMX     0x01
#define X86_MMX2    0x02
#define X86_SSE     0x04
#define X86_SSE2    0x08
#define X86_SSE3    0x10
#define X86_SSE4_1  0x20
50 
/**
 * State of one function being assembled.
 *
 * NOTE(review): the roles of store/csr/size are not visible in this header;
 * presumably store is the start of the code buffer, csr the current write
 * position and size the buffer capacity - confirm in the implementation.
 */
struct x86_function {
   unsigned caps;		/* value returned by x86_target_caps() */
   unsigned size;
   unsigned char *store;
   unsigned char *csr;

   unsigned stack_offset:16;
   unsigned need_emms:8;
   /* Declared "signed int" on purpose: whether a plain "int" bit-field is
    * signed is implementation-defined, and this counter was declared with
    * a signed type.
    */
   signed int x87_stack:8;

   unsigned char error_overflow[4];
};
63 
/* Register files.  The four values fit the 2-bit x86_reg::file field. */
enum x86_reg_file {
   file_REG32 = 0,
   file_MMX   = 1,
   file_XMM   = 2,
   file_x87   = 3
};
70 
/* Values for the mod field of the modr/m byte.  The four values fit the
 * 2-bit x86_reg::mod field.
 */
enum x86_reg_mod {
   mod_INDIRECT = 0,
   mod_DISP8    = 1,
   mod_DISP32   = 2,
   mod_REG      = 3
};
79 
/* Register indices.  The sixteen values fit the 4-bit x86_reg::idx field. */
enum x86_reg_name {
   reg_AX  = 0,
   reg_CX  = 1,
   reg_DX  = 2,
   reg_BX  = 3,
   reg_SP  = 4,
   reg_BP  = 5,
   reg_SI  = 6,
   reg_DI  = 7,
   reg_R8  = 8,
   reg_R9  = 9,
   reg_R10 = 10,
   reg_R11 = 11,
   reg_R12 = 12,
   reg_R13 = 13,
   reg_R14 = 14,
   reg_R15 = 15
};
98 
99 
/* Condition codes accepted by x86_jcc(), x86_jcc_forward() and
 * x86_cmovcc().
 * NOTE(review): the numeric values presumably match the hardware
 * condition-code encoding - confirm in the emitter before reordering.
 */
enum x86_cc {
   cc_O   = 0,			/* overflow */
   cc_NO  = 1,			/* not overflow */
   cc_NAE = 2,			/* not above or equal / carry */
   cc_AE  = 3,			/* above or equal / not carry */
   cc_E   = 4,			/* equal / zero */
   cc_NE  = 5			/* not equal / not zero */
};
108 
/* Comparison predicates accepted by sse_cmpps().
 * NOTE(review): the numeric values presumably match the instruction's
 * immediate encoding - confirm in the emitter before reordering.
 */
enum sse_cc {
   cc_Equal            = 0,
   cc_LessThan         = 1,
   cc_LessThanEqual    = 2,
   cc_Unordered        = 3,
   cc_NotEqual         = 4,
   cc_NotLessThan      = 5,
   cc_NotLessThanEqual = 6,
   cc_Ordered          = 7
};
119 
/* Zero / not-zero spellings of the equal / not-equal condition codes. */
#define cc_Z   (cc_E)
#define cc_NZ  (cc_NE)
122 
123 
/**
 * Generic pointer to function; this is the type x86_get_func() returns.
 * NOTE(review): presumably callers cast it to the real signature of the
 * generated code before calling - confirm against users.
 */
typedef void (*x86_func)( void );
126 
127 
128 /* Begin/end/retrieve function creation:
129  */
130 
/* Code-generation targets reported by x86_target(). */
enum x86_target
{
   X86_32           = 0,
   X86_64_STD_ABI   = 1,
   X86_64_WIN64_ABI = 2
};
137 
138 /* make this read a member of x86_function if target != host is desired */
x86_target(struct x86_function * p)139 static inline enum x86_target x86_target( struct x86_function* p )
140 {
141 #ifdef PIPE_ARCH_X86
142    return X86_32;
143 #elif (defined(PIPE_OS_CYGWIN) || defined(PIPE_OS_WINDOWS)) && defined(PIPE_ARCH_X86_64)
144    return X86_64_WIN64_ABI;
145 #elif defined(PIPE_ARCH_X86_64)
146    return X86_64_STD_ABI;
147 #endif
148 }
149 
x86_target_caps(struct x86_function * p)150 static inline unsigned x86_target_caps( struct x86_function* p )
151 {
152    return p->caps;
153 }
154 
155 void x86_init_func( struct x86_function *p );
156 void x86_init_func_size( struct x86_function *p, unsigned code_size );
157 void x86_release_func( struct x86_function *p );
158 x86_func x86_get_func( struct x86_function *p );
159 
/* Debugging helper: print a register descriptor. */
void x86_print_reg(struct x86_reg reg);
163 
164 
/* Constructors and transformers for register and register/memory
 * descriptors.  All of them take and return struct x86_reg by value.
 */
struct x86_reg x86_make_reg(enum x86_reg_file file, enum x86_reg_name idx);

struct x86_reg x86_make_disp(struct x86_reg reg, int disp);

struct x86_reg x86_deref(struct x86_reg reg);

struct x86_reg x86_get_base_reg(struct x86_reg reg);
176 
177 
/* Labels, jumps and fixup.
 *
 * Labels and fixups are plain ints: x86_get_label() yields a label for
 * x86_jcc() / x86_jmp(), while the *_forward() functions return a value
 * to hand to x86_fixup_fwd_jump() later.
 */
int x86_get_label(struct x86_function *p);

void x64_rexw(struct x86_function *p);

void x86_jcc(struct x86_function *p, enum x86_cc cc, int label);

int x86_jcc_forward(struct x86_function *p, enum x86_cc cc);

int x86_jmp_forward(struct x86_function *p);

int x86_call_forward(struct x86_function *p);

void x86_fixup_fwd_jump(struct x86_function *p, int fixup);

void x86_jmp(struct x86_function *p, int label);

/* void x86_call( struct x86_function *p, void (*label)() ); */
void x86_call(struct x86_function *p, struct x86_reg reg);
202 
/* Operations taking a destination operand and an int immediate. */
void x86_mov_reg_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_add_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_or_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_and_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_sub_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_xor_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_cmp_imm(struct x86_function *p, struct x86_reg dst, int imm);
210 
211 
/* Macros for sse_shufps() and sse2_pshufd():
 *
 * SHUF packs four 2-bit selectors into one byte, _x in bits 0-1 up to _w
 * in bits 6-7.  GET_SHUF extracts selector number idx (0..3) again.
 */
#define SHUF(_x,_y,_z,_w)       (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
/* Identity selector (0,1,2,3).  This used to expand to RSW(), but no RSW
 * macro is visible in this header, so any use would not compile; build it
 * with SHUF() instead.
 */
#define SHUF_NOOP               SHUF(0,1,2,3)
#define GET_SHUF(swz, idx)      (((swz) >> ((idx)*2)) & 0x3)
217 
/* mmx_* entry points, each named after the instruction it stands for. */
void mmx_emms(struct x86_function *p);
void mmx_movd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_movq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_packssdw(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_packuswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
223 
/* sse2_* entry points, each named after the instruction it stands for.
 *
 * Every function is declared exactly once.  Earlier revisions repeated
 * sse2_movd and declared sse2_pshufd / sse2_pshuflw / sse2_pshufhw twice
 * with different parameter spellings (unsigned char shuf vs. uint8_t imm);
 * the "unsigned char shuf" form is kept because it matches sse_shufps().
 */

/* Moves */
void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movdqu( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movdqa( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movsd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movupd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_movapd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );

/* Conversions */
void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtsd2ss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_cvtpd2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );

/* Packs and shuffles; build the shuf argument with SHUF() */
void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                  unsigned char shuf );
void sse2_pshuflw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                  unsigned char shuf );
void sse2_pshufhw( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                  unsigned char shuf );
void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src );

/* Unpacks */
void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpcklwd( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpckldq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse2_punpcklqdq( struct x86_function *p, struct x86_reg dst, struct x86_reg src );

/* Shifts by an immediate count */
void sse2_psllw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_pslld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psllq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );

void sse2_psrlw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrld_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrlq_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );

void sse2_psraw_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );
void sse2_psrad_imm( struct x86_function *p, struct x86_reg dst, unsigned imm );

/* Logic */
void sse2_por( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
272 
/* sse_* entry points, each named after the instruction it stands for. */

/* Prefetch hints and non-temporal store */
void sse_prefetchnta(struct x86_function *p, struct x86_reg ptr);
void sse_prefetch0(struct x86_function *p, struct x86_reg ptr);
void sse_prefetch1(struct x86_function *p, struct x86_reg ptr);

void sse_movntps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);

/* Two-operand forms (dst, src) */
void sse_addps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_addss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_cvtps2pi(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_divss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_andnps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_andps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/* cc selects the comparison predicate */
void sse_cmpps(struct x86_function *p, struct x86_reg dst, struct x86_reg src,
               enum sse_cc cc);
void sse_maxps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_maxss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_minps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movaps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movhlps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movlhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movlps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movups(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_mulps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_mulss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_orps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_xorps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_subps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_rsqrtps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_rsqrtss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/* build the shuf argument with SHUF() */
void sse_shufps(struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                unsigned char shuf);
void sse_unpckhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_unpcklps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_pmovmskb(struct x86_function *p, struct x86_reg dest, struct x86_reg src);
void sse_movmskps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
310 
/* General-purpose x86_* / x64_* entry points, each named after the
 * instruction it stands for.
 */
void x86_add(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_and(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_cmovcc(struct x86_function *p, struct x86_reg dst, struct x86_reg src,
                enum x86_cc cc);
void x86_cmp(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_dec(struct x86_function *p, struct x86_reg reg);
void x86_inc(struct x86_function *p, struct x86_reg reg);
void x86_lea(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mov(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x64_mov64(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mov8(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mov16(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm);
void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm);
void x86_mul(struct x86_function *p, struct x86_reg src);
void x86_imul(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_or(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_pop(struct x86_function *p, struct x86_reg reg);
void x86_push(struct x86_function *p, struct x86_reg reg);
void x86_push_imm32(struct x86_function *p, int imm);
void x86_ret(struct x86_function *p);
void x86_retw(struct x86_function *p, unsigned short imm);
void x86_sub(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_test(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_xor(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_sahf(struct x86_function *p);
void x86_div(struct x86_function *p, struct x86_reg src);
void x86_bswap(struct x86_function *p, struct x86_reg src);
void x86_shr_imm(struct x86_function *p, struct x86_reg reg, unsigned imm);
void x86_sar_imm(struct x86_function *p, struct x86_reg reg, unsigned imm);
void x86_shl_imm(struct x86_function *p, struct x86_reg reg, unsigned imm);
344 
/* Matching push/pop pair for the caller side of a cdecl call.
 * NOTE(review): which registers are saved is decided by the
 * implementation - confirm there.
 */
void x86_cdecl_caller_push_regs(struct x86_function *p);
void x86_cdecl_caller_pop_regs(struct x86_function *p);
347 
/* x87_* entry points, each named after the instruction it stands for.
 *
 * NOTE(review): x87_assert_stack_empty() presumably checks the
 * x86_function::x87_stack counter - confirm in the implementation.
 */
void x87_assert_stack_empty(struct x86_function *p);

void x87_f2xm1(struct x86_function *p);
void x87_fabs(struct x86_function *p);
void x87_fadd(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_faddp(struct x86_function *p, struct x86_reg dst);
void x87_fchs(struct x86_function *p);
void x87_fclex(struct x86_function *p);
void x87_fcmovb(struct x86_function *p, struct x86_reg src);
void x87_fcmovbe(struct x86_function *p, struct x86_reg src);
void x87_fcmove(struct x86_function *p, struct x86_reg src);
void x87_fcmovnb(struct x86_function *p, struct x86_reg src);
void x87_fcmovnbe(struct x86_function *p, struct x86_reg src);
void x87_fcmovne(struct x86_function *p, struct x86_reg src);
void x87_fcom(struct x86_function *p, struct x86_reg dst);
void x87_fcomi(struct x86_function *p, struct x86_reg dst);
void x87_fcomip(struct x86_function *p, struct x86_reg dst);
void x87_fcomp(struct x86_function *p, struct x86_reg dst);
void x87_fcos(struct x86_function *p);
void x87_fdiv(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fdivp(struct x86_function *p, struct x86_reg dst);
void x87_fdivr(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fdivrp(struct x86_function *p, struct x86_reg dst);
void x87_fild(struct x86_function *p, struct x86_reg arg);
void x87_fist(struct x86_function *p, struct x86_reg dst);
void x87_fistp(struct x86_function *p, struct x86_reg dst);
void x87_fld(struct x86_function *p, struct x86_reg arg);
void x87_fld1(struct x86_function *p);
void x87_fldcw(struct x86_function *p, struct x86_reg arg);
void x87_fldl2e(struct x86_function *p);
void x87_fldln2(struct x86_function *p);
void x87_fldz(struct x86_function *p);
void x87_fmul(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fmulp(struct x86_function *p, struct x86_reg dst);
void x87_fnclex(struct x86_function *p);
void x87_fprndint(struct x86_function *p);
void x87_fpop(struct x86_function *p);
void x87_fscale(struct x86_function *p);
void x87_fsin(struct x86_function *p);
void x87_fsincos(struct x86_function *p);
void x87_fsqrt(struct x86_function *p);
void x87_fst(struct x86_function *p, struct x86_reg dst);
void x87_fstp(struct x86_function *p, struct x86_reg dst);
void x87_fsub(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fsubp(struct x86_function *p, struct x86_reg dst);
void x87_fsubr(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fsubrp(struct x86_function *p, struct x86_reg dst);
void x87_ftst(struct x86_function *p);
void x87_fxch(struct x86_function *p, struct x86_reg dst);
void x87_fxtract(struct x86_function *p);
void x87_fyl2x(struct x86_function *p);
void x87_fyl2xp1(struct x86_function *p);
void x87_fwait(struct x86_function *p);
void x87_fnstcw(struct x86_function *p, struct x86_reg dst);
void x87_fnstsw(struct x86_function *p, struct x86_reg dst);
void x87_fucompp(struct x86_function *p);
void x87_fucomp(struct x86_function *p, struct x86_reg arg);
void x87_fucom(struct x86_function *p, struct x86_reg arg);
406 
407 
408 
/* Return a reference to function argument number arg, adjusted for any
 * push/pop activity so far.  Caveat: explicit manipulation of ESP by
 * other instructions is not tracked.
 */
struct x86_reg x86_fn_arg(struct x86_function *p, unsigned arg);
414 
415 #endif
416 #endif
417