1 
2 #ifndef _X86SSE_H_
3 #define _X86SSE_H_
4 
5 #if defined(__i386__) || defined(__386__)
6 
7 /* It is up to the caller to ensure that instructions issued are
8  * suitable for the host cpu.  There are no checks made in this module
9  * for mmx/sse/sse2 support on the cpu.
10  */
/* Packed description of one instruction operand: a bare register, or a
 * register-based memory reference with an optional displacement.
 *
 *   file - register file; sized to hold an enum x86_reg_file value.
 *   idx  - register number within that file (enum x86_reg_name for the
 *          general purpose registers).
 *   mod  - an enum x86_reg_mod value; mod_REG if this is just a register.
 *   disp - signed displacement, limited to 24 bits, i.e. the range
 *          -2^23 .. 2^23-1 - should be enough...
 */
struct x86_reg {
   unsigned file:3;
   unsigned idx:3;
   unsigned mod:2;
   /* Explicitly signed: whether a plain 'int' bit-field is signed is
    * implementation-defined, and negative displacements must survive.
    */
   signed int disp:24;
};
17 
/* State carried around while one function is being generated.  Every
 * emit routine declared in this header takes a pointer to one of these.
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names and from the prototypes in this header; the implementation is
 * not visible here, so confirm against it.
 */
struct x86_function {
   unsigned int   size;          /* presumably the capacity of 'store' in bytes */
   unsigned char *store;         /* presumably the start of the code buffer */
   unsigned char *csr;           /* presumably the current write position */
   unsigned int   stack_offset;  /* presumably push/pop bookkeeping for x86_fn_arg() */
   int            need_emms;     /* presumably set once MMX code has been emitted */
   const char    *fn;            /* presumably a name used for diagnostics */
};
26 
/* Register files an operand can live in.  The numbering is spelled out
 * because these values are stored in the 3-bit 'file' field of
 * struct x86_reg.
 */
enum x86_reg_file {
   file_REG32 = 0,   /* 32-bit general purpose registers */
   file_MMX   = 1,   /* MMX registers */
   file_XMM   = 2,   /* SSE registers */
   file_x87   = 3    /* x87 floating point stack */
};
33 
/* Values for the two-bit mod field of the ModR/M byte.  The numeric
 * values are the encodings themselves, so they are written out here.
 */
enum x86_reg_mod {
   mod_INDIRECT = 0,   /* memory operand, no displacement */
   mod_DISP8    = 1,   /* memory operand, 8-bit displacement */
   mod_DISP32   = 2,   /* memory operand, 32-bit displacement */
   mod_REG      = 3    /* operand is the register itself */
};
42 
/* General purpose register numbers, in x86 hardware encoding order.
 * These are the values passed as 'idx' to x86_make_reg().
 */
enum x86_reg_name {
   reg_AX = 0,
   reg_CX = 1,
   reg_DX = 2,
   reg_BX = 3,
   reg_SP = 4,
   reg_BP = 5,
   reg_SI = 6,
   reg_DI = 7
};
53 
54 
/* Condition codes accepted by x86_jcc() and x86_jcc_forward().  Only
 * the first six codes are listed; their numbering follows the x86
 * condition-code encoding.
 */
enum x86_cc {
   cc_O   = 0,   /* overflow */
   cc_NO  = 1,   /* not overflow */
   cc_NAE = 2,   /* not above or equal / carry */
   cc_AE  = 3,   /* above or equal / not carry */
   cc_E   = 4,   /* equal / zero */
   cc_NE  = 5    /* not equal / not zero */
};
63 
/* Comparison predicates for SSE compares.  The numbering lines up with
 * the immediate predicate encoding of the CMPPS instruction.
 * NOTE(review): presumably these are the values passed as 'cc' to
 * sse_cmpps() - confirm against callers.
 */
enum sse_cc {
   cc_Equal            = 0,
   cc_LessThan         = 1,
   cc_LessThanEqual    = 2,
   cc_Unordered        = 3,
   cc_NotEqual         = 4,
   cc_NotLessThan      = 5,
   cc_NotLessThanEqual = 6,
   cc_Ordered          = 7
};
74 
/* Zero-flag spellings of the equal / not-equal condition codes. */
#define cc_Z    cc_E    /* zero     == equal */
#define cc_NZ   cc_NE   /* not zero == not equal */
77 
/* Begin/end/retrieve function creation.
 *
 * NOTE(review): only the prototypes are visible here.  Presumably
 * x86_init_func() / x86_init_func_size() prepare 'p' for code emission
 * (the latter with room for 'code_size' bytes, reporting success through
 * its int result), x86_release_func() gives the storage back, and
 * x86_get_func() hands out the generated code as a callable pointer -
 * confirm against the implementation.
 */
void x86_init_func(struct x86_function *p);
int  x86_init_func_size(struct x86_function *p, unsigned code_size);
void x86_release_func(struct x86_function *p);

/* Returns a pointer to a function taking no arguments and returning void. */
void (*x86_get_func(struct x86_function *p))(void);
86 
87 
88 
/* Create and manipulate registers and regmem values.
 *
 * All of these take and return struct x86_reg by value.
 * NOTE(review): presumably x86_make_reg() builds a plain register
 * operand, x86_make_disp() adds 'disp' to a register-relative address,
 * x86_deref() turns a register into a memory reference through it, and
 * x86_get_base_reg() recovers the plain register from a memory operand -
 * confirm against the implementation.
 */
struct x86_reg x86_make_reg(enum x86_reg_file file, enum x86_reg_name idx);
struct x86_reg x86_make_disp(struct x86_reg reg, int disp);
struct x86_reg x86_deref(struct x86_reg reg);
struct x86_reg x86_get_base_reg(struct x86_reg reg);
100 
101 
/* Labels, jumps and fixup.
 *
 * Labels and fixup locations are exchanged as 'unsigned char *'.
 * NOTE(review): presumably a label is a position inside the code being
 * generated, and the *_forward() routines return a location that is
 * later patched by x86_fixup_fwd_jump() - confirm against the
 * implementation.
 */
unsigned char *x86_get_label(struct x86_function *p);

/* Conditional jump to an already known label. */
void x86_jcc(struct x86_function *p, enum x86_cc cc, unsigned char *label);

/* Forward jumps/calls: the target is supplied afterwards through
 * x86_fixup_fwd_jump().
 */
unsigned char *x86_jcc_forward(struct x86_function *p, enum x86_cc cc);
unsigned char *x86_jmp_forward(struct x86_function *p);
unsigned char *x86_call_forward(struct x86_function *p);
void x86_fixup_fwd_jump(struct x86_function *p, unsigned char *fixup);

/* Unconditional jump to an already known label. */
void x86_jmp(struct x86_function *p, unsigned char *label);

/* Call through a register operand.  (A variant taking a function pointer,
 * 'void (*label)()', is not part of this interface.)
 */
void x86_call(struct x86_function *p, struct x86_reg reg);
124 
/* michal:
 * Temporary.  Immediate operands are needed, but rather than rework the
 * codegen, the immediate is loaded into a general purpose register and
 * used from there.
 */
void x86_mov_reg_imm(struct x86_function *p, struct x86_reg dst, int imm);
130 
131 
/* Macros for sse_shufps() and sse2_pshufd():
 *
 *   SHUF(x,y,z,w)      - pack four 2-bit lane selectors into one byte,
 *                        x in bits 0-1 up to w in bits 6-7.
 *   SHUF_NOOP          - the identity selection (lane i selects lane i).
 *   GET_SHUF(swz, idx) - extract selector number 'idx' (0..3) from 'swz'.
 */
#define SHUF(_x,_y,_z,_w)       (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
/* Previously expanded to RSW(0,1,2,3); no RSW macro exists in this
 * header, so any use of SHUF_NOOP failed to compile.
 */
#define SHUF_NOOP               SHUF(0,1,2,3)
#define GET_SHUF(swz, idx)      (((swz) >> ((idx) * 2)) & 0x3)
137 
/* MMX instructions.
 * NOTE(review): each routine presumably appends the encoding of the
 * instruction named after the prefix to 'p' - the implementation is not
 * visible here.
 */
void mmx_emms(struct x86_function *p);
void mmx_movd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_movq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_packssdw(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_packuswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
143 
/* Instructions exposed under the sse2_ prefix.
 * NOTE(review): each routine presumably appends the encoding of the
 * instruction named after the prefix to 'p' - the implementation is not
 * visible here.  For sse2_pshufd(), 'shuf' is a selector byte as built
 * by the SHUF() macro.
 */
void sse2_cvtps2dq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_cvttps2dq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_movd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_packssdw(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_packsswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_packuswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_pshufd(struct x86_function *p,
                 struct x86_reg dest,
                 struct x86_reg arg0,
                 unsigned char shuf);
void sse2_rcpps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_rcpss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
154 
/* SSE instructions.
 * NOTE(review): each routine presumably appends the encoding of the
 * instruction named after the prefix to 'p' - the implementation is not
 * visible here.  For sse_cmpps(), 'cc' presumably takes an enum sse_cc
 * value; for sse_shufps(), 'shuf' is a selector byte as built by the
 * SHUF() macro.
 */
void sse_addps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_addss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_cvtps2pi(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_divss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_andnps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_andps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_cmpps(struct x86_function *p,
               struct x86_reg dst,
               struct x86_reg src,
               unsigned char cc);
void sse_maxps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_maxss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_minps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movaps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movhlps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movlhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movlps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movups(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_mulps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_mulss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_orps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_xorps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_subps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_rsqrtps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_rsqrtss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_shufps(struct x86_function *p,
                struct x86_reg dest,
                struct x86_reg arg0,
                unsigned char shuf);
void sse_pmovmskb(struct x86_function *p, struct x86_reg dest, struct x86_reg src);
183 
/* Integer / general purpose instructions.
 * NOTE(review): each routine presumably appends the encoding of the
 * instruction named after the x86_ prefix to 'p' - the implementation
 * is not visible here.
 */
void x86_add(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_and(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_cmp(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_dec(struct x86_function *p, struct x86_reg reg);
void x86_inc(struct x86_function *p, struct x86_reg reg);
void x86_lea(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mov(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mul(struct x86_function *p, struct x86_reg src);
void x86_or(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_pop(struct x86_function *p, struct x86_reg reg);
void x86_push(struct x86_function *p, struct x86_reg reg);
void x86_ret(struct x86_function *p);
void x86_sub(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_test(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_xor(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_sahf(struct x86_function *p);
200 
/* x87 floating point instructions.
 * NOTE(review): each routine presumably appends the encoding of the
 * instruction named after the x87_ prefix to 'p' - the implementation
 * is not visible here.
 */
void x87_f2xm1(struct x86_function *p);
void x87_fabs(struct x86_function *p);
void x87_fadd(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_faddp(struct x86_function *p, struct x86_reg dst);
void x87_fchs(struct x86_function *p);
void x87_fclex(struct x86_function *p);
void x87_fcom(struct x86_function *p, struct x86_reg dst);
void x87_fcomp(struct x86_function *p, struct x86_reg dst);
void x87_fcos(struct x86_function *p);
void x87_fdiv(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fdivp(struct x86_function *p, struct x86_reg dst);
void x87_fdivr(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fdivrp(struct x86_function *p, struct x86_reg dst);
void x87_fild(struct x86_function *p, struct x86_reg arg);
void x87_fist(struct x86_function *p, struct x86_reg dst);
void x87_fistp(struct x86_function *p, struct x86_reg dst);
void x87_fld(struct x86_function *p, struct x86_reg arg);
void x87_fld1(struct x86_function *p);
void x87_fldcw(struct x86_function *p, struct x86_reg arg);
void x87_fldl2e(struct x86_function *p);
void x87_fldln2(struct x86_function *p);
void x87_fldz(struct x86_function *p);
void x87_fmul(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fmulp(struct x86_function *p, struct x86_reg dst);
void x87_fnclex(struct x86_function *p);
void x87_fprndint(struct x86_function *p);
void x87_fscale(struct x86_function *p);
void x87_fsin(struct x86_function *p);
void x87_fsincos(struct x86_function *p);
void x87_fsqrt(struct x86_function *p);
void x87_fst(struct x86_function *p, struct x86_reg dst);
void x87_fstp(struct x86_function *p, struct x86_reg dst);
void x87_fsub(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fsubp(struct x86_function *p, struct x86_reg dst);
void x87_fsubr(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fsubrp(struct x86_function *p, struct x86_reg dst);
void x87_fxch(struct x86_function *p, struct x86_reg dst);
void x87_fxtract(struct x86_function *p);
void x87_fyl2x(struct x86_function *p);
void x87_fyl2xp1(struct x86_function *p);
void x87_fwait(struct x86_function *p);
void x87_fnstsw(struct x86_function *p, struct x86_reg dst);
void x87_fucompp(struct x86_function *p);
void x87_fucomp(struct x86_function *p, struct x86_reg arg);
void x87_fucom(struct x86_function *p, struct x86_reg arg);
246 
247 
248 
/* Retrieve a reference to one of the function arguments, taking into
 * account any push/pop activity.  Note - explicit manipulation of ESP
 * by other instructions is not tracked.
 */
struct x86_reg x86_fn_arg(struct x86_function *p, unsigned arg);
254 
255 #endif
256 #endif
257