1 /*--------------------------------------------------------------------*/
2 /*--- Machine-related stuff. m_machine.c ---*/
3 /*--------------------------------------------------------------------*/
4
5 /*
6 This file is part of Valgrind, a dynamic binary instrumentation
7 framework.
8
9 Copyright (C) 2000-2015 Julian Seward
10 jseward@acm.org
11
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of the
15 License, or (at your option) any later version.
16
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25 02111-1307, USA.
26
27 The GNU General Public License is contained in the file COPYING.
28 */
29
30 #include "pub_core_basics.h"
31 #include "pub_core_vki.h"
32 #include "pub_core_threadstate.h"
33 #include "pub_core_libcassert.h"
34 #include "pub_core_libcbase.h"
35 #include "pub_core_libcfile.h"
36 #include "pub_core_libcprint.h"
37 #include "pub_core_mallocfree.h"
38 #include "pub_core_machine.h"
39 #include "pub_core_cpuid.h"
40 #include "pub_core_libcsignal.h" // for ppc32 messing with SIGILL and SIGFPE
41 #include "pub_core_debuglog.h"
42
43
44 #define INSTR_PTR(regs) ((regs).vex.VG_INSTR_PTR)
45 #define STACK_PTR(regs) ((regs).vex.VG_STACK_PTR)
46 #define FRAME_PTR(regs) ((regs).vex.VG_FRAME_PTR)
47
48 Addr VG_(get_IP) ( ThreadId tid ) {
49 return INSTR_PTR( VG_(threads)[tid].arch );
50 }
51 Addr VG_(get_SP) ( ThreadId tid ) {
52 return STACK_PTR( VG_(threads)[tid].arch );
53 }
54 Addr VG_(get_FP) ( ThreadId tid ) {
55 return FRAME_PTR( VG_(threads)[tid].arch );
56 }
57
58 void VG_(set_IP) ( ThreadId tid, Addr ip ) {
59 INSTR_PTR( VG_(threads)[tid].arch ) = ip;
60 }
61 void VG_(set_SP) ( ThreadId tid, Addr sp ) {
62 STACK_PTR( VG_(threads)[tid].arch ) = sp;
63 }
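/* Illustrative sketch (not part of the original file): a caller that wants
   to redirect a thread (for example when arranging delivery of a signal
   handler) would combine the accessors above roughly like this, where
   `handler_addr' and the stack adjustment are hypothetical:

      Addr sp = VG_(get_SP)(tid);
      VG_(set_SP)(tid, sp - 128);       // make room on the client stack
      VG_(set_IP)(tid, handler_addr);   // resume the thread at the handler
*/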
64
65 void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
66 ThreadId tid )
67 {
68 # if defined(VGA_x86)
69 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
70 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
71 regs->misc.X86.r_ebp
72 = VG_(threads)[tid].arch.vex.guest_EBP;
73 # elif defined(VGA_amd64)
74 regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
75 regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
76 regs->misc.AMD64.r_rbp
77 = VG_(threads)[tid].arch.vex.guest_RBP;
78 # elif defined(VGA_ppc32)
79 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
80 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
81 regs->misc.PPC32.r_lr
82 = VG_(threads)[tid].arch.vex.guest_LR;
83 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
84 regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
85 regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
86 regs->misc.PPC64.r_lr
87 = VG_(threads)[tid].arch.vex.guest_LR;
88 # elif defined(VGA_arm)
89 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
90 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
91 regs->misc.ARM.r14
92 = VG_(threads)[tid].arch.vex.guest_R14;
93 regs->misc.ARM.r12
94 = VG_(threads)[tid].arch.vex.guest_R12;
95 regs->misc.ARM.r11
96 = VG_(threads)[tid].arch.vex.guest_R11;
97 regs->misc.ARM.r7
98 = VG_(threads)[tid].arch.vex.guest_R7;
99 # elif defined(VGA_arm64)
100 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
101 regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
102 regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
103 regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
104 # elif defined(VGA_s390x)
105 regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
106 regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
107 regs->misc.S390X.r_fp
108 = VG_(threads)[tid].arch.vex.guest_FP;
109 regs->misc.S390X.r_lr
110 = VG_(threads)[tid].arch.vex.guest_LR;
111 # elif defined(VGA_mips32)
112 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
113 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
114 regs->misc.MIPS32.r30
115 = VG_(threads)[tid].arch.vex.guest_r30;
116 regs->misc.MIPS32.r31
117 = VG_(threads)[tid].arch.vex.guest_r31;
118 regs->misc.MIPS32.r28
119 = VG_(threads)[tid].arch.vex.guest_r28;
120 # elif defined(VGA_mips64)
121 regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
122 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
123 regs->misc.MIPS64.r30
124 = VG_(threads)[tid].arch.vex.guest_r30;
125 regs->misc.MIPS64.r31
126 = VG_(threads)[tid].arch.vex.guest_r31;
127 regs->misc.MIPS64.r28
128 = VG_(threads)[tid].arch.vex.guest_r28;
129 # elif defined(VGA_tilegx)
130 regs->r_pc = VG_(threads)[tid].arch.vex.guest_pc;
131 regs->r_sp = VG_(threads)[tid].arch.vex.guest_r54;
132 regs->misc.TILEGX.r52
133 = VG_(threads)[tid].arch.vex.guest_r52;
134 regs->misc.TILEGX.r55
135 = VG_(threads)[tid].arch.vex.guest_r55;
136 # else
137 # error "Unknown arch"
138 # endif
139 }
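/* Illustrative sketch (not part of the original file): the usual consumer
   of VG_(get_UnwindStartRegs) is stack unwinding, which seeds itself from
   the returned values, roughly:

      UnwindStartRegs srs;
      VG_(get_UnwindStartRegs)( &srs, tid );
      // srs.r_pc, srs.r_sp and the arch-specific srs.misc fields are then
      // handed to the unwinder (m_stacktrace.c).
*/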
140
141 void
142 VG_(get_shadow_regs_area) ( ThreadId tid,
143 /*DST*/UChar* dst,
144 /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
145 {
146 void* src;
147 ThreadState* tst;
148 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
149 vg_assert(VG_(is_valid_tid)(tid));
150 // Bounds check
151 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
152 vg_assert(offset + size <= sizeof(VexGuestArchState));
153 // Copy
154 tst = & VG_(threads)[tid];
155 src = NULL;
156 switch (shadowNo) {
157 case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
158 case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
159 case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
160 }
161 vg_assert(src != NULL);
162 VG_(memcpy)( dst, src, size);
163 }
164
165 void
166 VG_(set_shadow_regs_area) ( ThreadId tid,
167 /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
168 /*SRC*/const UChar* src )
169 {
170 void* dst;
171 ThreadState* tst;
172 vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
173 vg_assert(VG_(is_valid_tid)(tid));
174 // Bounds check
175 vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
176 vg_assert(offset + size <= sizeof(VexGuestArchState));
177 // Copy
178 tst = & VG_(threads)[tid];
179 dst = NULL;
180 switch (shadowNo) {
181 case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
182 case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
183 case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
184 }
185 vg_assert(dst != NULL);
186 VG_(memcpy)( dst, src, size);
187 }
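/* Illustrative sketch (not part of the original file): tools address the
   two functions above with byte offsets into VexGuestArchState.  Assuming
   `o' is such an offset, a round trip through shadow state 1 looks like:

      UChar buf[8];
      VG_(get_shadow_regs_area)( tid, buf, 1, o, sizeof(buf) );
      // ... inspect or modify buf ...
      VG_(set_shadow_regs_area)( tid, 1, o, sizeof(buf), buf );
*/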
188
189
190 static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
191 const HChar*, Addr))
192 {
193 VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
194 VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
195 #if defined(VGA_x86)
196 (*f)(tid, "EAX", vex->guest_EAX);
197 (*f)(tid, "ECX", vex->guest_ECX);
198 (*f)(tid, "EDX", vex->guest_EDX);
199 (*f)(tid, "EBX", vex->guest_EBX);
200 (*f)(tid, "ESI", vex->guest_ESI);
201 (*f)(tid, "EDI", vex->guest_EDI);
202 (*f)(tid, "ESP", vex->guest_ESP);
203 (*f)(tid, "EBP", vex->guest_EBP);
204 #elif defined(VGA_amd64)
205 (*f)(tid, "RAX", vex->guest_RAX);
206 (*f)(tid, "RCX", vex->guest_RCX);
207 (*f)(tid, "RDX", vex->guest_RDX);
208 (*f)(tid, "RBX", vex->guest_RBX);
209 (*f)(tid, "RSI", vex->guest_RSI);
210 (*f)(tid, "RDI", vex->guest_RDI);
211 (*f)(tid, "RSP", vex->guest_RSP);
212 (*f)(tid, "RBP", vex->guest_RBP);
213 (*f)(tid, "R8" , vex->guest_R8 );
214 (*f)(tid, "R9" , vex->guest_R9 );
215 (*f)(tid, "R10", vex->guest_R10);
216 (*f)(tid, "R11", vex->guest_R11);
217 (*f)(tid, "R12", vex->guest_R12);
218 (*f)(tid, "R13", vex->guest_R13);
219 (*f)(tid, "R14", vex->guest_R14);
220 (*f)(tid, "R15", vex->guest_R15);
221 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
222 (*f)(tid, "GPR0" , vex->guest_GPR0 );
223 (*f)(tid, "GPR1" , vex->guest_GPR1 );
224 (*f)(tid, "GPR2" , vex->guest_GPR2 );
225 (*f)(tid, "GPR3" , vex->guest_GPR3 );
226 (*f)(tid, "GPR4" , vex->guest_GPR4 );
227 (*f)(tid, "GPR5" , vex->guest_GPR5 );
228 (*f)(tid, "GPR6" , vex->guest_GPR6 );
229 (*f)(tid, "GPR7" , vex->guest_GPR7 );
230 (*f)(tid, "GPR8" , vex->guest_GPR8 );
231 (*f)(tid, "GPR9" , vex->guest_GPR9 );
232 (*f)(tid, "GPR10", vex->guest_GPR10);
233 (*f)(tid, "GPR11", vex->guest_GPR11);
234 (*f)(tid, "GPR12", vex->guest_GPR12);
235 (*f)(tid, "GPR13", vex->guest_GPR13);
236 (*f)(tid, "GPR14", vex->guest_GPR14);
237 (*f)(tid, "GPR15", vex->guest_GPR15);
238 (*f)(tid, "GPR16", vex->guest_GPR16);
239 (*f)(tid, "GPR17", vex->guest_GPR17);
240 (*f)(tid, "GPR18", vex->guest_GPR18);
241 (*f)(tid, "GPR19", vex->guest_GPR19);
242 (*f)(tid, "GPR20", vex->guest_GPR20);
243 (*f)(tid, "GPR21", vex->guest_GPR21);
244 (*f)(tid, "GPR22", vex->guest_GPR22);
245 (*f)(tid, "GPR23", vex->guest_GPR23);
246 (*f)(tid, "GPR24", vex->guest_GPR24);
247 (*f)(tid, "GPR25", vex->guest_GPR25);
248 (*f)(tid, "GPR26", vex->guest_GPR26);
249 (*f)(tid, "GPR27", vex->guest_GPR27);
250 (*f)(tid, "GPR28", vex->guest_GPR28);
251 (*f)(tid, "GPR29", vex->guest_GPR29);
252 (*f)(tid, "GPR30", vex->guest_GPR30);
253 (*f)(tid, "GPR31", vex->guest_GPR31);
254 (*f)(tid, "CTR" , vex->guest_CTR );
255 (*f)(tid, "LR" , vex->guest_LR );
256 #elif defined(VGA_arm)
257 (*f)(tid, "R0" , vex->guest_R0 );
258 (*f)(tid, "R1" , vex->guest_R1 );
259 (*f)(tid, "R2" , vex->guest_R2 );
260 (*f)(tid, "R3" , vex->guest_R3 );
261 (*f)(tid, "R4" , vex->guest_R4 );
262 (*f)(tid, "R5" , vex->guest_R5 );
263 (*f)(tid, "R6" , vex->guest_R6 );
264 (*f)(tid, "R8" , vex->guest_R8 );
265 (*f)(tid, "R9" , vex->guest_R9 );
266 (*f)(tid, "R10", vex->guest_R10);
267 (*f)(tid, "R11", vex->guest_R11);
268 (*f)(tid, "R12", vex->guest_R12);
269 (*f)(tid, "R13", vex->guest_R13);
270 (*f)(tid, "R14", vex->guest_R14);
271 #elif defined(VGA_s390x)
272 (*f)(tid, "r0" , vex->guest_r0 );
273 (*f)(tid, "r1" , vex->guest_r1 );
274 (*f)(tid, "r2" , vex->guest_r2 );
275 (*f)(tid, "r3" , vex->guest_r3 );
276 (*f)(tid, "r4" , vex->guest_r4 );
277 (*f)(tid, "r5" , vex->guest_r5 );
278 (*f)(tid, "r6" , vex->guest_r6 );
279 (*f)(tid, "r7" , vex->guest_r7 );
280 (*f)(tid, "r8" , vex->guest_r8 );
281 (*f)(tid, "r9" , vex->guest_r9 );
282 (*f)(tid, "r10", vex->guest_r10);
283 (*f)(tid, "r11", vex->guest_r11);
284 (*f)(tid, "r12", vex->guest_r12);
285 (*f)(tid, "r13", vex->guest_r13);
286 (*f)(tid, "r14", vex->guest_r14);
287 (*f)(tid, "r15", vex->guest_r15);
288 #elif defined(VGA_mips32) || defined(VGA_mips64)
289 (*f)(tid, "r0" , vex->guest_r0 );
290 (*f)(tid, "r1" , vex->guest_r1 );
291 (*f)(tid, "r2" , vex->guest_r2 );
292 (*f)(tid, "r3" , vex->guest_r3 );
293 (*f)(tid, "r4" , vex->guest_r4 );
294 (*f)(tid, "r5" , vex->guest_r5 );
295 (*f)(tid, "r6" , vex->guest_r6 );
296 (*f)(tid, "r7" , vex->guest_r7 );
297 (*f)(tid, "r8" , vex->guest_r8 );
298 (*f)(tid, "r9" , vex->guest_r9 );
299 (*f)(tid, "r10", vex->guest_r10);
300 (*f)(tid, "r11", vex->guest_r11);
301 (*f)(tid, "r12", vex->guest_r12);
302 (*f)(tid, "r13", vex->guest_r13);
303 (*f)(tid, "r14", vex->guest_r14);
304 (*f)(tid, "r15", vex->guest_r15);
305 (*f)(tid, "r16", vex->guest_r16);
306 (*f)(tid, "r17", vex->guest_r17);
307 (*f)(tid, "r18", vex->guest_r18);
308 (*f)(tid, "r19", vex->guest_r19);
309 (*f)(tid, "r20", vex->guest_r20);
310 (*f)(tid, "r21", vex->guest_r21);
311 (*f)(tid, "r22", vex->guest_r22);
312 (*f)(tid, "r23", vex->guest_r23);
313 (*f)(tid, "r24", vex->guest_r24);
314 (*f)(tid, "r25", vex->guest_r25);
315 (*f)(tid, "r26", vex->guest_r26);
316 (*f)(tid, "r27", vex->guest_r27);
317 (*f)(tid, "r28", vex->guest_r28);
318 (*f)(tid, "r29", vex->guest_r29);
319 (*f)(tid, "r30", vex->guest_r30);
320 (*f)(tid, "r31", vex->guest_r31);
321 #elif defined(VGA_arm64)
322 (*f)(tid, "x0" , vex->guest_X0 );
323 (*f)(tid, "x1" , vex->guest_X1 );
324 (*f)(tid, "x2" , vex->guest_X2 );
325 (*f)(tid, "x3" , vex->guest_X3 );
326 (*f)(tid, "x4" , vex->guest_X4 );
327 (*f)(tid, "x5" , vex->guest_X5 );
328 (*f)(tid, "x6" , vex->guest_X6 );
329 (*f)(tid, "x7" , vex->guest_X7 );
330 (*f)(tid, "x8" , vex->guest_X8 );
331 (*f)(tid, "x9" , vex->guest_X9 );
332 (*f)(tid, "x10", vex->guest_X10);
333 (*f)(tid, "x11", vex->guest_X11);
334 (*f)(tid, "x12", vex->guest_X12);
335 (*f)(tid, "x13", vex->guest_X13);
336 (*f)(tid, "x14", vex->guest_X14);
337 (*f)(tid, "x15", vex->guest_X15);
338 (*f)(tid, "x16", vex->guest_X16);
339 (*f)(tid, "x17", vex->guest_X17);
340 (*f)(tid, "x18", vex->guest_X18);
341 (*f)(tid, "x19", vex->guest_X19);
342 (*f)(tid, "x20", vex->guest_X20);
343 (*f)(tid, "x21", vex->guest_X21);
344 (*f)(tid, "x22", vex->guest_X22);
345 (*f)(tid, "x23", vex->guest_X23);
346 (*f)(tid, "x24", vex->guest_X24);
347 (*f)(tid, "x25", vex->guest_X25);
348 (*f)(tid, "x26", vex->guest_X26);
349 (*f)(tid, "x27", vex->guest_X27);
350 (*f)(tid, "x28", vex->guest_X28);
351 (*f)(tid, "x29", vex->guest_X29);
352 (*f)(tid, "x30", vex->guest_X30);
353 #elif defined(VGA_tilegx)
354 (*f)(tid, "r0", vex->guest_r0 );
355 (*f)(tid, "r1", vex->guest_r1 );
356 (*f)(tid, "r2", vex->guest_r2 );
357 (*f)(tid, "r3", vex->guest_r3 );
358 (*f)(tid, "r4", vex->guest_r4 );
359 (*f)(tid, "r5", vex->guest_r5 );
360 (*f)(tid, "r6", vex->guest_r6 );
361 (*f)(tid, "r7", vex->guest_r7 );
362 (*f)(tid, "r8", vex->guest_r8 );
363 (*f)(tid, "r9", vex->guest_r9 );
364 (*f)(tid, "r10", vex->guest_r10);
365 (*f)(tid, "r11", vex->guest_r11);
366 (*f)(tid, "r12", vex->guest_r12);
367 (*f)(tid, "r13", vex->guest_r13);
368 (*f)(tid, "r14", vex->guest_r14);
369 (*f)(tid, "r15", vex->guest_r15);
370 (*f)(tid, "r16", vex->guest_r16);
371 (*f)(tid, "r17", vex->guest_r17);
372 (*f)(tid, "r18", vex->guest_r18);
373 (*f)(tid, "r19", vex->guest_r19);
374 (*f)(tid, "r20", vex->guest_r20);
375 (*f)(tid, "r21", vex->guest_r21);
376 (*f)(tid, "r22", vex->guest_r22);
377 (*f)(tid, "r23", vex->guest_r23);
378 (*f)(tid, "r24", vex->guest_r24);
379 (*f)(tid, "r25", vex->guest_r25);
380 (*f)(tid, "r26", vex->guest_r26);
381 (*f)(tid, "r27", vex->guest_r27);
382 (*f)(tid, "r28", vex->guest_r28);
383 (*f)(tid, "r29", vex->guest_r29);
384 (*f)(tid, "r30", vex->guest_r30);
385 (*f)(tid, "r31", vex->guest_r31);
386 (*f)(tid, "r32", vex->guest_r32);
387 (*f)(tid, "r33", vex->guest_r33);
388 (*f)(tid, "r34", vex->guest_r34);
389 (*f)(tid, "r35", vex->guest_r35);
390 (*f)(tid, "r36", vex->guest_r36);
391 (*f)(tid, "r37", vex->guest_r37);
392 (*f)(tid, "r38", vex->guest_r38);
393 (*f)(tid, "r39", vex->guest_r39);
394 (*f)(tid, "r40", vex->guest_r40);
395 (*f)(tid, "r41", vex->guest_r41);
396 (*f)(tid, "r42", vex->guest_r42);
397 (*f)(tid, "r43", vex->guest_r43);
398 (*f)(tid, "r44", vex->guest_r44);
399 (*f)(tid, "r45", vex->guest_r45);
400 (*f)(tid, "r46", vex->guest_r46);
401 (*f)(tid, "r47", vex->guest_r47);
402 (*f)(tid, "r48", vex->guest_r48);
403 (*f)(tid, "r49", vex->guest_r49);
404 (*f)(tid, "r50", vex->guest_r50);
405 (*f)(tid, "r51", vex->guest_r51);
406 (*f)(tid, "r52", vex->guest_r52);
407 (*f)(tid, "r53", vex->guest_r53);
408 (*f)(tid, "r54", vex->guest_r54);
409 (*f)(tid, "r55", vex->guest_r55);
410 #else
411 # error Unknown arch
412 #endif
413 }
414
415
416 void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
417 {
418 ThreadId tid;
419
420 for (tid = 1; tid < VG_N_THREADS; tid++) {
421 if (VG_(is_valid_tid)(tid)
422 || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
423 // live thread or thread instructed to die by another thread that
424 // called exit.
425 apply_to_GPs_of_tid(tid, f);
426 }
427 }
428 }
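/* Illustrative sketch (not part of the original file): a callback suitable
   for VG_(apply_to_GP_regs), e.g. for scanning the integer registers of all
   threads.  The body shown is hypothetical:

      static void inspect_gp_reg ( ThreadId tid, const HChar* name, UWord val )
      {
         VG_(debugLog)(2, "example", "tid %u: %s = 0x%lx\n", tid, name, val);
      }

      VG_(apply_to_GP_regs)( inspect_gp_reg );
*/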
429
430 void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
431 {
432 *tid = (ThreadId)(-1);
433 }
434
435 Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
436 /*OUT*/Addr* stack_min,
437 /*OUT*/Addr* stack_max)
438 {
439 ThreadId i;
440 for (i = (*tid)+1; i < VG_N_THREADS; i++) {
441 if (i == VG_INVALID_THREADID)
442 continue;
443 if (VG_(threads)[i].status != VgTs_Empty) {
444 *tid = i;
445 *stack_min = VG_(get_SP)(i);
446 *stack_max = VG_(threads)[i].client_stack_highest_byte;
447 return True;
448 }
449 }
450 return False;
451 }
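/* Illustrative sketch (not part of the original file): iterating over all
   live thread stacks with the pair of functions above:

      ThreadId tid;
      Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)( &tid );
      while ( VG_(thread_stack_next)( &tid, &stack_min, &stack_max ) ) {
         // [stack_min .. stack_max] spans the currently used part of
         // tid's client stack (from SP up to its highest stack byte).
      }
*/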
452
453 Addr VG_(thread_get_stack_max)(ThreadId tid)
454 {
455 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
456 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
457 return VG_(threads)[tid].client_stack_highest_byte;
458 }
459
460 SizeT VG_(thread_get_stack_size)(ThreadId tid)
461 {
462 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
463 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
464 return VG_(threads)[tid].client_stack_szB;
465 }
466
467 Addr VG_(thread_get_altstack_min)(ThreadId tid)
468 {
469 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
470 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
471 return (Addr)VG_(threads)[tid].altstack.ss_sp;
472 }
473
474 SizeT VG_(thread_get_altstack_size)(ThreadId tid)
475 {
476 vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
477 vg_assert(VG_(threads)[tid].status != VgTs_Empty);
478 return VG_(threads)[tid].altstack.ss_size;
479 }
480
481 //-------------------------------------------------------------
482 /* Details about the capabilities of the underlying (host) CPU. These
483 details are acquired by (1) enquiring with the CPU at startup, or
484 (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
485 line size). It's a bit nasty in the sense that there's no obvious
486 way to stop uses of some of this info before it's ready to go.
487 See pub_core_machine.h for more information about that.
488
489 VG_(machine_get_hwcaps) may use signals (although it attempts to
490 leave signal state unchanged) and therefore should only be
491 called before m_main sets up the client's signal state.
492 */
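/* Illustrative sketch (not part of the original file) of the startup
   ordering that the asserts below enforce:

      VG_(machine_get_hwcaps)();     // once, before the client's signal
                                     // state has been set up
      // on ppc only, then: VG_(machine_ppc32_set_clszB)(..) or
      //                    VG_(machine_ppc64_set_clszB)(..)
      // only after that may the rest of the system consult this info,
      // e.g. via VG_(machine_get_VexArchInfo)().
*/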
493
494 /* --------- State --------- */
495 static Bool hwcaps_done = False;
496
497 /* --- all archs --- */
498 static VexArch va = VexArch_INVALID;
499 static VexArchInfo vai;
500
501 #if defined(VGA_x86)
502 UInt VG_(machine_x86_have_mxcsr) = 0;
503 #endif
504 #if defined(VGA_ppc32)
505 UInt VG_(machine_ppc32_has_FP) = 0;
506 UInt VG_(machine_ppc32_has_VMX) = 0;
507 #endif
508 #if defined(VGA_ppc64be) || defined(VGA_ppc64le)
509 ULong VG_(machine_ppc64_has_VMX) = 0;
510 #endif
511 #if defined(VGA_arm)
512 Int VG_(machine_arm_archlevel) = 4;
513 #endif
514
515
516 /* For hwcaps detection on ppc32/64, s390x, arm, and mips32 we'll need to do SIGILL
517 testing, so we need a VG_MINIMAL_JMP_BUF. */
518 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
519 || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
520 #include "pub_core_libcsetjmp.h"
521 static VG_MINIMAL_JMP_BUF(env_unsup_insn);
522 static void handler_unsup_insn ( Int x ) {
523 VG_MINIMAL_LONGJMP(env_unsup_insn);
524 }
525 #endif
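/* The SIGILL/SIGFPE probes below all follow the same pattern (sketch,
   assuming the handler above has been installed for the relevant signal):

      have_FEATURE = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FEATURE = False;       // probe insn trapped; we longjmp'd back
      } else {
         __asm__ __volatile__(...);  // execute one insn from the feature set
      }
*/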
526
527
528 /* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
529 * handlers are installed. Determines the sizes affected by dcbz
530 * and dcbzl instructions and updates the given VexArchInfo structure
531 * accordingly.
532 *
533 * Not very defensive: assumes that as long as the dcbz/dcbzl
534  * instructions don't raise a SIGILL, they will zero an aligned,
535 * contiguous block of memory of a sensible size. */
536 #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
537 static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
538 {
539 Int dcbz_szB = 0;
540 Int dcbzl_szB;
541 # define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
542 char test_block[4*MAX_DCBZL_SZB];
543 char *aligned = test_block;
544 Int i;
545
546 /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
547 aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
548 vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);
549
550    /* dcbz often clears 32B, although on some machines it clears whatever
551     * the native cache block size is */
552 VG_(memset)(test_block, 0xff, sizeof(test_block));
553 __asm__ __volatile__("dcbz 0,%0"
554 : /*out*/
555 : "r" (aligned) /*in*/
556 : "memory" /*clobber*/);
557 for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
558 if (!test_block[i])
559 ++dcbz_szB;
560 }
561 vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);
562
563 /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
564 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
565 dcbzl_szB = 0; /* indicates unsupported */
566 }
567 else {
568 VG_(memset)(test_block, 0xff, sizeof(test_block));
569 /* some older assemblers won't understand the dcbzl instruction
570 * variant, so we directly emit the instruction ourselves */
571 __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
572 : /*out*/
573 : "r" (aligned) /*in*/
574 : "memory", "r9" /*clobber*/);
575 for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
576 if (!test_block[i])
577 ++dcbzl_szB;
578 }
579 vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
580 }
581
582 arch_info->ppc_dcbz_szB = dcbz_szB;
583 arch_info->ppc_dcbzl_szB = dcbzl_szB;
584
585 VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
586 dcbz_szB, dcbzl_szB);
587 # undef MAX_DCBZL_SZB
588 }
589 #endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
590
591 #ifdef VGA_s390x
592
593 /* Read /proc/cpuinfo. Look for lines like these
594
595 processor 0: version = FF, identification = 0117C9, machine = 2064
596
597 and return the machine model. If the machine model could not be determined
598 or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */
599
600 static UInt VG_(get_machine_model)(void)
601 {
602 static struct model_map {
603 const HChar name[5];
604 UInt id;
605 } model_map[] = {
606 { "2064", VEX_S390X_MODEL_Z900 },
607 { "2066", VEX_S390X_MODEL_Z800 },
608 { "2084", VEX_S390X_MODEL_Z990 },
609 { "2086", VEX_S390X_MODEL_Z890 },
610 { "2094", VEX_S390X_MODEL_Z9_EC },
611 { "2096", VEX_S390X_MODEL_Z9_BC },
612 { "2097", VEX_S390X_MODEL_Z10_EC },
613 { "2098", VEX_S390X_MODEL_Z10_BC },
614 { "2817", VEX_S390X_MODEL_Z196 },
615 { "2818", VEX_S390X_MODEL_Z114 },
616 { "2827", VEX_S390X_MODEL_ZEC12 },
617 { "2828", VEX_S390X_MODEL_ZBC12 },
618 { "2964", VEX_S390X_MODEL_Z13 },
619 };
620
621 Int model, n, fh;
622 SysRes fd;
623 SizeT num_bytes, file_buf_size;
624 HChar *p, *m, *model_name, *file_buf;
625
626 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
627 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
628 if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;
629
630 fh = sr_Res(fd);
631
632 /* Determine the size of /proc/cpuinfo.
633 Work around broken-ness in /proc file system implementation.
634 fstat returns a zero size for /proc/cpuinfo although it is
635 claimed to be a regular file. */
636 num_bytes = 0;
637 file_buf_size = 1000;
638 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
639 while (42) {
640 n = VG_(read)(fh, file_buf, file_buf_size);
641 if (n < 0) break;
642
643 num_bytes += n;
644 if (n < file_buf_size) break; /* reached EOF */
645 }
646
647 if (n < 0) num_bytes = 0; /* read error; ignore contents */
648
649 if (num_bytes > file_buf_size) {
650 VG_(free)( file_buf );
651 VG_(lseek)( fh, 0, VKI_SEEK_SET );
652 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
653 n = VG_(read)( fh, file_buf, num_bytes );
654 if (n < 0) num_bytes = 0;
655 }
656
657 file_buf[num_bytes] = '\0';
658 VG_(close)(fh);
659
660 /* Parse file */
661 model = VEX_S390X_MODEL_UNKNOWN;
662 for (p = file_buf; *p; ++p) {
663 /* Beginning of line */
664 if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;
665
666 m = VG_(strstr)( p, "machine" );
667 if (m == NULL) continue;
668
669 p = m + sizeof "machine" - 1;
670 while ( VG_(isspace)( *p ) || *p == '=') {
671 if (*p == '\n') goto next_line;
672 ++p;
673 }
674
675 model_name = p;
676 for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
677 struct model_map *mm = model_map + n;
678 SizeT len = VG_(strlen)( mm->name );
679 if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
680 VG_(isspace)( model_name[len] )) {
681 if (mm->id < model) model = mm->id;
682 p = model_name + len;
683 break;
684 }
685 }
686 /* Skip until end-of-line */
687 while (*p != '\n')
688 ++p;
689 next_line: ;
690 }
691
692 VG_(free)( file_buf );
693 VG_(debugLog)(1, "machine", "model = %s\n",
694 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
695 : model_map[model].name);
696 return model;
697 }
698
699 #endif /* VGA_s390x */
700
701 #if defined(VGA_mips32) || defined(VGA_mips64)
702
703 /* Read /proc/cpuinfo and return the machine model. */
704 static UInt VG_(get_machine_model)(void)
705 {
706 const char *search_MIPS_str = "MIPS";
707 const char *search_Broadcom_str = "Broadcom";
708 const char *search_Netlogic_str = "Netlogic";
709 const char *search_Cavium_str= "Cavium";
710 Int n, fh;
711 SysRes fd;
712 SizeT num_bytes, file_buf_size;
713 HChar *file_buf;
714
715 /* Slurp contents of /proc/cpuinfo into FILE_BUF */
716 fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
717 if ( sr_isError(fd) ) return -1;
718
719 fh = sr_Res(fd);
720
721 /* Determine the size of /proc/cpuinfo.
722 Work around broken-ness in /proc file system implementation.
723 fstat returns a zero size for /proc/cpuinfo although it is
724 claimed to be a regular file. */
725 num_bytes = 0;
726 file_buf_size = 1000;
727 file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
728 while (42) {
729 n = VG_(read)(fh, file_buf, file_buf_size);
730 if (n < 0) break;
731
732 num_bytes += n;
733 if (n < file_buf_size) break; /* reached EOF */
734 }
735
736 if (n < 0) num_bytes = 0; /* read error; ignore contents */
737
738 if (num_bytes > file_buf_size) {
739 VG_(free)( file_buf );
740 VG_(lseek)( fh, 0, VKI_SEEK_SET );
741 file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
742 n = VG_(read)( fh, file_buf, num_bytes );
743 if (n < 0) num_bytes = 0;
744 }
745
746 file_buf[num_bytes] = '\0';
747 VG_(close)(fh);
748
749 /* Parse file */
750 if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
751 return VEX_PRID_COMP_BROADCOM;
752 if (VG_(strstr) (file_buf, search_Netlogic_str) != NULL)
753 return VEX_PRID_COMP_NETLOGIC;
754 if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
755 return VEX_PRID_COMP_CAVIUM;
756 if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
757 return VEX_PRID_COMP_MIPS;
758
759 /* Did not find string in the proc file. */
760 return -1;
761 }
762
763 #endif
764
765 /* Determine what insn set and insn set variant the host has, and
766 record it. To be called once at system startup. Returns False if
767    this is a CPU incapable of running Valgrind.
768 Also determine information about the caches on this host. */
769
770 Bool VG_(machine_get_hwcaps)( void )
771 {
772 vg_assert(hwcaps_done == False);
773 hwcaps_done = True;
774
775 // Whack default settings into vai, so that we only need to fill in
776 // any interesting bits.
777 LibVEX_default_VexArchInfo(&vai);
778
779 #if defined(VGA_x86)
780 { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
781 UInt eax, ebx, ecx, edx, max_extended;
782 HChar vstr[13];
783 vstr[0] = 0;
784
785 if (!VG_(has_cpuid)())
786 /* we can't do cpuid at all. Give up. */
787 return False;
788
789 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
790 if (eax < 1)
791 /* we can't ask for cpuid(x) for x > 0. Give up. */
792 return False;
793
794 /* Get processor ID string, and max basic/extended index
795 values. */
796 VG_(memcpy)(&vstr[0], &ebx, 4);
797 VG_(memcpy)(&vstr[4], &edx, 4);
798 VG_(memcpy)(&vstr[8], &ecx, 4);
799 vstr[12] = 0;
800
801 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
802 max_extended = eax;
803
804 /* get capabilities bits into edx */
805 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
806
807 have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
808 have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
809 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
810
811 /* cmpxchg8b is a minimum requirement now; if we don't have it we
812 must simply give up. But all CPUs since Pentium-I have it, so
813 that doesn't seem like much of a restriction. */
814 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
815 if (!have_cx8)
816 return False;
817
818 /* Figure out if this is an AMD that can do MMXEXT. */
819 have_mmxext = False;
820 if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
821 && max_extended >= 0x80000001) {
822 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
823 /* Some older AMD processors support a sse1 subset (Integer SSE). */
824 have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
825 }
826
827 /* Figure out if this is an AMD or Intel that can do LZCNT. */
828 have_lzcnt = False;
829 if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
830 || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
831 && max_extended >= 0x80000001) {
832 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
833 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
834 }
835
836 /* Intel processors don't define the mmxext extension, but since it
837 is just a sse1 subset always define it when we have sse1. */
838 if (have_sse1)
839 have_mmxext = True;
840
841 va = VexArchX86;
842 vai.endness = VexEndnessLE;
843
844 if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
845 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
846 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
847 vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
848 vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
849 if (have_lzcnt)
850 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
851 VG_(machine_x86_have_mxcsr) = 1;
852 } else if (have_sse2 && have_sse1 && have_mmxext) {
853 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
854 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
855 vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
856 if (have_lzcnt)
857 vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
858 VG_(machine_x86_have_mxcsr) = 1;
859 } else if (have_sse1 && have_mmxext) {
860 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT;
861 vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
862 VG_(machine_x86_have_mxcsr) = 1;
863 } else if (have_mmxext) {
864 vai.hwcaps = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
865 VG_(machine_x86_have_mxcsr) = 0;
866 } else {
867 vai.hwcaps = 0; /*baseline - no sse at all*/
868 VG_(machine_x86_have_mxcsr) = 0;
869 }
870
871 VG_(machine_get_cache_info)(&vai);
872
873 return True;
874 }
875
876 #elif defined(VGA_amd64)
877 { Bool have_sse3, have_cx8, have_cx16;
878 Bool have_lzcnt, have_avx, have_bmi, have_avx2;
879 Bool have_rdtscp;
880 UInt eax, ebx, ecx, edx, max_basic, max_extended;
881 ULong xgetbv_0 = 0;
882 HChar vstr[13];
883 vstr[0] = 0;
884
885 if (!VG_(has_cpuid)())
886 /* we can't do cpuid at all. Give up. */
887 return False;
888
889 VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
890 max_basic = eax;
891 if (max_basic < 1)
892 /* we can't ask for cpuid(x) for x > 0. Give up. */
893 return False;
894
895 /* Get processor ID string, and max basic/extended index
896 values. */
897 VG_(memcpy)(&vstr[0], &ebx, 4);
898 VG_(memcpy)(&vstr[4], &edx, 4);
899 VG_(memcpy)(&vstr[8], &ecx, 4);
900 vstr[12] = 0;
901
902 VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
903 max_extended = eax;
904
905 /* get capabilities bits into edx */
906 VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
907
908 // we assume that SSE1 and SSE2 are available by default
909 have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
910 // ssse3 is ecx:9
911 // sse41 is ecx:19
912 // sse42 is ecx:20
913
914 // xsave is ecx:26
915 // osxsave is ecx:27
916 // avx is ecx:28
917 // fma is ecx:12
918 have_avx = False;
919 /* have_fma = False; */
920 if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
921         /* Processor supports AVX instructions, and the OS has enabled
922            XGETBV (OSXSAVE) and the use of AVX instructions. */
923 ULong w;
924 __asm__ __volatile__("movq $0,%%rcx ; "
925 ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
926 "movq %%rax,%0"
927 :/*OUT*/"=r"(w) :/*IN*/
928 :/*TRASH*/"rdx","rcx","rax");
929 xgetbv_0 = w;
930 if ((xgetbv_0 & 7) == 7) {
931 /* Only say we have AVX if the XSAVE-allowable
932 bitfield-mask allows x87, SSE and AVX state. We could
933 actually run with a more restrictive XGETBV(0) value,
934 but VEX's implementation of XSAVE and XRSTOR assumes
935 that all 3 bits are enabled.
936
937 Also, the VEX implementation of XSAVE/XRSTOR assumes that
938 state component [2] (the YMM high halves) are located in
939 the XSAVE image at offsets 576 .. 831. So we have to
940 check that here before declaring AVX to be supported. */
941 UInt eax2, ebx2, ecx2, edx2;
942 VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
943 if (ebx2 == 576 && eax2 == 256) {
944 have_avx = True;
945 }
946 /* have_fma = (ecx & (1<<12)) != 0; */
947 /* have_fma: Probably correct, but gcc complains due to
948 unusedness. */
949 }
950 }
951
952 /* cmpxchg8b is a minimum requirement now; if we don't have it we
953 must simply give up. But all CPUs since Pentium-I have it, so
954 that doesn't seem like much of a restriction. */
955 have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
956 if (!have_cx8)
957 return False;
958
959 /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
960 have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */
961
962 /* Figure out if this CPU can do LZCNT. */
963 have_lzcnt = False;
964 if (max_extended >= 0x80000001) {
965 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
966 have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
967 }
968
969 /* Can we do RDTSCP? */
970 have_rdtscp = False;
971 if (max_extended >= 0x80000001) {
972 VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
973          have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
974 }
975
976      /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS support). */
977 have_bmi = False;
978 have_avx2 = False;
979 if (have_avx && max_basic >= 7) {
980 VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
981 have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */
982 have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
983 }
984
985 va = VexArchAMD64;
986 vai.endness = VexEndnessLE;
987 vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
988 | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
989 | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
990 | (have_avx ? VEX_HWCAPS_AMD64_AVX : 0)
991 | (have_bmi ? VEX_HWCAPS_AMD64_BMI : 0)
992 | (have_avx2 ? VEX_HWCAPS_AMD64_AVX2 : 0)
993 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);
994
995 VG_(machine_get_cache_info)(&vai);
996
997 return True;
998 }
999
1000 #elif defined(VGA_ppc32)
1001 {
1002 /* Find out which subset of the ppc32 instruction set is supported by
1003 verifying whether various ppc32 instructions generate a SIGILL
1004 or a SIGFPE. An alternative approach is to check the AT_HWCAP and
1005 AT_PLATFORM entries in the ELF auxiliary table -- see also
1006 the_iifii.client_auxv in m_main.c.
1007 */
1008 vki_sigset_t saved_set, tmp_set;
1009 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1010 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1011
1012 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
1013 volatile Bool have_isa_2_07;
1014 Int r;
1015
1016 /* This is a kludge. Really we ought to back-convert saved_act
1017 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1018 since that's a no-op on all ppc32 platforms so far supported,
1019 it's not worth the typing effort. At least include most basic
1020 sanity check: */
1021 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1022
1023 VG_(sigemptyset)(&tmp_set);
1024 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1025 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1026
1027 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1028 vg_assert(r == 0);
1029
1030 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1031 vg_assert(r == 0);
1032 tmp_sigill_act = saved_sigill_act;
1033
1034 r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1035 vg_assert(r == 0);
1036 tmp_sigfpe_act = saved_sigfpe_act;
1037
1038 /* NODEFER: signal handler does not return (from the kernel's point of
1039 view), hence if it is to successfully catch a signal more than once,
1040 we need the NODEFER flag. */
1041 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1042 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1043 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1044 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1045 r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1046 vg_assert(r == 0);
1047
1048 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1049 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1050 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1051 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1052 r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1053 vg_assert(r == 0);
1054
1055 /* standard FP insns */
1056 have_F = True;
1057 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1058 have_F = False;
1059 } else {
1060 __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
1061 }
1062
1063 /* Altivec insns */
1064 have_V = True;
1065 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1066 have_V = False;
1067 } else {
1068 /* Unfortunately some older assemblers don't speak Altivec (or
1069 choose not to), so to be safe we directly emit the 32-bit
1070 word corresponding to "vor 0,0,0". This fixes a build
1071 problem that happens on Debian 3.1 (ppc32), and probably
1072 various other places. */
1073 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
1074 }
1075
1076 /* General-Purpose optional (fsqrt, fsqrts) */
1077 have_FX = True;
1078 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1079 have_FX = False;
1080 } else {
1081 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
1082 }
1083
1084 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1085 have_GX = True;
1086 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1087 have_GX = False;
1088 } else {
1089 __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
1090 }
1091
1092 /* VSX support implies Power ISA 2.06 */
1093 have_VX = True;
1094 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1095 have_VX = False;
1096 } else {
1097 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1098 }
1099
1100 /* Check for Decimal Floating Point (DFP) support. */
1101 have_DFP = True;
1102 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1103 have_DFP = False;
1104 } else {
1105 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
1106 }
1107
1108 /* Check for ISA 2.07 support. */
1109 have_isa_2_07 = True;
1110 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1111 have_isa_2_07 = False;
1112 } else {
1113 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
1114 }
1115
1116 /* determine dcbz/dcbzl sizes while we still have the signal
1117 * handlers registered */
1118 find_ppc_dcbz_sz(&vai);
1119
1120 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1121 vg_assert(r == 0);
1122 r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1123 vg_assert(r == 0);
1124 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1125 vg_assert(r == 0);
1126 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
1127 (Int)have_F, (Int)have_V, (Int)have_FX,
1128 (Int)have_GX, (Int)have_VX, (Int)have_DFP,
1129 (Int)have_isa_2_07);
1130 /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
1131 if (have_V && !have_F)
1132 have_V = False;
1133 if (have_FX && !have_F)
1134 have_FX = False;
1135 if (have_GX && !have_F)
1136 have_GX = False;
1137
1138 VG_(machine_ppc32_has_FP) = have_F ? 1 : 0;
1139 VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;
1140
1141 va = VexArchPPC32;
1142 vai.endness = VexEndnessBE;
1143
1144 vai.hwcaps = 0;
1145 if (have_F) vai.hwcaps |= VEX_HWCAPS_PPC32_F;
1146 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC32_V;
1147 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
1148 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
1149 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
1150 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
1151 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;
1152
1153 VG_(machine_get_cache_info)(&vai);
1154
1155 /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
1156 called before we're ready to go. */
1157 return True;
1158 }
1159
1160 #elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
1161 {
1162 /* Same instruction set detection algorithm as for ppc32. */
1163 vki_sigset_t saved_set, tmp_set;
1164 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1165 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1166
1167 volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
1168 volatile Bool have_isa_2_07;
1169 Int r;
1170
1171 /* This is a kludge. Really we ought to back-convert saved_act
1172 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1173 since that's a no-op on all ppc64 platforms so far supported,
1174 it's not worth the typing effort. At least include most basic
1175 sanity check: */
1176 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1177
1178 VG_(sigemptyset)(&tmp_set);
1179 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1180 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1181
1182 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1183 vg_assert(r == 0);
1184
1185 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1186 vg_assert(r == 0);
1187 tmp_sigill_act = saved_sigill_act;
1188
1189 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1190 tmp_sigfpe_act = saved_sigfpe_act;
1191
1192 /* NODEFER: signal handler does not return (from the kernel's point of
1193 view), hence if it is to successfully catch a signal more than once,
1194 we need the NODEFER flag. */
1195 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1196 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1197 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1198 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1199 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1200
1201 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1202 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1203 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1204 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1205 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1206
1207 /* standard FP insns */
1208 have_F = True;
1209 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1210 have_F = False;
1211 } else {
1212 __asm__ __volatile__("fmr 0,0");
1213 }
1214
1215 /* Altivec insns */
1216 have_V = True;
1217 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1218 have_V = False;
1219 } else {
1220 __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
1221 }
1222
1223 /* General-Purpose optional (fsqrt, fsqrts) */
1224 have_FX = True;
1225 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1226 have_FX = False;
1227 } else {
1228 __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
1229 }
1230
1231 /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
1232 have_GX = True;
1233 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1234 have_GX = False;
1235 } else {
1236 __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
1237 }
1238
1239 /* VSX support implies Power ISA 2.06 */
1240 have_VX = True;
1241 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1242 have_VX = False;
1243 } else {
1244 __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
1245 }
1246
1247 /* Check for Decimal Floating Point (DFP) support. */
1248 have_DFP = True;
1249 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1250 have_DFP = False;
1251 } else {
1252 __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
1253 }
1254
1255 /* Check for ISA 2.07 support. */
1256 have_isa_2_07 = True;
1257 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1258 have_isa_2_07 = False;
1259 } else {
1260 __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
1261 }
1262
1263 /* determine dcbz/dcbzl sizes while we still have the signal
1264 * handlers registered */
1265 find_ppc_dcbz_sz(&vai);
1266
1267 VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1268 VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
1269 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1270 VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
1271 (Int)have_F, (Int)have_V, (Int)have_FX,
1272 (Int)have_GX, (Int)have_VX, (Int)have_DFP,
1273 (Int)have_isa_2_07);
1274      /* on ppc64, if we don't even have FP, just give up. */
1275 if (!have_F)
1276 return False;
1277
1278 VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;
1279
1280 va = VexArchPPC64;
1281 # if defined(VKI_LITTLE_ENDIAN)
1282 vai.endness = VexEndnessLE;
1283 # elif defined(VKI_BIG_ENDIAN)
1284 vai.endness = VexEndnessBE;
1285 # else
1286 vai.endness = VexEndness_INVALID;
1287 # endif
1288
1289 vai.hwcaps = 0;
1290 if (have_V) vai.hwcaps |= VEX_HWCAPS_PPC64_V;
1291 if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
1292 if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
1293 if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
1294 if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
1295 if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;
1296
1297 VG_(machine_get_cache_info)(&vai);
1298
1299 /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
1300 called before we're ready to go. */
1301 return True;
1302 }
1303
1304 #elif defined(VGA_s390x)
1305
1306 # include "libvex_s390x_common.h"
1307
1308 {
1309 /* Instruction set detection code borrowed from ppc above. */
1310 vki_sigset_t saved_set, tmp_set;
1311 vki_sigaction_fromK_t saved_sigill_act;
1312 vki_sigaction_toK_t tmp_sigill_act;
1313
1314 volatile Bool have_LDISP, have_STFLE;
1315 Int i, r, model;
1316
1317 /* If the model is "unknown" don't treat this as an error. Assume
1318 this is a brand-new machine model for which we don't have the
1319 identification yet. Keeping fingers crossed. */
1320 model = VG_(get_machine_model)();
1321
1322 /* Unblock SIGILL and stash away the old action for that signal */
1323 VG_(sigemptyset)(&tmp_set);
1324 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1325
1326 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1327 vg_assert(r == 0);
1328
1329 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1330 vg_assert(r == 0);
1331 tmp_sigill_act = saved_sigill_act;
1332
1333 /* NODEFER: signal handler does not return (from the kernel's point of
1334 view), hence if it is to successfully catch a signal more than once,
1335 we need the NODEFER flag. */
1336 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1337 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1338 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1339 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1340 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1341
1342 /* Determine hwcaps. Note, we cannot use the stfle insn because it
1343 is not supported on z900. */
1344
1345 have_LDISP = True;
1346 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1347 have_LDISP = False;
1348 } else {
1349 /* BASR loads the address of the next insn into r1. Needed to avoid
1350 a segfault in XY. */
1351 __asm__ __volatile__("basr %%r1,%%r0\n\t"
1352 ".long 0xe3001000\n\t" /* XY 0,0(%r1) */
1353 ".short 0x0057" : : : "r0", "r1", "cc", "memory");
1354 }
1355
1356 /* Check availability of STFLE. If available store facility bits
1357 in hoststfle. */
1358 ULong hoststfle[S390_NUM_FACILITY_DW];
1359
1360 for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
1361 hoststfle[i] = 0;
1362
1363 have_STFLE = True;
1364 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1365 have_STFLE = False;
1366 } else {
1367 register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;
1368
1369 __asm__ __volatile__(" .insn s,0xb2b00000,%0\n" /* stfle */
1370 : "=m" (hoststfle), "+d"(reg0)
1371 : : "cc", "memory");
1372 }
1373
1374 /* Restore signals */
1375 r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1376 vg_assert(r == 0);
1377 r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1378 vg_assert(r == 0);
1379 va = VexArchS390X;
1380 vai.endness = VexEndnessBE;
1381
1382 vai.hwcaps = model;
1383 if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
1384 if (have_LDISP) {
1385 /* Use long displacement only on machines >= z990. For all other
1386 machines it is millicoded and therefore slow. */
1387 if (model >= VEX_S390X_MODEL_Z990)
1388 vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
1389 }
1390
1391 /* Detect presence of certain facilities using the STFLE insn.
1392       Note that these facilities were introduced at the same time as, or later
1393       than, STFLE, so the absence of STFLE implies the absence of the facility
1394 we're trying to detect. */
1395 struct fac_hwcaps_map {
1396 UInt installed;
1397 UInt facility_bit;
1398 UInt hwcaps_bit;
1399 const HChar name[6]; // may need adjustment for new facility names
1400 } fac_hwcaps[] = {
1401 { False, S390_FAC_EIMM, VEX_HWCAPS_S390X_EIMM, "EIMM" },
1402 { False, S390_FAC_GIE, VEX_HWCAPS_S390X_GIE, "GIE" },
1403 { False, S390_FAC_DFP, VEX_HWCAPS_S390X_DFP, "DFP" },
1404 { False, S390_FAC_FPSE, VEX_HWCAPS_S390X_FGX, "FGX" },
1405 { False, S390_FAC_ETF2, VEX_HWCAPS_S390X_ETF2, "ETF2" },
1406 { False, S390_FAC_ETF3, VEX_HWCAPS_S390X_ETF3, "ETF3" },
1407 { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
1408 { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
1409 { False, S390_FAC_LSC, VEX_HWCAPS_S390X_LSC, "LSC" },
1410 { False, S390_FAC_PFPO, VEX_HWCAPS_S390X_PFPO, "PFPO" },
1411 };
1412
1413 /* Set hwcaps according to the detected facilities */
1414 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
1415 vg_assert(fac_hwcaps[i].facility_bit <= 63); // for now
1416 if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
1417 fac_hwcaps[i].installed = True;
1418 vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
1419 }
1420 }
1421
1422 /* Build up a string showing the probed-for facilities */
1423 HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
1424 (sizeof fac_hwcaps[0].name + 3) + // %s %d
1425 7 + 1 + 4 + 2 // machine %4d
1426 + 1]; // \0
1427 HChar *p = fac_str;
1428 p += VG_(sprintf)(p, "machine %4d ", model);
1429 for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
1430 p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
1431 fac_hwcaps[i].installed);
1432 }
1433 *p++ = '\0';
1434
1435 VG_(debugLog)(1, "machine", "%s\n", fac_str);
1436 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1437
1438 VG_(machine_get_cache_info)(&vai);
1439
1440 return True;
1441 }
1442
1443 #elif defined(VGA_arm)
1444 {
1445 /* Same instruction set detection algorithm as for ppc32. */
1446 vki_sigset_t saved_set, tmp_set;
1447 vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1448 vki_sigaction_toK_t tmp_sigill_act, tmp_sigfpe_act;
1449
1450 volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
1451 volatile Int archlevel;
1452 Int r;
1453
1454 /* This is a kludge. Really we ought to back-convert saved_act
1455 into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
1456         since that's a no-op on all arm-linux platforms so far supported,
1457 it's not worth the typing effort. At least include most basic
1458 sanity check: */
1459 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1460
1461 VG_(sigemptyset)(&tmp_set);
1462 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1463 VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1464
1465 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1466 vg_assert(r == 0);
1467
1468 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1469 vg_assert(r == 0);
1470 tmp_sigill_act = saved_sigill_act;
1471
1472 VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1473 tmp_sigfpe_act = saved_sigfpe_act;
1474
1475 /* NODEFER: signal handler does not return (from the kernel's point of
1476 view), hence if it is to successfully catch a signal more than once,
1477 we need the NODEFER flag. */
1478 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1479 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1480 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1481 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1482 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1483
1484 tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1485 tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1486 tmp_sigfpe_act.sa_flags |= VKI_SA_NODEFER;
1487 tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1488 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1489
1490 /* VFP insns */
1491 have_VFP = True;
1492 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1493 have_VFP = False;
1494 } else {
1495 __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
1496 }
1497      /* There are several generations of the VFP extension but they differ
1498         very little, so for now we will not distinguish them. */
1499 have_VFP2 = have_VFP;
1500 have_VFP3 = have_VFP;
1501
1502 /* NEON insns */
1503 have_NEON = True;
1504 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1505 have_NEON = False;
1506 } else {
1507 __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
1508 }
1509
1510 /* ARM architecture level */
1511 archlevel = 5; /* v5 will be base level */
1512 if (archlevel < 7) {
1513 archlevel = 7;
1514 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1515 archlevel = 5;
1516 } else {
1517 __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
1518 }
1519 }
1520 if (archlevel < 6) {
1521 archlevel = 6;
1522 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1523 archlevel = 5;
1524 } else {
1525 __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
1526 }
1527 }
1528
1529 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1530 VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
1531 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1532 VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1533 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1534
1535 VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
1536 archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
1537 (Int)have_NEON);
1538
1539 VG_(machine_arm_archlevel) = archlevel;
1540
1541 va = VexArchARM;
1542 vai.endness = VexEndnessLE;
1543
1544 vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
1545 if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
1546 if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
1547 if (have_VFP) vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
1548 if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
1549
1550 VG_(machine_get_cache_info)(&vai);
1551
1552 return True;
1553 }
1554
1555 #elif defined(VGA_arm64)
1556 {
1557 va = VexArchARM64;
1558 vai.endness = VexEndnessLE;
1559
1560 /* So far there are no variants. */
1561 vai.hwcaps = 0;
1562
1563 VG_(machine_get_cache_info)(&vai);
1564
1565 /* 0 denotes 'not set'. The range of legitimate values here,
1566         after being set that is, is 2 through 17 inclusive. */
1567 vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
1568 vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
1569 ULong ctr_el0;
1570 __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
1571 vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
1572 vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >> 0) & 0xF) + 2;
1573 VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
1574 "ctr_el0.iMinLine_szB = %d\n",
1575 1 << vai.arm64_dMinLine_lg2_szB,
1576 1 << vai.arm64_iMinLine_lg2_szB);
1577
1578 return True;
1579 }
1580
1581 #elif defined(VGA_mips32)
1582 {
1583 /* Define the position of F64 bit in FIR register. */
1584 # define FP64 22
1585 va = VexArchMIPS32;
1586 UInt model = VG_(get_machine_model)();
1587 if (model == -1)
1588 return False;
1589
1590 vai.hwcaps = model;
1591
1592 # if defined(VKI_LITTLE_ENDIAN)
1593 vai.endness = VexEndnessLE;
1594 # elif defined(VKI_BIG_ENDIAN)
1595 vai.endness = VexEndnessBE;
1596 # else
1597 vai.endness = VexEndness_INVALID;
1598 # endif
1599
1600 /* Same instruction set detection algorithm as for ppc32/arm... */
1601 vki_sigset_t saved_set, tmp_set;
1602 vki_sigaction_fromK_t saved_sigill_act;
1603 vki_sigaction_toK_t tmp_sigill_act;
1604
1605 volatile Bool have_DSP, have_DSPr2;
1606 Int r;
1607
1608 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1609
1610 VG_(sigemptyset)(&tmp_set);
1611 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1612
1613 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1614 vg_assert(r == 0);
1615
1616 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1617 vg_assert(r == 0);
1618 tmp_sigill_act = saved_sigill_act;
1619
1620 /* NODEFER: signal handler does not return (from the kernel's point of
1621 view), hence if it is to successfully catch a signal more than once,
1622 we need the NODEFER flag. */
1623 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1624 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1625 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1626 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1627 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1628
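   /* Only MIPS Technologies cores are refined further: the company ID
      taken from PRId is augmented with a guessed implementation --
      74K if DSPr2 executes, 34K if only the plain DSP ASE does. */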
1629 if (model == VEX_PRID_COMP_MIPS) {
1630 /* DSPr2 instructions. */
1631 have_DSPr2 = True;
1632 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1633 have_DSPr2 = False;
1634 } else {
1635 __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
1636 }
1637 if (have_DSPr2) {
1638 /* We assume it's 74K, since it can run DSPr2. */
1639 vai.hwcaps |= VEX_PRID_IMP_74K;
1640 } else {
1641 /* DSP instructions. */
1642 have_DSP = True;
1643 if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1644 have_DSP = False;
1645 } else {
1646 __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
1647 }
1648 if (have_DSP) {
1649 /* We assume it's 34K, since it has support for DSP. */
1650 vai.hwcaps |= VEX_PRID_IMP_34K;
1651 }
1652 }
1653 }
1654
1655 /* Check if CPU has FPU and 32 dbl. prec. FP registers */
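   /* FIR is the CP1 Floating-point Implementation Register, read with
      "cfc1 %0, $0" below.  Its F64 bit (bit 22, see FP64 above) is set
      when the FPU implements 64-bit registers, i.e. a full set of 32
      double-precision FPRs. */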
1656 int FIR = 0;
1657 __asm__ __volatile__(
1658 "cfc1 %0, $0" "\n\t"
1659 : "=r" (FIR)
1660 );
1661 if (FIR & (1 << FP64)) {
1662 vai.hwcaps |= VEX_PRID_CPU_32FPR;
1663 }
1664
1665 VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1666 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1667 VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1668
1669 VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1670 VG_(machine_get_cache_info)(&vai);
1671
1672 return True;
1673 }
1674
1675 #elif defined(VGA_mips64)
1676 {
1677 va = VexArchMIPS64;
1678 UInt model = VG_(get_machine_model)();
1679 if (model == -1)
1680 return False;
1681
1682 vai.hwcaps = model;
1683
1684 # if defined(VKI_LITTLE_ENDIAN)
1685 vai.endness = VexEndnessLE;
1686 # elif defined(VKI_BIG_ENDIAN)
1687 vai.endness = VexEndnessBE;
1688 # else
1689 vai.endness = VexEndness_INVALID;
1690 # endif
1691
1692 VG_(machine_get_cache_info)(&vai);
1693
1694 return True;
1695 }
1696
1697 #elif defined(VGA_tilegx)
1698 {
1699 va = VexArchTILEGX;
1700 vai.hwcaps = VEX_HWCAPS_TILEGX_BASE;
1701 vai.endness = VexEndnessLE;
1702
1703 VG_(machine_get_cache_info)(&vai);
1704
1705 return True;
1706 }
1707
1708 #else
1709 # error "Unknown arch"
1710 #endif
1711 }
1712
1713 /* Notify host cpu instruction cache line size. */
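/* (The caller supplies the line size at startup; on Linux this is
   normally the value the kernel advertises through the AT_ICACHEBSIZE /
   AT_DCACHEBSIZE auxv entries.) */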
1714 #if defined(VGA_ppc32)
VG_(machine_ppc32_set_clszB)1715 void VG_(machine_ppc32_set_clszB)( Int szB )
1716 {
1717 vg_assert(hwcaps_done);
1718
1719 /* Either the value must not have been set yet (zero) or we can
1720 tolerate it being set to the same value multiple times, as the
1721 stack scanning logic in m_main is a bit stupid. */
1722 vg_assert(vai.ppc_icache_line_szB == 0
1723 || vai.ppc_icache_line_szB == szB);
1724
1725 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
1726 vai.ppc_icache_line_szB = szB;
1727 }
1728 #endif
1729
1730
1731 /* Notify host cpu instruction cache line size. */
1732 #if defined(VGA_ppc64be)|| defined(VGA_ppc64le)
VG_(machine_ppc64_set_clszB)1733 void VG_(machine_ppc64_set_clszB)( Int szB )
1734 {
1735 vg_assert(hwcaps_done);
1736
1737 /* Either the value must not have been set yet (zero) or we can
1738 tolerate it being set to the same value multiple times, as the
1739 stack scanning logic in m_main is a bit stupid. */
1740 vg_assert(vai.ppc_icache_line_szB == 0
1741 || vai.ppc_icache_line_szB == szB);
1742
1743 vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
1744 vai.ppc_icache_line_szB = szB;
1745 }
1746 #endif
1747
1748
1749 /* Notify host's ability to handle NEON instructions. */
1750 #if defined(VGA_arm)
VG_(machine_arm_set_has_NEON)1751 void VG_(machine_arm_set_has_NEON)( Bool has_neon )
1752 {
1753 vg_assert(hwcaps_done);
1754 /* There's nothing else we can sanity check. */
1755
1756 if (has_neon) {
1757 vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
1758 } else {
1759 vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
1760 }
1761 }
1762 #endif
1763
1764
1765 /* Fetch host cpu info, once established. */
VG_(machine_get_VexArchInfo)1766 void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
1767 /*OUT*/VexArchInfo* pVai )
1768 {
1769 vg_assert(hwcaps_done);
1770 if (pVa) *pVa = va;
1771 if (pVai) *pVai = vai;
1772 }
1773
1774
1775 /* Returns the size of the largest guest register that we will
1776 simulate in this run. This depends on both the guest architecture
1777 and on the specific capabilities we are simulating for that guest
1778 (eg, AVX or non-AVX ?, for amd64). Should return either 4, 8, 16
1779 or 32. General rule: if in doubt, return a value larger than
1780 reality.
1781
1782 This information is needed by Cachegrind and Callgrind to decide
1783 what the minimum cache line size they are prepared to simulate is.
1784 Basically require that the minimum cache line size is at least as
1785 large as the largest register that might get transferred to/from
1786 memory, so as to guarantee that any such transaction can straddle
1787 at most 2 cache lines.
1788 */
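/* For example, with a simulated line size of 32 bytes an arbitrarily
   aligned 32-byte AVX transfer touches at most two lines, whereas with
   16-byte lines it could touch three. */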
VG_(machine_get_size_of_largest_guest_register)1789 Int VG_(machine_get_size_of_largest_guest_register) ( void )
1790 {
1791 vg_assert(hwcaps_done);
1792 /* Once hwcaps_done is True, we can fish around inside va/vai to
1793 find the information we need. */
1794
1795 # if defined(VGA_x86)
1796 vg_assert(va == VexArchX86);
1797 /* We don't support AVX, so 32 is out. At the other end, even if
1798 we don't support any SSE, the X87 can generate 10 byte
1799 transfers, so let's say 16 to be on the safe side. Hence the
1800 answer is always 16. */
1801 return 16;
1802
1803 # elif defined(VGA_amd64)
1804 /* if AVX then 32 else 16 */
1805 return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
1806
1807 # elif defined(VGA_ppc32)
1808 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
1809 if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
1810 if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
1811 if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
1812 return 8;
1813
1814 # elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
1815 /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
1816 if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
1817 if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
1818 if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
1819 return 8;
1820
1821 # elif defined(VGA_s390x)
1822 return 8;
1823
1824 # elif defined(VGA_arm)
1825    /* Really it depends on whether or not we have NEON, but let's just
1826       assume we always do. */
1827 return 16;
1828
1829 # elif defined(VGA_arm64)
1830 /* ARM64 always has Neon, AFAICS. */
1831 return 16;
1832
1833 # elif defined(VGA_mips32)
1834 /* The guest state implies 4, but that can't really be true, can
1835 it? */
1836 return 8;
1837
1838 # elif defined(VGA_mips64)
1839 return 8;
1840
1841 # elif defined(VGA_tilegx)
1842 return 8;
1843
1844 # else
1845 # error "Unknown arch"
1846 # endif
1847 }
1848
1849
1850 // Given a pointer to a function as obtained by "& functionname" in C,
1851 // produce a pointer to the actual entry point for the function.
VG_(fnptr_to_fnentry)1852 void* VG_(fnptr_to_fnentry)( void* f )
1853 {
1854 # if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
1855 || defined(VGP_arm_linux) || defined(VGO_darwin) \
1856 || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
1857 || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
1858 || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
1859 || defined(VGP_tilegx_linux) || defined(VGP_x86_solaris) \
1860 || defined(VGP_amd64_solaris)
1861 return f;
1862 # elif defined(VGP_ppc64be_linux)
1863 /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
1864 3-word function descriptor, of which the first word is the entry
1865 address. */
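   /* ELFv1/AIX-style descriptor layout: descr[0] = entry point address,
      descr[1] = TOC (r2) value for the callee, descr[2] = environment
      pointer (unused by C). */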
1866 UWord* descr = (UWord*)f;
1867 return (void*)(descr[0]);
1868 # else
1869 # error "Unknown platform"
1870 # endif
1871 }
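/* Illustrative sketch (not part of the original file): tool code that
   needs the real entry point of a helper typically wraps the address
   like this; 'helper_fn' is a hypothetical helper.

      void helper_fn ( void );
      ...
      void* entry = VG_(fnptr_to_fnentry)( (void*) &helper_fn );

   On most platforms this is the identity function; only ppc64be-linux
   has to look through the function descriptor. */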
1872
1873 /*--------------------------------------------------------------------*/
1874 /*--- end ---*/
1875 /*--------------------------------------------------------------------*/
1876