/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)   ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)   ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)   ((regs).vex.VG_FRAME_PTR)
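/* Note (for orientation only): VG_INSTR_PTR, VG_STACK_PTR and VG_FRAME_PTR
   are per-architecture macros naming the corresponding guest-state fields --
   e.g. on amd64 they select guest_RIP, guest_RSP and guest_RBP, as can also
   be seen spelled out in VG_(get_UnwindStartRegs) below. */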

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_tilegx)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_pc;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r54;
   regs->misc.TILEGX.r52
      = VG_(threads)[tid].arch.vex.guest_r52;
   regs->misc.TILEGX.r55
      = VG_(threads)[tid].arch.vex.guest_r55;
#  else
#    error "Unknown arch"
#  endif
}

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}
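
/* Illustrative sketch (not part of this module): a tool that wants, say,
   4 bytes of shadow-1 state at a guest-state offset OFF it already knows
   could fetch them with
      UChar buf[4];
      VG_(get_shadow_regs_area)( tid, buf, 1, OFF, sizeof buf );
   and write them back with VG_(set_shadow_regs_area) below.  OFF is a
   hypothetical offset; it must satisfy the bounds checks above. */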

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %d\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#elif defined(VGA_tilegx)
   (*f)(tid, "r0",  vex->guest_r0 );
   (*f)(tid, "r1",  vex->guest_r1 );
   (*f)(tid, "r2",  vex->guest_r2 );
   (*f)(tid, "r3",  vex->guest_r3 );
   (*f)(tid, "r4",  vex->guest_r4 );
   (*f)(tid, "r5",  vex->guest_r5 );
   (*f)(tid, "r6",  vex->guest_r6 );
   (*f)(tid, "r7",  vex->guest_r7 );
   (*f)(tid, "r8",  vex->guest_r8 );
   (*f)(tid, "r9",  vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
   (*f)(tid, "r32", vex->guest_r32);
   (*f)(tid, "r33", vex->guest_r33);
   (*f)(tid, "r34", vex->guest_r34);
   (*f)(tid, "r35", vex->guest_r35);
   (*f)(tid, "r36", vex->guest_r36);
   (*f)(tid, "r37", vex->guest_r37);
   (*f)(tid, "r38", vex->guest_r38);
   (*f)(tid, "r39", vex->guest_r39);
   (*f)(tid, "r40", vex->guest_r40);
   (*f)(tid, "r41", vex->guest_r41);
   (*f)(tid, "r42", vex->guest_r42);
   (*f)(tid, "r43", vex->guest_r43);
   (*f)(tid, "r44", vex->guest_r44);
   (*f)(tid, "r45", vex->guest_r45);
   (*f)(tid, "r46", vex->guest_r46);
   (*f)(tid, "r47", vex->guest_r47);
   (*f)(tid, "r48", vex->guest_r48);
   (*f)(tid, "r49", vex->guest_r49);
   (*f)(tid, "r50", vex->guest_r50);
   (*f)(tid, "r51", vex->guest_r51);
   (*f)(tid, "r52", vex->guest_r52);
   (*f)(tid, "r53", vex->guest_r53);
   (*f)(tid, "r54", vex->guest_r54);
   (*f)(tid, "r55", vex->guest_r55);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
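
/* Illustrative sketch of a callback suitable for VG_(apply_to_GP_regs)
   (hypothetical, not part of this module):
      static void print_gp_reg ( ThreadId tid, const HChar* name, UWord val ) {
         VG_(printf)("tid %d: %s = 0x%lx\n", (Int)tid, name, val);
      }
   Passing print_gp_reg to VG_(apply_to_GP_regs) visits every general-purpose
   register of every live (or exiting) thread. */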

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}
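
/* Typical iteration pattern over all thread stacks (sketch):
      ThreadId tid;
      Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while (VG_(thread_stack_next)(&tid, &stack_min, &stack_max)) {
         ... examine [stack_min, stack_max] for thread tid ...
      }
*/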

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, s390x, arm and mips32 we'll need to
   do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
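
/* The probe pattern used throughout VG_(machine_get_hwcaps) below is:
   install handler_unsup_insn for SIGILL (and SIGFPE where relevant), then
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         ... insn trapped: feature absent ...
      } else {
         ... execute one candidate instruction ...
      }
   If the instruction is unsupported, the resulting signal runs
   handler_unsup_insn, which longjmps back, making the setjmp return
   nonzero. */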
#endif


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes of the blocks affected by
 * the dcbz and dcbzl instructions and updates the given VexArchInfo
 * structure accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to the next max block size; assumes MAX_DCBZL_SZB is a
      power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);
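   /* Worked example of the round-up above (illustrative): with
      MAX_DCBZL_SZB = 128 (0x80), a start address of 0x1004 becomes
      (0x1004 + 0x80) & ~0x7F = 0x1080, i.e. the next 128-byte boundary;
      the 4*MAX_DCBZL_SZB buffer guarantees at least MAX_DCBZL_SZB usable
      bytes from that point. */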

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */
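/* For example, the "machine = 2064" field in the sample line above would be
   matched against model_map below and yield VEX_S390X_MODEL_Z900. */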

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#if defined(VGA_mips32) || defined(VGA_mips64)

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   const char *search_MIPS_str     = "MIPS";
   const char *search_Broadcom_str = "Broadcom";
   const char *search_Netlogic_str = "Netlogic";
   const char *search_Cavium_str   = "Cavium";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;  /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr)(file_buf, search_Broadcom_str) != NULL)
      return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr)(file_buf, search_Netlogic_str) != NULL)
      return VEX_PRID_COMP_NETLOGIC;
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      return VEX_PRID_COMP_CAVIUM;
   if (VG_(strstr)(file_buf, search_MIPS_str) != NULL)
      return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind.
   Also determine information about the caches on this host. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support an SSE1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just an SSE1 subset, always define it when we have SSE1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
        vai.hwcaps = 0; /*baseline - no sse at all*/
        VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }
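     /* For reference: XGETBV with ECX=0 returns XCR0.  Bit 1 of XCR0 is
        SSE/XMM state and bit 2 is AVX/YMM state, so the (w & 6) == 6 test
        above checks that the OS saves and restores both across context
        switches, which is required before AVX can safely be used. */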

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS support). */
     have_bmi  = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                   (Int)have_isa_2_07);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64be)|| defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                   (Int)have_F, (Int)have_V, (Int)have_FX,
                   (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                   (Int)have_isa_2_07);
     /* on ppc64be, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
        /* BASR loads the address of the next insn into r1. Needed to avoid
           a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE. If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

        __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                             : "=m" (hoststfle), "+d"(reg0)
                             : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as, or
        later than, STFLE, so the absence of STFLE implies the absence of
        the facility we're trying to detect. */
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
     };

     /* Set hwcaps according to the detected facilities */
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
        if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }
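     /* For reference: STFLE numbers facility bits from the most significant
        bit of each facility-list doubleword, so facility bit N (N <= 63)
        lives at bit position (63 - N) of hoststfle[0] -- e.g. facility bit
        21 is tested with the mask 1ULL << 42, exactly as the loop above
        does. */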

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) +    //  %s %d
                   7 + 1 + 4 + 2    // machine %4d
                   + 1];  // \0
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1d", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they differ
        so little that for now we do not distinguish between them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
                   archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
                   (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm64)
   {
     va = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* 0 denotes 'not set'.  The range of legitimate values here,
        after being set that is, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
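     /* For reference: CTR_EL0.DminLine (bits [19:16]) and IminLine
        (bits [3:0]) hold log2 of the line size in 4-byte words, hence
        the "+ 2" above to convert to log2 bytes.  E.g. a DminLine field
        of 4 means 2^(4+2) = 64-byte data cache lines. */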
1557 VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
1558 "ctr_el0.iMinLine_szB = %d\n",
1559 1 << vai.arm64_dMinLine_lg2_szB,
1560 1 << vai.arm64_iMinLine_lg2_szB);
1561
1562 return True;
1563 }
1564
1565 #elif defined(VGA_mips32)
1566 {
1567 /* Define the position of F64 bit in FIR register. */
1568 # define FP64 22
1569 va = VexArchMIPS32;
1570 UInt model = VG_(get_machine_model)();
1571 if (model == -1)
1572 return False;
1573
1574 vai.hwcaps = model;
1575
1576 # if defined(VKI_LITTLE_ENDIAN)
1577 vai.endness = VexEndnessLE;
1578 # elif defined(VKI_BIG_ENDIAN)
1579 vai.endness = VexEndnessBE;
1580 # else
1581 vai.endness = VexEndness_INVALID;
1582 # endif
1583
1584 /* Same instruction set detection algorithm as for ppc32/arm... */
1585 vki_sigset_t saved_set, tmp_set;
1586 vki_sigaction_fromK_t saved_sigill_act;
1587 vki_sigaction_toK_t tmp_sigill_act;
1588
1589 volatile Bool have_DSP, have_DSPr2;
1590 Int r;
1591
1592 vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1593
1594 VG_(sigemptyset)(&tmp_set);
1595 VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1596
1597 r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1598 vg_assert(r == 0);
1599
1600 r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1601 vg_assert(r == 0);
1602 tmp_sigill_act = saved_sigill_act;
1603
1604 /* NODEFER: signal handler does not return (from the kernel's point of
1605 view), hence if it is to successfully catch a signal more than once,
1606 we need the NODEFER flag. */
1607 tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1608 tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1609 tmp_sigill_act.sa_flags |= VKI_SA_NODEFER;
1610 tmp_sigill_act.ksa_handler = handler_unsup_insn;
1611 VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1612
     if (model == VEX_PRID_COMP_MIPS) {
        /* DSPr2 instructions. */
        have_DSPr2 = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_DSPr2 = False;
        } else {
           __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
        }
        if (have_DSPr2) {
           /* We assume it's 74K, since it can run DSPr2. */
           vai.hwcaps |= VEX_PRID_IMP_74K;
        } else {
           /* DSP instructions. */
           have_DSP = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSP = False;
           } else {
              __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
           }
           if (have_DSP) {
              /* We assume it's 34K, since it has support for DSP. */
              vai.hwcaps |= VEX_PRID_IMP_34K;
           }
        }
     }

     /* Check whether the CPU has an FPU and 32 double-precision FP
        registers. */
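     /* "cfc1 %0, $0" copies FP control register 0 -- the Floating
        point Implementation Register (FIR) -- into a GPR; the F64
        bit (bit 22) is set when the FPU has 32 64-bit registers. */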
     int FIR = 0;
     __asm__ __volatile__(
        "cfc1 %0, $0"  "\n\t"
        : "=r" (FIR)
     );
     if (FIR & (1 << FP64)) {
        vai.hwcaps |= VEX_PRID_CPU_32FPR;
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_tilegx)
   {
     va = VexArchTILEGX;
     vai.hwcaps = VEX_HWCAPS_TILEGX_BASE;
     vai.endness = VexEndnessLE;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (e.g. AVX or non-AVX, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
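
   For example, a misaligned 32-byte (AVX) transfer against 16-byte
   cache lines could touch 3 lines, whereas insisting that the line
   size is at least 32 bytes caps any single transfer at 2 lines.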
*/
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  elif defined(VGA_tilegx)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)        \
      || defined(VGP_arm_linux) || defined(VGO_darwin)          \
      || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)  \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)  \
      || defined(VGP_tilegx_linux)
   return f;
#  elif defined(VGP_ppc64be_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
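   /* (The remaining two descriptor words hold the TOC pointer and the
      environment pointer; only the entry address is needed here.) */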
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end ---*/
/*--------------------------------------------------------------------*/