/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

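/* Minimal usage sketch (not from this file): a caller that wants to
   redirect a thread to a new code address would pair these accessors
   like so, assuming `tid` names a valid, live thread and
   `some_trampoline` is a hypothetical target:

      Addr saved_ip = VG_(get_IP)(tid);
      VG_(set_IP)(tid, (Addr)some_trampoline);
      ...
      VG_(set_IP)(tid, saved_ip);   // restore on the way out
*/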
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_tilegx)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_pc;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r54;
   regs->misc.TILEGX.r52
      = VG_(threads)[tid].arch.vex.guest_r52;
   regs->misc.TILEGX.r55
      = VG_(threads)[tid].arch.vex.guest_r55;
#  else
#    error "Unknown arch"
#  endif
}

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}

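/* Minimal usage sketch (not from this file): a tool fetching the
   shadow-1 copy of a guest register.  Assumes an amd64 guest; the
   offsetof expression is one illustrative way a tool could compute
   the offset, not a quotation of any tool's actual code:

      UWord sh1_rax;
      VG_(get_shadow_regs_area)( tid, (UChar*)&sh1_rax, 1,
                                 offsetof(VexGuestAMD64State, guest_RAX),
                                 sizeof(sh1_rax) );
*/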

static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %d\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#elif defined(VGA_tilegx)
   (*f)(tid, "r0",  vex->guest_r0 );
   (*f)(tid, "r1",  vex->guest_r1 );
   (*f)(tid, "r2",  vex->guest_r2 );
   (*f)(tid, "r3",  vex->guest_r3 );
   (*f)(tid, "r4",  vex->guest_r4 );
   (*f)(tid, "r5",  vex->guest_r5 );
   (*f)(tid, "r6",  vex->guest_r6 );
   (*f)(tid, "r7",  vex->guest_r7 );
   (*f)(tid, "r8",  vex->guest_r8 );
   (*f)(tid, "r9",  vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
   (*f)(tid, "r32", vex->guest_r32);
   (*f)(tid, "r33", vex->guest_r33);
   (*f)(tid, "r34", vex->guest_r34);
   (*f)(tid, "r35", vex->guest_r35);
   (*f)(tid, "r36", vex->guest_r36);
   (*f)(tid, "r37", vex->guest_r37);
   (*f)(tid, "r38", vex->guest_r38);
   (*f)(tid, "r39", vex->guest_r39);
   (*f)(tid, "r40", vex->guest_r40);
   (*f)(tid, "r41", vex->guest_r41);
   (*f)(tid, "r42", vex->guest_r42);
   (*f)(tid, "r43", vex->guest_r43);
   (*f)(tid, "r44", vex->guest_r44);
   (*f)(tid, "r45", vex->guest_r45);
   (*f)(tid, "r46", vex->guest_r46);
   (*f)(tid, "r47", vex->guest_r47);
   (*f)(tid, "r48", vex->guest_r48);
   (*f)(tid, "r49", vex->guest_r49);
   (*f)(tid, "r50", vex->guest_r50);
   (*f)(tid, "r51", vex->guest_r51);
   (*f)(tid, "r52", vex->guest_r52);
   (*f)(tid, "r53", vex->guest_r53);
   (*f)(tid, "r54", vex->guest_r54);
   (*f)(tid, "r55", vex->guest_r55);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}
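
/* Minimal usage sketch: walking every live thread's stack bounds with
   the iterator pair above.

      ThreadId tid;
      Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
         // [stack_min, stack_max] spans tid's client stack
      }
*/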

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, s390x, arm and mips32 we'll need
   to do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
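
/* All of the SIGILL-based probes below follow the same pattern:

      have_FOO = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FOO = False;  // insn raised SIGILL; handler longjmp'd here
      } else {
         __asm__ __volatile__(...);  // try the candidate instruction
      }

   with handler_unsup_insn installed for SIGILL (and, on ppc, SIGFPE)
   for the duration of the probing. */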


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is a
      power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#if defined(VGA_mips32) || defined(VGA_mips64)

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   const char *search_MIPS_str = "MIPS";
   const char *search_Broadcom_str = "Broadcom";
   const char *search_Netlogic_str = "Netlogic";
   const char *search_Cavium_str= "Cavium";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
       return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_Netlogic_str) != NULL)
       return VEX_PRID_COMP_NETLOGIC;
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
       return VEX_PRID_COMP_CAVIUM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
       return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind.
   Also determine information about the caches on this host. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset, always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
       vai.hwcaps = 0; /*baseline - no sse at all*/
       VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS
        support). */
     have_bmi = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* on ppc64be, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1. Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE. If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
         register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

         __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                              : "=m" (hoststfle), "+d"(reg0)
                              : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as,
        or later than, STFLE, so the absence of STFLE implies the
        absence of the facilities we're trying to detect. */
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
     };

     /* Set hwcaps according to the detected facilities */
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
        if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) + //  %s %d
                   7 + 1 + 4 + 2  // machine %4d
                   + 1];  // \0
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1d", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
1427 #elif defined(VGA_arm)
1428    {
1429      /* Same instruction set detection algorithm as for ppc32. */
1430      vki_sigset_t          saved_set, tmp_set;
1431      vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1432      vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
1433 
1434      volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
1435      volatile Int archlevel;
1436      Int r;
1437 
     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
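     /* (If the PLI probe above succeeded, archlevel is now 7 and the
        v6 probe below is skipped; otherwise we fall back and retry at
        the v6 level.) */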
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm64)
   {
     va = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* 0 denotes 'not set'.  The range of legitimate values here,
        once set, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                      "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);
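     /* (Worked example: the CTR_EL0 DminLine/IminLine fields give the
        log2 of the line size in 4-byte words, hence the "+ 2" above
        to convert to log2 bytes.  A hypothetical value of 0x00040004
        gives DminLine = (0x00040004 >> 16) & 0xF = 4 and IminLine = 4,
        so both lg2 sizes are 6, i.e. 64-byte lines.) */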

     return True;
   }

#elif defined(VGA_mips32)
   {
     /* Define the position of the F64 bit in the FIR register. */
#    define FP64 22
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_DSP, have_DSPr2;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     if (model == VEX_PRID_COMP_MIPS) {
        /* DSPr2 instructions. */
        have_DSPr2 = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_DSPr2 = False;
        } else {
           __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
        }
        if (have_DSPr2) {
           /* We assume it's 74K, since it can run DSPr2. */
           vai.hwcaps |= VEX_PRID_IMP_74K;
        } else {
           /* DSP instructions. */
           have_DSP = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSP = False;
           } else {
              __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
           }
           if (have_DSP) {
              /* We assume it's 34K, since it has support for DSP. */
              vai.hwcaps |= VEX_PRID_IMP_34K;
           }
        }
     }

     /* Check whether the CPU has an FPU and 32 double-precision FP
        registers. */
     int FIR = 0;
     __asm__ __volatile__(
        "cfc1 %0, $0"  "\n\t"
        : "=r" (FIR)
     );
     if (FIR & (1 << FP64)) {
        vai.hwcaps |= VEX_PRID_CPU_32FPR;
     }
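     /* (Illustration: FP64 == 22, so the mask above is 1 << 22 ==
        0x00400000; a FIR value with that bit set advertises 64-bit
        FP registers, hence 32 double-precision registers.) */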

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_tilegx)
   {
     va = VexArchTILEGX;
     vai.hwcaps = VEX_HWCAPS_TILEGX_BASE;
     vai.endness = VexEndnessLE;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   the minimum cache line size they are prepared to simulate.
   Basically we require that the minimum cache line size is at least
   as large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
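
   (Worked example: with 32-byte cache lines and a 32-byte register,
   a transfer starting at address a covers bytes [a, a+31], which can
   span at most two 32-byte lines whatever the alignment of a; only a
   transfer larger than the line size could touch three.)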
*/
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  elif defined(VGA_tilegx)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux) || defined(VGO_darwin)          \
      || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
      || defined(VGP_tilegx_linux)
   return f;
#  elif defined(VGP_ppc64be_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
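   /* (A sketch of that layout; the struct and field names here are
      ours, for illustration only:

         typedef struct {
            UWord entry;  // address of the function's first insn
            UWord toc;    // TOC (r2) value to establish for the callee
            UWord env;    // environment pointer; unused by C code
         } PPC64FnDescr;

      hence descr[0] below is the entry address.) */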
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
