/*--------------------------------------------------------------------*/ /*--- Ptrcheck: a pointer-use checker. ---*/ /*--- This file checks stack and global array accesses. ---*/ /*--- sg_main.c ---*/ /*--------------------------------------------------------------------*/ /* This file is part of Ptrcheck, a Valgrind tool for checking pointer use in programs. Copyright (C) 2008-2015 OpenWorks Ltd info@open-works.co.uk This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. The GNU General Public License is contained in the file COPYING. Neither the names of the U.S. Department of Energy nor the University of California nor the names of its contributors may be used to endorse or promote products derived from this software without prior written permission. */ #include "pub_tool_basics.h" #include "pub_tool_libcbase.h" #include "pub_tool_libcassert.h" #include "pub_tool_libcprint.h" #include "pub_tool_tooliface.h" #include "pub_tool_wordfm.h" #include "pub_tool_xarray.h" #include "pub_tool_threadstate.h" #include "pub_tool_mallocfree.h" #include "pub_tool_machine.h" #include "pub_tool_debuginfo.h" #include "pub_tool_options.h" #include "pc_common.h" #include "sg_main.h" // self static void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/ ////////////////////////////////////////////////////////////// // // // Basic Stuff // // // ////////////////////////////////////////////////////////////// static inline Bool is_sane_TId ( ThreadId tid ) { return tid >= 0 && tid < VG_N_THREADS && tid != VG_INVALID_THREADID; } static void* sg_malloc ( const HChar* cc, SizeT n ) { void* p; tl_assert(n > 0); p = VG_(malloc)( cc, n ); return p; } static void sg_free ( void* p ) { tl_assert(p); VG_(free)(p); } /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the first interval is lower, 1 if the first interval is higher, and 0 if there is any overlap. Redundant paranoia with casting is there following what looked distinctly like a bug in gcc-4.1.2, in which some of the comparisons were done signedly instead of unsignedly. */ inline static Word cmp_nonempty_intervals ( Addr a1, SizeT n1, Addr a2, SizeT n2 ) { UWord a1w = (UWord)a1; UWord n1w = (UWord)n1; UWord a2w = (UWord)a2; UWord n2w = (UWord)n2; tl_assert(n1w > 0 && n2w > 0); if (a1w + n1w <= a2w) return -1L; if (a2w + n2w <= a1w) return 1L; return 0; } /* Return true iff [aSmall,aSmall+nSmall) is entirely contained within [aBig,aBig+nBig). */ inline static Bool is_subinterval_of ( Addr aBig, SizeT nBig, Addr aSmall, SizeT nSmall ) { tl_assert(nBig > 0 && nSmall > 0); return aBig <= aSmall && aSmall + nSmall <= aBig + nBig; } inline static Addr Addr__min ( Addr a1, Addr a2 ) { return a1 < a2 ? a1 : a2; } inline static Addr Addr__max ( Addr a1, Addr a2 ) { return a1 < a2 ? a2 : a1; } ////////////////////////////////////////////////////////////// // // // StackBlocks Persistent Cache // // // ////////////////////////////////////////////////////////////// /* We maintain a set of XArray* of StackBlocks. These are never freed. When a new StackBlock vector is acquired from VG_(di_get_local_blocks_at_ip), we compare it to the existing set. If not present, it is added. If present, the just-acquired one is freed and the copy used. This simplifies storage management elsewhere. It allows us to assume that a pointer to an XArray* of StackBlock is valid forever. It also means there are no duplicates anywhere, which could be important from a space point of view for programs that generate a lot of translations, or where translations are frequently discarded and re-made. Note that we normalise the arrays by sorting the elements according to an arbitrary total order, so as to avoid the situation that two vectors describe the same set of variables but are not structurally identical. */ static inline Bool StackBlock__sane ( const StackBlock* fb ) { if (fb->name[ sizeof(fb->name)-1 ] != 0) return False; if (fb->spRel != False && fb->spRel != True) return False; if (fb->isVec != False && fb->isVec != True) return False; return True; } /* Generate an arbitrary total ordering on StackBlocks. */ static Word StackBlock__cmp ( const StackBlock* fb1, const StackBlock* fb2 ) { Word r; tl_assert(StackBlock__sane(fb1)); tl_assert(StackBlock__sane(fb2)); /* Hopefully the .base test hits most of the time. For the blocks associated with any particular instruction, if the .base values are the same then probably it doesn't make sense for the other fields to be different. But this is supposed to be a completely general structural total order, so we have to compare everything anyway. */ if (fb1->base < fb2->base) return -1; if (fb1->base > fb2->base) return 1; /* compare sizes */ if (fb1->szB < fb2->szB) return -1; if (fb1->szB > fb2->szB) return 1; /* compare sp/fp flag */ if (fb1->spRel < fb2->spRel) return -1; if (fb1->spRel > fb2->spRel) return 1; /* compare is/is-not array-typed flag */ if (fb1->isVec < fb2->isVec) return -1; if (fb1->isVec > fb2->isVec) return 1; /* compare the name */ r = (Word)VG_(strcmp)(fb1->name, fb2->name); return r; } /* Returns True if all fields except .szB are the same. szBs may or may not be the same; they are simply not consulted. */ static Bool StackBlock__all_fields_except_szB_are_equal ( StackBlock* fb1, StackBlock* fb2 ) { tl_assert(StackBlock__sane(fb1)); tl_assert(StackBlock__sane(fb2)); return fb1->base == fb2->base && fb1->spRel == fb2->spRel && fb1->isVec == fb2->isVec && 0 == VG_(strcmp)(fb1->name, fb2->name); } /* Generate an arbitrary total ordering on vectors of StackBlocks. */ static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s ) { Word i, r, n1, n2; n1 = VG_(sizeXA)( fb1s ); n2 = VG_(sizeXA)( fb2s ); if (n1 < n2) return -1; if (n1 > n2) return 1; for (i = 0; i < n1; i++) { StackBlock *fb1, *fb2; fb1 = VG_(indexXA)( fb1s, i ); fb2 = VG_(indexXA)( fb2s, i ); r = StackBlock__cmp( fb1, fb2 ); if (r != 0) return r; } tl_assert(i == n1 && i == n2); return 0; } static void pp_StackBlocks ( XArray* sbs ) { Word i, n = VG_(sizeXA)( sbs ); VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" ); for (i = 0; i < n; i++) { StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i ); VG_(message)(Vg_DebugMsg, " StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n", sb->base, sb->szB, sb->spRel ? 'Y' : 'N', sb->isVec ? 'Y' : 'N', &sb->name[0] ); } VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" ); } /* ---------- The StackBlock vector cache ---------- */ static WordFM* /* XArray* of StackBlock -> nothing */ frameBlocks_set = NULL; static void init_StackBlocks_set ( void ) { tl_assert(!frameBlocks_set); frameBlocks_set = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free, (Word(*)(UWord,UWord))StackBlocks__cmp ); tl_assert(frameBlocks_set); } /* Find the given StackBlock-vector in our collection thereof. If found, deallocate the supplied one, and return the address of the copy. If not found, add the supplied one to our collection and return its address. */ static XArray* /* of StackBlock */ StackBlocks__find_and_dealloc__or_add ( XArray* /* of StackBlock */ orig ) { UWord key, val; /* First, normalise, as per comments above. */ VG_(setCmpFnXA)( orig, (XACmpFn_t)StackBlock__cmp ); VG_(sortXA)( orig ); /* Now get rid of any exact duplicates. */ nuke_dups: { Word r, w, nEQ, n = VG_(sizeXA)( orig ); if (n >= 2) { w = 0; nEQ = 0; for (r = 0; r < n; r++) { if (r+1 < n) { StackBlock* pR0 = VG_(indexXA)( orig, r+0 ); StackBlock* pR1 = VG_(indexXA)( orig, r+1 ); Word c = StackBlock__cmp(pR0,pR1); tl_assert(c == -1 || c == 0); if (c == 0) { nEQ++; continue; } } if (w != r) { StackBlock* pW = VG_(indexXA)( orig, w ); StackBlock* pR = VG_(indexXA)( orig, r ); *pW = *pR; } w++; } tl_assert(r == n); tl_assert(w + nEQ == n); if (w < n) { VG_(dropTailXA)( orig, n-w ); } if (0) VG_(printf)("delta %ld\n", n-w); } } /* Deal with the following strangeness, where two otherwise identical blocks are claimed to have different sizes. In which case we use the larger size. */ /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" } StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" } StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" } */ { Word i, n = VG_(sizeXA)( orig ); if (n >= 2) { for (i = 0; i < n-1; i++) { StackBlock* sb0 = VG_(indexXA)( orig, i+0 ); StackBlock* sb1 = VG_(indexXA)( orig, i+1 ); if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) { /* They can't be identical because the previous tidying pass would have removed the duplicates. And they can't be > because the earlier sorting pass would have ordered otherwise-identical descriptors according to < on .szB fields. Hence: */ tl_assert(sb0->szB < sb1->szB); sb0->szB = sb1->szB; /* This makes the blocks identical, at the size of the larger one. Rather than go to all the hassle of sliding the rest down, simply go back to the remove-duplicates stage. The assertion guarantees that we eventually make progress, since the rm-dups stage will get rid of one of the blocks. This is expected to happen only exceedingly rarely. */ tl_assert(StackBlock__cmp(sb0,sb1) == 0); goto nuke_dups; } } } } /* If there are any blocks which overlap and have the same fpRel-ness, junk the whole descriptor; it's obviously bogus. Icc11 certainly generates bogus info from time to time. This check is pretty weak; really we ought to have a stronger sanity check. */ { Word i, n = VG_(sizeXA)( orig ); static Int moans = 3; for (i = 0; i < n-1; i++) { StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i ); StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 ); if (sb1->spRel == sb2->spRel && (sb1->base >= sb2->base || sb1->base + sb1->szB > sb2->base)) { if (moans > 0 && !VG_(clo_xml)) { moans--; VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: " "overlapping stack blocks\n"); if (VG_(clo_verbosity) >= 2) pp_StackBlocks(orig); if (moans == 0) VG_(message)(Vg_UserMsg, "Further instances of this " "message will not be shown\n" ); } VG_(dropTailXA)( orig, VG_(sizeXA)( orig )); break; } } } /* Now, do we have it already? */ if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) { /* yes */ XArray* res; tl_assert(val == 0); tl_assert(key != (UWord)orig); VG_(deleteXA)(orig); res = (XArray*)key; return res; } else { /* no */ VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 ); return orig; } } /* Top level function for getting the StackBlock vector for a given instruction. It is guaranteed that the returned pointer will be valid for the entire rest of the run, and also that the addresses of the individual elements of the array will not change. */ static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip ) { XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ ); tl_assert(blocks); return StackBlocks__find_and_dealloc__or_add( blocks ); } ////////////////////////////////////////////////////////////// // // // GlobalBlocks Persistent Cache // // // ////////////////////////////////////////////////////////////// /* Generate an arbitrary total ordering on GlobalBlocks. */ static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 ) { Word r; /* compare addrs */ if (gb1->addr < gb2->addr) return -1; if (gb1->addr > gb2->addr) return 1; /* compare sizes */ if (gb1->szB < gb2->szB) return -1; if (gb1->szB > gb2->szB) return 1; /* compare is/is-not array-typed flag */ if (gb1->isVec < gb2->isVec) return -1; if (gb1->isVec > gb2->isVec) return 1; /* compare the name */ r = (Word)VG_(strcmp)(gb1->name, gb2->name); if (r != 0) return r; /* compare the soname */ r = (Word)VG_(strcmp)(gb1->soname, gb2->soname); return r; } static WordFM* /* GlobalBlock* -> nothing */ globalBlock_set = NULL; static void init_GlobalBlock_set ( void ) { tl_assert(!globalBlock_set); globalBlock_set = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free, (Word(*)(UWord,UWord))GlobalBlock__cmp ); tl_assert(globalBlock_set); } /* Top level function for making GlobalBlocks persistent. Call here with a non-persistent version, and the returned one is guaranteed to be valid for the entire rest of the run. The supplied one is copied, not stored, so can be freed after the call. */ static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig ) { UWord key, val; /* Now, do we have it already? */ if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) { /* yes, return the copy */ GlobalBlock* res; tl_assert(val == 0); res = (GlobalBlock*)key; tl_assert(res != orig); return res; } else { /* no. clone it, store the clone and return the clone's address. */ GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1", sizeof(GlobalBlock) ); tl_assert(clone); *clone = *orig; VG_(addToFM)( globalBlock_set, (UWord)clone, 0 ); return clone; } } ////////////////////////////////////////////////////////////// // // // Interval tree of StackTreeBlock // // // ////////////////////////////////////////////////////////////// /* A node in a stack interval tree. Zero length intervals (.szB == 0) are not allowed. A stack interval tree is a (WordFM StackTreeNode* void). There is one stack interval tree for each thread. */ typedef struct { Addr addr; SizeT szB; /* copied from .descr->szB */ StackBlock* descr; /* it's an instance of this block */ UWord depth; /* depth of stack at time block was pushed */ } StackTreeNode; static void pp_StackTree ( WordFM* sitree, const HChar* who ) { UWord keyW, valW; VG_(printf)("<<< BEGIN pp_StackTree %s\n", who ); VG_(initIterFM)( sitree ); while (VG_(nextIterFM)( sitree, &keyW, &valW )) { StackTreeNode* nd = (StackTreeNode*)keyW; VG_(printf)(" [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB, nd->descr, nd->descr->name, nd->descr->szB); } VG_(printf)(">>> END pp_StackTree %s\n", who ); } /* Interval comparison function for StackTreeNode */ static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1, StackTreeNode* sn2 ) { return cmp_nonempty_intervals(sn1->addr, sn1->szB, sn2->addr, sn2->szB); } /* Find the node holding 'a', if any. */ static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a ) { UWord keyW, valW; StackTreeNode key; tl_assert(sitree); key.addr = a; key.szB = 1; if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) { StackTreeNode* res = (StackTreeNode*)keyW; tl_assert(valW == 0); tl_assert(res != &key); return res; } else { return NULL; } } /* Note that the supplied XArray of FrameBlock must have been made persistent already. */ __attribute__((noinline)) static void add_blocks_to_StackTree ( /*MOD*/WordFM* sitree, XArray* /* FrameBlock */ descrs, XArray* /* Addr */ bases, UWord depth ) { Bool debug = (Bool)0; Word i, nDescrs, nBases; nDescrs = VG_(sizeXA)( descrs ), nBases = VG_(sizeXA)( bases ); tl_assert(nDescrs == nBases); if (nDescrs == 0) return; tl_assert(sitree); if (debug) { VG_(printf)("\ndepth = %lu\n", depth); pp_StackTree( sitree, "add_blocks_to_StackTree-pre" ); pp_StackBlocks(descrs); } for (i = 0; i < nDescrs; i++) { Bool already_present; StackTreeNode* nyu; Addr addr = *(Addr*)VG_(indexXA)( bases, i ); StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i ); tl_assert(descr->szB > 0); nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) ); nyu->addr = addr; nyu->szB = descr->szB; nyu->descr = descr; nyu->depth = depth; if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB); already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 ); /* The interval can't already be there; else we have overlapping stack blocks. */ tl_assert(!already_present); if (debug) { pp_StackTree( sitree, "add_blocks_to_StackTree-step" ); } } if (debug) { pp_StackTree( sitree, "add_blocks_to_StackTree-post" ); VG_(printf)("\n"); } } static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree, XArray* /* Addr */ bases ) { UWord oldK, oldV; Word i, nBases = VG_(sizeXA)( bases ); for (i = 0; i < nBases; i++) { Bool b; Addr addr = *(Addr*)VG_(indexXA)( bases, i ); StackTreeNode* nd = find_StackTreeNode(sitree, addr); /* The interval must be there; we added it earlier when the associated frame was created. */ tl_assert(nd); b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd ); /* we just found the block! */ tl_assert(b); tl_assert(oldV == 0); tl_assert(nd == (StackTreeNode*)oldK); sg_free(nd); } } static void delete_StackTree__kFin ( UWord keyW ) { StackTreeNode* nd = (StackTreeNode*)keyW; tl_assert(nd); sg_free(nd); } static void delete_StackTree__vFin ( UWord valW ) { tl_assert(valW == 0); } static void delete_StackTree ( WordFM* sitree ) { VG_(deleteFM)( sitree, delete_StackTree__kFin, delete_StackTree__vFin ); } static WordFM* new_StackTree ( void ) { return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free, (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode ); } ////////////////////////////////////////////////////////////// // // // Interval tree of GlobalTreeBlock // // // ////////////////////////////////////////////////////////////// /* A node in a global interval tree. Zero length intervals (.szB == 0) are not allowed. A global interval tree is a (WordFM GlobalTreeNode* void). There is one global interval tree for the entire process. */ typedef struct { Addr addr; /* copied from .descr->addr */ SizeT szB; /* copied from .descr->szB */ GlobalBlock* descr; /* it's this block */ } GlobalTreeNode; static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) { tl_assert(nd->descr); VG_(printf)("GTNode [%#lx,+%lu) %s", nd->addr, nd->szB, nd->descr->name); } static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree, const HChar* who ) { UWord keyW, valW; GlobalTreeNode* nd; VG_(printf)("<<< GlobalBlockTree (%s)\n", who); VG_(initIterFM)( gitree ); while (VG_(nextIterFM)( gitree, &keyW, &valW )) { tl_assert(valW == 0); nd = (GlobalTreeNode*)keyW; VG_(printf)(" "); GlobalTreeNode__pp(nd); VG_(printf)("\n"); } VG_(doneIterFM)( gitree ); VG_(printf)(">>>\n"); } /* Interval comparison function for GlobalTreeNode */ static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1, GlobalTreeNode* gn2 ) { return cmp_nonempty_intervals( gn1->addr, gn1->szB, gn2->addr, gn2->szB ); } /* Find the node holding 'a', if any. */ static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a ) { UWord keyW, valW; GlobalTreeNode key; key.addr = a; key.szB = 1; if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) { GlobalTreeNode* res = (GlobalTreeNode*)keyW; tl_assert(valW == 0); tl_assert(res != &key); return res; } else { return NULL; } } /* Note that the supplied GlobalBlock must have been made persistent already. */ static void add_block_to_GlobalTree ( /*MOD*/WordFM* gitree, GlobalBlock* descr ) { Bool already_present; GlobalTreeNode *nyu, *nd; UWord keyW, valW; static Int moans = 3; tl_assert(descr->szB > 0); nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) ); nyu->addr = descr->addr; nyu->szB = descr->szB; nyu->descr = descr; /* Basically it's an error to add a global block to the tree that is already in the tree. However, detect and ignore attempts to insert exact duplicates; they do appear for some reason (possible a bug in m_debuginfo?) */ already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu ); if (already_present) { tl_assert(valW == 0); nd = (GlobalTreeNode*)keyW; tl_assert(nd); tl_assert(nd != nyu); tl_assert(nd->descr); tl_assert(nyu->descr); if (nd->addr == nyu->addr && nd->szB == nyu->szB /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */ /* Although it seems reasonable to demand that duplicate blocks have identical names, that is too strict. For example, reading debuginfo from glibc produces two otherwise identical blocks with names "tzname" and "__tzname". A constraint of the form "must be identical, or one must be a substring of the other" would fix that. However, such trickery is scuppered by the fact that we truncate all variable names to 15 characters to make storage management simpler, hence giving pairs like "__EI___pthread_" (truncated) vs "__pthread_keys". So it's simplest just to skip the name comparison completely. */ && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) { /* exact duplicate; ignore it */ sg_free(nyu); return; } /* else fall through; the assertion below will catch it */ } already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 ); /* The interval can't already be there; else we have overlapping global blocks. */ /* Unfortunately (25 Jan 09) at least icc11 has been seen to generate overlapping block descriptions in the Dwarf3; clearly bogus. */ if (already_present && moans > 0 && !VG_(clo_xml)) { moans--; VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: " "overlapping global blocks\n"); if (VG_(clo_verbosity) >= 2) { GlobalTree__pp( gitree, "add_block_to_GlobalTree: non-exact duplicate" ); VG_(printf)("Overlapping block: "); GlobalTreeNode__pp(nyu); VG_(printf)("\n"); } if (moans == 0) VG_(message)(Vg_UserMsg, "Further instances of this " "message will not be shown\n" ); } /* tl_assert(!already_present); */ } static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree, Addr a, SizeT szB ) { /* One easy way to do this: look up [a,a+szB) in the tree. That will either succeed, producing a block which intersects that range, in which case we delete it and repeat; or it will fail, in which case there are no blocks intersecting the range, and we can bring the process to a halt. */ UWord keyW, valW, oldK, oldV; GlobalTreeNode key, *nd; Bool b, anyFound; tl_assert(szB > 0); anyFound = False; key.addr = a; key.szB = szB; while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) { anyFound = True; nd = (GlobalTreeNode*)keyW; tl_assert(valW == 0); tl_assert(nd != &key); tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0); b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key ); tl_assert(b); tl_assert(oldV == 0); tl_assert(oldK == keyW); /* check we deleted the node we just found */ } return anyFound; } ////////////////////////////////////////////////////////////// // // // Invar // // // ////////////////////////////////////////////////////////////// /* An invariant, as resulting from watching the destination of a memory referencing instruction. Initially is Inv_Unset until the instruction makes a first access. */ typedef enum { Inv_Unset=1, /* not established yet */ Inv_Unknown, /* unknown location */ Inv_Stack0, /* array-typed stack block in innermost frame */ Inv_StackN, /* array-typed stack block in non-innermost frame */ Inv_Global, /* array-typed global block */ } InvarTag; typedef struct { InvarTag tag; union { struct { } Unset; struct { } Unknown; struct { Addr addr; SizeT szB; StackBlock* descr; } Stack0; /* innermost stack frame */ struct { /* Pointer to a node in the interval tree for this thread. */ StackTreeNode* nd; } StackN; /* non-innermost stack frame */ struct { /* Pointer to a GlobalBlock in the interval tree of global blocks. */ GlobalTreeNode* nd; } Global; } Inv; } Invar; /* Partial debugging printing for an Invar. */ static void pp_Invar ( Invar* i ) { switch (i->tag) { case Inv_Unset: VG_(printf)("Unset"); break; case Inv_Unknown: VG_(printf)("Unknown"); break; case Inv_Stack0: VG_(printf)("Stack0 [%#lx,+%lu)", i->Inv.Stack0.addr, i->Inv.Stack0.szB); break; case Inv_StackN: VG_(printf)("StackN [%#lx,+%lu)", i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB); break; case Inv_Global: VG_(printf)("Global [%#lx,+%lu)", i->Inv.Global.nd->addr, i->Inv.Global.nd->szB); break; default: tl_assert(0); } } /* Compare two Invars for equality. */ static Bool eq_Invar ( Invar* i1, Invar* i2 ) { if (i1->tag != i2->tag) return False; switch (i1->tag) { case Inv_Unset: return True; case Inv_Unknown: return True; case Inv_Stack0: return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB; case Inv_StackN: return i1->Inv.StackN.nd == i2->Inv.StackN.nd; case Inv_Global: return i1->Inv.Global.nd == i2->Inv.Global.nd; default: tl_assert(0); } /*NOTREACHED*/ tl_assert(0); } /* Generate a piece of text showing 'ea' is relative to 'invar', if known. If unknown, generate an empty string. 'buf' must be at least 32 bytes in size. Also return the absolute value of the delta, if known, or zero if not known. */ static void gen_delta_str ( /*OUT*/HChar* buf, /*OUT*/UWord* absDelta, Invar* inv, Addr ea ) { Addr block = 0; SizeT szB = 0; buf[0] = 0; *absDelta = 0; switch (inv->tag) { case Inv_Unknown: case Inv_Unset: return; /* unknown */ case Inv_Stack0: block = inv->Inv.Stack0.addr; szB = inv->Inv.Stack0.szB; break; case Inv_StackN: block = inv->Inv.StackN.nd->addr; szB = inv->Inv.StackN.nd->szB; break; case Inv_Global: block = inv->Inv.Global.nd->addr; szB = inv->Inv.Global.nd->szB; break; default: tl_assert(0); } tl_assert(szB > 0); if (ea < block) { *absDelta = block - ea; VG_(sprintf)(buf, "%'lu before", *absDelta); } else if (ea >= block + szB) { *absDelta = ea - (block + szB); VG_(sprintf)(buf, "%'lu after", *absDelta); } else { // Leave *absDelta at zero. VG_(sprintf)(buf, "%'lu inside", ea - block); } } /* Print selected parts of an Invar, suitable for use in error messages. */ static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth ) { const HChar* str; tl_assert(nBuf >= 128); buf[0] = 0; switch (inv->tag) { case Inv_Unknown: VG_(sprintf)(buf, "%s", "unknown"); break; case Inv_Stack0: str = "array"; VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in this frame", str, inv->Inv.Stack0.descr->name, inv->Inv.Stack0.szB ); break; case Inv_StackN: str = "array"; VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in frame %lu back from here", str, inv->Inv.StackN.nd->descr->name, inv->Inv.StackN.nd->descr->szB, depth - inv->Inv.StackN.nd->depth ); break; case Inv_Global: str = "array"; VG_(sprintf)(buf, "global %s \"%s\" of size %'lu in object with soname \"%s\"", str, inv->Inv.Global.nd->descr->name, inv->Inv.Global.nd->descr->szB, inv->Inv.Global.nd->descr->soname ); break; case Inv_Unset: VG_(sprintf)(buf, "%s", "Unset!"); break; default: tl_assert(0); } } ////////////////////////////////////////////////////////////// // // // our globals // // // ////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////// /// #define N_QCACHE 16 /* Powers of two only, else the result will be chaos */ #define QCACHE_ADVANCE_EVERY 16 /* Per-thread query cache. Note that the invar can only be Inv_StackN (but not Inv_Stack0), Inv_Global or Inv_Unknown. */ typedef struct { Addr addr; SizeT szB; Invar inv; } QCElem; typedef struct { Word nInUse; QCElem elems[N_QCACHE]; } QCache; static void QCache__invalidate ( QCache* qc ) { tl_assert(qc->nInUse >= 0); qc->nInUse = 0; } static void QCache__pp ( QCache* qc, const HChar* who ) { Word i; VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who); for (i = 0; i < qc->nInUse; i++) { VG_(printf)(" [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB); pp_Invar(&qc->elems[i].inv); VG_(printf)("\n"); } VG_(printf)(">>>\n"); } static ULong stats__qcache_queries = 0; static ULong stats__qcache_misses = 0; static ULong stats__qcache_probes = 0; /// ////////////////////////////////////////////////////////////// /* Each thread has: * a shadow stack of StackFrames, which is a double-linked list * an stack block interval tree */ static struct _StackFrame** shadowStacks; static WordFM** /* StackTreeNode */ siTrees; static QCache* qcaches; /* Additionally, there is one global variable interval tree for the entire process. */ static WordFM* /* GlobalTreeNode */ giTree; static void invalidate_all_QCaches ( void ) { Word i; for (i = 0; i < VG_N_THREADS; i++) { QCache__invalidate( &qcaches[i] ); } } static void ourGlobals_init ( void ) { Word i; shadowStacks = sg_malloc( "di.sg_main.oGi.2", VG_N_THREADS * sizeof shadowStacks[0] ); siTrees = sg_malloc( "di.sg_main.oGi.3", VG_N_THREADS * sizeof siTrees[0] ); qcaches = sg_malloc( "di.sg_main.oGi.4", VG_N_THREADS * sizeof qcaches[0] ); for (i = 0; i < VG_N_THREADS; i++) { shadowStacks[i] = NULL; siTrees[i] = NULL; qcaches[i] = (QCache){}; } invalidate_all_QCaches(); giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free, (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode ); } ////////////////////////////////////////////////////////////// // // // Handle global variable load/unload events // // // ////////////////////////////////////////////////////////////// static void acquire_globals ( ULong di_handle ) { Word n, i; XArray* /* of GlobalBlock */ gbs; if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle ); gbs = VG_(di_get_global_blocks_from_dihandle) (di_handle, True/*arrays only*/); if (0) VG_(printf)(" GOT %ld globals\n", VG_(sizeXA)( gbs )); n = VG_(sizeXA)( gbs ); for (i = 0; i < n; i++) { GlobalBlock* gbp; GlobalBlock* gb = VG_(indexXA)( gbs, i ); if (0) VG_(printf)(" new Global size %2lu at %#lx: %s %s\n", gb->szB, gb->addr, gb->soname, gb->name ); tl_assert(gb->szB > 0); /* Make a persistent copy of each GlobalBlock, and add it to the tree. */ gbp = get_persistent_GlobalBlock( gb ); add_block_to_GlobalTree( giTree, gbp ); } VG_(deleteXA)( gbs ); } /* We only intercept these two because we need to see any di_handles that might arise from the mappings/allocations. */ void sg_new_mem_mmap( Addr a, SizeT len, Bool rr, Bool ww, Bool xx, ULong di_handle ) { if (di_handle > 0) acquire_globals(di_handle); } void sg_new_mem_startup( Addr a, SizeT len, Bool rr, Bool ww, Bool xx, ULong di_handle ) { if (di_handle > 0) acquire_globals(di_handle); } void sg_die_mem_munmap ( Addr a, SizeT len ) { Bool debug = (Bool)0; Bool overlap = False; if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len ); if (len == 0) return; overlap = del_GlobalTree_range(giTree, a, len); { /* redundant sanity check */ UWord keyW, valW; VG_(initIterFM)( giTree ); while (VG_(nextIterFM)( giTree, &keyW, &valW )) { GlobalTreeNode* nd = (GlobalTreeNode*)keyW; tl_assert(valW == 0); tl_assert(nd->szB > 0); tl_assert(nd->addr + nd->szB <= a || a + len <= nd->addr); } VG_(doneIterFM)( giTree ); } if (!overlap) return; /* Ok, the range contained some blocks. Therefore we'll need to visit all the Invars in all the thread shadow stacks, and convert all Inv_Global entries that intersect [a,a+len) to Inv_Unknown. */ tl_assert(len > 0); preen_global_Invars( a, len ); invalidate_all_QCaches(); } ////////////////////////////////////////////////////////////// // // // StackFrame // // // ////////////////////////////////////////////////////////////// static ULong stats__total_accesses = 0; static ULong stats__classify_Stack0 = 0; static ULong stats__classify_StackN = 0; static ULong stats__classify_Global = 0; static ULong stats__classify_Unknown = 0; static ULong stats__Invars_preened = 0; static ULong stats__Invars_changed = 0; static ULong stats__t_i_b_empty = 0; static ULong stats__htab_fast = 0; static ULong stats__htab_searches = 0; static ULong stats__htab_probes = 0; static ULong stats__htab_resizes = 0; /* A dynamic instance of an instruction */ typedef struct { /* IMMUTABLE */ Addr insn_addr; /* NB! zero means 'not in use' */ XArray* blocks; /* XArray* of StackBlock, or NULL if none */ /* MUTABLE */ Invar invar; } IInstance; #define N_HTAB_FIXED 64 typedef struct _StackFrame { /* The sp when the frame was created, so we know when to get rid of it. */ Addr creation_sp; /* The stack frames for a thread are arranged as a doubly linked list. Obviously the outermost frame in the stack has .outer as NULL and the innermost in theory has .inner as NULL. However, when a function returns, we don't delete the just-vacated StackFrame. Instead, it is retained in the list and will be re-used when the next call happens. This is so as to avoid constantly having to dynamically allocate and deallocate frames. */ struct _StackFrame* inner; struct _StackFrame* outer; Word depth; /* 0 for outermost; increases inwards */ /* Information for each memory referencing instruction, for this instantiation of the function. The iinstances array is operated as a simple linear-probe hash table, which is dynamically expanded as necessary. Once critical thing is that an IInstance with a .insn_addr of zero is interpreted to mean that hash table slot is unused. This means we can't store an IInstance for address zero. */ /* Note that htab initially points to htab_fixed. If htab_fixed turns out not to be big enough then htab is made to point to dynamically allocated memory. But it's often the case that htab_fixed is big enough, so this optimisation saves a huge number of sg_malloc/sg_free call pairs. */ IInstance* htab; UWord htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */ UWord htab_used; /* number of hash table slots currently in use */ /* If this frame is currently making a call, then the following are relevant. */ Addr sp_at_call; Addr fp_at_call; XArray* /* of Addr */ blocks_added_by_call; /* See comment just above */ IInstance htab_fixed[N_HTAB_FIXED]; } StackFrame; /* Move this somewhere else? */ /* Visit all Invars in the entire system. If 'isHeap' is True, change all Inv_Heap Invars that intersect [a,a+len) to Inv_Unknown. If 'isHeap' is False, do the same but to the Inv_Global{S,V} Invars instead. */ __attribute__((noinline)) static void preen_global_Invar ( Invar* inv, Addr a, SizeT len ) { stats__Invars_preened++; tl_assert(len > 0); tl_assert(inv); switch (inv->tag) { case Inv_Global: tl_assert(inv->Inv.Global.nd); tl_assert(inv->Inv.Global.nd->szB > 0); if (0) VG_(printf)("preen_Invar Global %#lx %lu\n", inv->Inv.Global.nd->addr, inv->Inv.Global.nd->szB); if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr, inv->Inv.Global.nd->szB)) { inv->tag = Inv_Unknown; stats__Invars_changed++; } break; case Inv_Stack0: case Inv_StackN: case Inv_Unknown: break; case Inv_Unset: /* this should never happen */ /* fallthrough */ default: tl_assert(0); } } __attribute__((noinline)) static void preen_global_Invars ( Addr a, SizeT len ) { Int i; UWord u; StackFrame* frame; tl_assert(len > 0); for (i = 0; i < VG_N_THREADS; i++) { frame = shadowStacks[i]; if (!frame) continue; /* no frames for this thread */ /* start from the innermost frame */ while (frame->inner) frame = frame->inner; tl_assert(frame->outer); /* work through the frames from innermost to outermost. The order isn't important; we just need to ensure we visit each frame once (including those which are not actually active, more 'inner' than the 'innermost active frame', viz, just hanging around waiting to be used, when the current innermost active frame makes more calls. See comments on definition of struct _StackFrame. */ for (; frame; frame = frame->outer) { UWord xx = 0; /* sanity check only; count of used htab entries */ if (!frame->htab) continue; /* frame not in use. See shadowStack_unwind(). */ for (u = 0; u < frame->htab_size; u++) { IInstance* ii = &frame->htab[u]; if (ii->insn_addr == 0) continue; /* not in use */ if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); } preen_global_Invar( &ii->invar, a, len ); xx++; } tl_assert(xx == frame->htab_used); } } } /* XXX this should be >> 2 on ppc32/64 since the bottom two bits of the ip are guaranteed to be zero */ inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) { return (ip >> 0) & (htab_size - 1); } __attribute__((noinline)) static void initialise_II_hash_table ( StackFrame* sf ) { UWord i; sf->htab_size = N_HTAB_FIXED; /* initial hash table size */ sf->htab = &sf->htab_fixed[0]; tl_assert(sf->htab); sf->htab_used = 0; for (i = 0; i < sf->htab_size; i++) sf->htab[i].insn_addr = 0; /* NOT IN USE */ } __attribute__((noinline)) static void resize_II_hash_table ( StackFrame* sf ) { UWord i, j, ix, old_size, new_size; IInstance *old_htab, *new_htab, *old; tl_assert(sf && sf->htab); old_size = sf->htab_size; new_size = 2 * old_size; old_htab = sf->htab; new_htab = sg_malloc( "di.sg_main.rIht.1", new_size * sizeof(IInstance) ); for (i = 0; i < new_size; i++) { new_htab[i].insn_addr = 0; /* NOT IN USE */ } for (i = 0; i < old_size; i++) { old = &old_htab[i]; if (old->insn_addr == 0 /* NOT IN USE */) continue; ix = compute_II_hash(old->insn_addr, new_size); /* find out where to put this, in the new table */ j = new_size; while (1) { if (new_htab[ix].insn_addr == 0) break; /* This can't ever happen, because it would mean the new table is full; that isn't allowed -- even the old table is only allowed to become half full. */ tl_assert(j > 0); j--; ix++; if (ix == new_size) ix = 0; } /* copy the old entry to this location */ tl_assert(ix < new_size); tl_assert(new_htab[ix].insn_addr == 0); new_htab[ix] = *old; tl_assert(new_htab[ix].insn_addr != 0); } /* all entries copied; free old table. */ if (old_htab != &sf->htab_fixed[0]) sg_free(old_htab); sf->htab = new_htab; sf->htab_size = new_size; /* check sf->htab_used is correct. Optional and a bit expensive but anyway: */ j = 0; for (i = 0; i < new_size; i++) { if (new_htab[i].insn_addr != 0) { j++; } } tl_assert(j == sf->htab_used); if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size); } __attribute__((noinline)) static IInstance* find_or_create_IInstance_SLOW ( StackFrame* sf, Addr ip, XArray* /* StackBlock */ ip_frameblocks ) { UWord i, ix; stats__htab_searches++; tl_assert(sf); tl_assert(sf->htab); /* Make sure the table loading doesn't get too high. */ if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) { stats__htab_resizes++; resize_II_hash_table(sf); } tl_assert(2 * sf->htab_used <= sf->htab_size); ix = compute_II_hash(ip, sf->htab_size); i = sf->htab_size; while (1) { stats__htab_probes++; /* Note that because of the way the fast-case handler works, these two tests are actually redundant in the first iteration of this loop. (Except they aren't redundant if the code just above resized the table first. :-) */ if (sf->htab[ix].insn_addr == ip) return &sf->htab[ix]; if (sf->htab[ix].insn_addr == 0) break; /* If i ever gets to zero and we have found neither what we're looking for nor an empty slot, the table must be full. Which isn't possible -- we monitor the load factor to ensure it doesn't get above say 50%; if that ever does happen the table is resized. */ tl_assert(i > 0); i--; ix++; if (ix == sf->htab_size) ix = 0; } /* So now we've found a free slot at ix, and we can use that. */ tl_assert(sf->htab[ix].insn_addr == 0); /* Add a new record in this slot. */ tl_assert(ip != 0); /* CAN'T REPRESENT THIS */ sf->htab[ix].insn_addr = ip; sf->htab[ix].blocks = ip_frameblocks; sf->htab[ix].invar.tag = Inv_Unset; sf->htab_used++; return &sf->htab[ix]; } inline static IInstance* find_or_create_IInstance ( StackFrame* sf, Addr ip, XArray* /* StackBlock */ ip_frameblocks ) { UWord ix = compute_II_hash(ip, sf->htab_size); /* Is it in the first slot we come to? */ if (LIKELY(sf->htab[ix].insn_addr == ip)) { stats__htab_fast++; return &sf->htab[ix]; } /* If the first slot we come to is empty, bag it. */ if (LIKELY(sf->htab[ix].insn_addr == 0)) { stats__htab_fast++; tl_assert(ip != 0); sf->htab[ix].insn_addr = ip; sf->htab[ix].blocks = ip_frameblocks; sf->htab[ix].invar.tag = Inv_Unset; sf->htab_used++; return &sf->htab[ix]; } /* Otherwise we hand off to the slow case, which searches other slots, and optionally resizes the table if necessary. */ return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks ); } __attribute__((noinline)) static Addr calculate_StackBlock_EA ( StackBlock* descr, Addr sp, Addr fp ) { UWord w1 = (UWord)descr->base; UWord w2 = (UWord)(descr->spRel ? sp : fp); UWord ea = w1 + w2; return ea; } /* Given an array of StackBlocks, return an array of Addrs, holding their effective addresses. Caller deallocates result array. */ __attribute__((noinline)) static XArray* /* Addr */ calculate_StackBlock_EAs ( XArray* /* StackBlock */ blocks, Addr sp, Addr fp ) { XArray* res; Word i, n = VG_(sizeXA)( blocks ); tl_assert(n > 0); res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) ); for (i = 0; i < n; i++) { StackBlock* blk = VG_(indexXA)( blocks, i ); Addr ea = calculate_StackBlock_EA( blk, sp, fp ); VG_(addToXA)( res, &ea ); } return res; } /* Try to classify the block into which a memory access falls, and write the result in 'inv'. This writes all relevant fields of 'inv'. */ __attribute__((noinline)) static void classify_address ( /*OUT*/Invar* inv, ThreadId tid, Addr ea, Addr sp, Addr fp, UWord szB, XArray* /* of StackBlock */ thisInstrBlocks ) { tl_assert(szB > 0); /* First, look in the stack blocks accessible in this instruction's frame. */ { Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks ); if (nBlocks == 0) stats__t_i_b_empty++; for (i = 0; i < nBlocks; i++) { StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i ); Addr bea = calculate_StackBlock_EA( descr, sp, fp ); if (bea <= ea && ea + szB <= bea + descr->szB) { /* found it */ inv->tag = Inv_Stack0; inv->Inv.Stack0.addr = bea; inv->Inv.Stack0.szB = descr->szB; inv->Inv.Stack0.descr = descr; stats__classify_Stack0++; return; } } } /* Look in this thread's query cache */ { Word i; QCache* cache = &qcaches[tid]; static UWord ctr = 0; stats__qcache_queries++; for (i = 0; i < cache->nInUse; i++) { if (0) /* expensive in a loop like this */ tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0); stats__qcache_probes++; if (is_subinterval_of(cache->elems[i].addr, cache->elems[i].szB, ea, szB)) { if (i > 0 && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) { QCElem tmp; tmp = cache->elems[i-1]; cache->elems[i-1] = cache->elems[i]; cache->elems[i] = tmp; i--; } *inv = cache->elems[i].inv; return; } } stats__qcache_misses++; } /* Ok, so it's not a block in the top frame. Perhaps it's a block in some calling frame? Consult this thread's stack-block interval tree to find out. */ { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea ); /* We know that [ea,ea+1) is in the block, but we need to restrict to the case where the whole access falls within it. */ if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) { nd = NULL; } if (nd) { /* found it */ inv->tag = Inv_StackN; inv->Inv.StackN.nd = nd; stats__classify_StackN++; goto out; } } /* Not in a stack block. Try the global pool. */ { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea); /* We know that [ea,ea+1) is in the block, but we need to restrict to the case where the whole access falls within it. */ if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) { nd = NULL; } if (nd) { /* found it */ inv->tag = Inv_Global; inv->Inv.Global.nd = nd; stats__classify_Global++; goto out; } } /* No idea - give up. */ inv->tag = Inv_Unknown; stats__classify_Unknown++; /* Update the cache */ out: { Addr toadd_addr = 0; SizeT toadd_szB = 0; QCache* cache = &qcaches[tid]; static UWord ctr = 0; Bool show = False; if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True; if (show) QCache__pp(cache, "before upd"); switch (inv->tag) { case Inv_Global: toadd_addr = inv->Inv.Global.nd->addr; toadd_szB = inv->Inv.Global.nd->szB; break; case Inv_StackN: toadd_addr = inv->Inv.StackN.nd->addr; toadd_szB = inv->Inv.StackN.nd->szB; break; case Inv_Unknown: { /* This is more complex. We need to figure out the intersection of the "holes" in the global and stack interval trees into which [ea,ea+szB) falls. This is further complicated by the fact that [ea,ea+szB) might not fall cleanly into a hole; it may instead fall across the boundary of a stack or global block. In that case we just ignore it and don't update the cache, since we have no way to represent this situation precisely. */ StackTreeNode sNegInf, sPosInf, sKey, *sLB, *sUB; GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB; Addr gMin, gMax, sMin, sMax, uMin, uMax; Bool sOK, gOK; sNegInf.addr = 0; sNegInf.szB = 1; sPosInf.addr = ~(UWord)0; sPosInf.szB = 1; gNegInf.addr = 0; gNegInf.szB = 1; gPosInf.addr = ~(UWord)0; gPosInf.szB = 1; sKey.addr = ea; sKey.szB = szB; gKey.addr = ea; gKey.szB = szB; if (0) VG_(printf)("Tree sizes %lu %lu\n", VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree)); sOK = VG_(findBoundsFM)( siTrees[tid], (UWord*)&sLB, NULL/*unused*/, (UWord*)&sUB, NULL/*unused*/, (UWord)&sNegInf, 0/*unused*/, (UWord)&sPosInf, 0/*unused*/, (UWord)&sKey ); gOK = VG_(findBoundsFM)( giTree, (UWord*)&gLB, NULL/*unused*/, (UWord*)&gUB, NULL/*unused*/, (UWord)&gNegInf, 0/*unused*/, (UWord)&gPosInf, 0/*unused*/, (UWord)&gKey ); if (!(sOK && gOK)) { /* If this happens, then [ea,ea+szB) partially overlaps a heap or stack block. We can't represent that, so just forget it (should be very rare). However, do maximum sanity checks first. In such a partial overlap case, it can't be the case that both [ea] and [ea+szB-1] overlap the same block, since if that were indeed the case then it wouldn't be a partial overlap; rather it would simply fall inside that block entirely and we shouldn't be inside this conditional at all. */ if (!sOK) { StackTreeNode *ndFirst, *ndLast; ndFirst = find_StackTreeNode( siTrees[tid], ea ); ndLast = find_StackTreeNode( siTrees[tid], ea+szB-1 ); /* if both ends of the range fall inside a block, they can't be in the same block. */ if (ndFirst && ndLast) tl_assert(ndFirst != ndLast); /* for each end of the range, if it is in a block, the range as a whole can't be entirely within the block. */ if (ndFirst) tl_assert(!is_subinterval_of(ndFirst->addr, ndFirst->szB, ea, szB)); if (ndLast) tl_assert(!is_subinterval_of(ndLast->addr, ndLast->szB, ea, szB)); } if (!gOK) { GlobalTreeNode *ndFirst, *ndLast; ndFirst = find_GlobalTreeNode( giTree, ea ); ndLast = find_GlobalTreeNode( giTree, ea+szB-1 ); /* if both ends of the range fall inside a block, they can't be in the same block. */ if (ndFirst && ndLast) tl_assert(ndFirst != ndLast); /* for each end of the range, if it is in a block, the range as a whole can't be entirely within the block. */ if (ndFirst) tl_assert(!is_subinterval_of(ndFirst->addr, ndFirst->szB, ea, szB)); if (ndLast) tl_assert(!is_subinterval_of(ndLast->addr, ndLast->szB, ea, szB)); } if (0) VG_(printf)("overlapping blocks in cache\n"); return; } sMin = sLB == &sNegInf ? 0 : (sLB->addr + sLB->szB); sMax = sUB == &sPosInf ? ~(UWord)0 : (sUB->addr - 1); gMin = gLB == &gNegInf ? 0 : (gLB->addr + gLB->szB); gMax = gUB == &gPosInf ? ~(UWord)0 : (gUB->addr - 1); if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n", sMin, sMax, gMin, gMax); /* [sMin,sMax] and [gMin,gMax] must both contain [ea,ea+szB) (right?) That implies they must overlap at at least over [ea,ea+szB). */ tl_assert(sMin <= ea && ea+szB-1 <= sMax); tl_assert(gMin <= ea && ea+szB-1 <= gMax); /* So now compute their intersection. */ uMin = Addr__max( sMin, gMin ); uMax = Addr__min( sMax, gMax ); if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax); tl_assert(uMin <= uMax); tl_assert(uMin <= ea && ea+szB-1 <= uMax); /* Finally, we can park [uMin,uMax] in the cache. However, if uMax is ~0, we can't represent the difference; hence fudge uMax. */ if (uMin < uMax && uMax == ~(UWord)0) uMax--; toadd_addr = uMin; toadd_szB = uMax - uMin + 1; break; } default: /* We should only be caching info for the above 3 cases */ tl_assert(0); } /* switch (inv->tag) */ { /* and actually add this to the cache, finally */ Word i; Word ip = cache->nInUse / 2; /* doesn't seem critical */ if (cache->nInUse < N_QCACHE) cache->nInUse++; for (i = cache->nInUse-1; i > ip; i--) { cache->elems[i] = cache->elems[i-1]; } tl_assert(toadd_szB > 0); cache->elems[ip].addr = toadd_addr; cache->elems[ip].szB = toadd_szB; cache->elems[ip].inv = *inv; } if (show) QCache__pp(cache, "after upd"); } } /* CALLED FROM GENERATED CODE */ static VG_REGPARM(3) void helperc__mem_access ( /* Known only at run time: */ Addr ea, Addr sp, Addr fp, /* Known at translation time: */ Word sszB, Addr ip, XArray* ip_frameBlocks ) { UWord szB; IInstance* iinstance; Invar* inv; Invar new_inv; ThreadId tid = VG_(get_running_tid)(); StackFrame* frame; HChar bufE[160], bufA[160], bufD[32]; stats__total_accesses++; tl_assert(is_sane_TId(tid)); frame = shadowStacks[tid]; tl_assert(frame); /* Find the instance info for this instruction. */ tl_assert(ip_frameBlocks); iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks ); tl_assert(iinstance); tl_assert(iinstance->blocks == ip_frameBlocks); szB = (sszB < 0) ? (-sszB) : sszB; tl_assert(szB > 0); inv = &iinstance->invar; /* Deal with first uses of instruction instances. */ if (inv->tag == Inv_Unset) { /* This is the first use of this instance of the instruction, so we can't make any check; we merely record what we saw, so we can compare it against what happens for 2nd and subsequent accesses. */ classify_address( inv, tid, ea, sp, fp, szB, iinstance->blocks ); tl_assert(inv->tag != Inv_Unset); return; } /* So generate an Invar and see if it's different from what we had before. */ classify_address( &new_inv, tid, ea, sp, fp, szB, iinstance->blocks ); tl_assert(new_inv.tag != Inv_Unset); /* Did we see something different from before? If no, then there's no error. */ tl_assert(inv->tag != Inv_Unset); if (LIKELY(eq_Invar(&new_inv, inv))) return; VG_(memset)(bufE, 0, sizeof(bufE)); show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth ); VG_(memset)(bufA, 0, sizeof(bufA)); show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth ); VG_(memset)(bufD, 0, sizeof(bufD)); UWord absDelta; gen_delta_str( bufD, &absDelta, inv, ea ); if (absDelta < 1024) sg_record_error_SorG( tid, ea, sszB, bufE, bufA, bufD ); /* And now install the new observation as "standard", so as to make future error messages make more sense. */ *inv = new_inv; } //////////////////////////////////////// /* Primary push-a-new-frame routine. Called indirectly from generated code. */ static UWord stats__max_sitree_size = 0; static UWord stats__max_gitree_size = 0; static void shadowStack_new_frame ( ThreadId tid, Addr sp_at_call_insn, Addr sp_post_call_insn, Addr fp_at_call_insn, Addr ip_post_call_insn, XArray* descrs_at_call_insn ) { StackFrame *callee, *caller; tl_assert(is_sane_TId(tid)); caller = shadowStacks[tid]; tl_assert(caller); if (caller->outer) { /* "this is not the outermost frame" */ tl_assert(caller->outer->inner == caller); tl_assert(caller->outer->depth >= 0); tl_assert(1 + caller->outer->depth == caller->depth); } else { tl_assert(caller->depth == 0); } caller->sp_at_call = sp_at_call_insn; caller->fp_at_call = fp_at_call_insn; if (descrs_at_call_insn) { tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 ); caller->blocks_added_by_call = calculate_StackBlock_EAs( descrs_at_call_insn, sp_at_call_insn, fp_at_call_insn ); if (caller->blocks_added_by_call) add_blocks_to_StackTree( siTrees[tid], descrs_at_call_insn, caller->blocks_added_by_call, caller->depth /* stack depth at which these blocks are considered to exist*/ ); if (1) { UWord s = VG_(sizeFM)( siTrees[tid] ); UWord g = VG_(sizeFM)( giTree ); Bool sb = s > stats__max_sitree_size; Bool gb = g > stats__max_gitree_size; if (sb) stats__max_sitree_size = s; if (gb) stats__max_gitree_size = g; if (0 && (sb || gb)) VG_(message)(Vg_DebugMsg, "exp-sgcheck: new max tree sizes: " "StackTree %lu, GlobalTree %lu\n", stats__max_sitree_size, stats__max_gitree_size ); } } else { caller->blocks_added_by_call = NULL; } /* caller->blocks_added_by_call is used again (and then freed) when this frame is removed from the stack. */ if (caller->inner) { callee = caller->inner; } else { callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame)); VG_(memset)(callee, 0, sizeof(StackFrame)); callee->outer = caller; caller->inner = callee; callee->depth = 1 + caller->depth; tl_assert(callee->inner == NULL); } /* This sets up .htab, .htab_size and .htab_used */ initialise_II_hash_table( callee ); callee->creation_sp = sp_post_call_insn; callee->sp_at_call = 0; // not actually required .. callee->fp_at_call = 0; // .. these 3 initialisations are .. callee->blocks_added_by_call = NULL; // .. just for cleanness /* record the new running stack frame */ shadowStacks[tid] = callee; /* and this thread's query cache is now invalid */ QCache__invalidate( &qcaches[tid] ); if (0) { Word d = callee->depth; const HChar *fnname; Bool ok; Addr ip = ip_post_call_insn; ok = VG_(get_fnname_w_offset)( ip, &fnname ); while (d > 0) { VG_(printf)(" "); d--; } VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip); } } /* CALLED FROM GENERATED CODE */ static VG_REGPARM(3) void helperc__new_frame ( Addr sp_post_call_insn, Addr fp_at_call_insn, Addr ip_post_call_insn, XArray* blocks_at_call_insn, Word sp_adjust ) { ThreadId tid = VG_(get_running_tid)(); Addr sp_at_call_insn = sp_post_call_insn + sp_adjust; shadowStack_new_frame( tid, sp_at_call_insn, sp_post_call_insn, fp_at_call_insn, ip_post_call_insn, blocks_at_call_insn ); } //////////////////////////////////////// /* Primary remove-frame(s) routine. Called indirectly from generated code. */ __attribute__((noinline)) static void shadowStack_unwind ( ThreadId tid, Addr sp_now ) { StackFrame *innermost, *innermostOrig; tl_assert(is_sane_TId(tid)); innermost = shadowStacks[tid]; tl_assert(innermost); innermostOrig = innermost; //VG_(printf)("UNWIND sp_new = %p\n", sp_now); while (1) { if (!innermost->outer) break; if (innermost->inner) tl_assert(innermost->inner->outer == innermost); tl_assert(innermost->outer->inner == innermost); tl_assert(innermost->blocks_added_by_call == NULL); if (sp_now <= innermost->creation_sp) break; //VG_(printf)("UNWIND dump %p\n", innermost->creation_sp); tl_assert(innermost->htab); if (innermost->htab != &innermost->htab_fixed[0]) sg_free(innermost->htab); /* be on the safe side */ innermost->creation_sp = 0; innermost->htab = NULL; innermost->htab_size = 0; innermost->htab_used = 0; innermost->sp_at_call = 0; innermost->fp_at_call = 0; innermost->blocks_added_by_call = NULL; innermost = innermost->outer; /* So now we're "back" in the calling frame. Remove from this thread's stack-interval-tree, the blocks added at the time of the call. */ if (innermost->outer) { /* not at the outermost frame */ if (innermost->blocks_added_by_call == NULL) { } else { del_blocks_from_StackTree( siTrees[tid], innermost->blocks_added_by_call ); VG_(deleteXA)( innermost->blocks_added_by_call ); innermost->blocks_added_by_call = NULL; } } /* That completes the required tidying of the interval tree associated with the frame we just removed. */ if (0) { Word d = innermost->depth; while (d > 0) { VG_(printf)(" "); d--; } VG_(printf)("X\n"); } } tl_assert(innermost); if (innermost != innermostOrig) { shadowStacks[tid] = innermost; /* this thread's query cache is now invalid */ QCache__invalidate( &qcaches[tid] ); } } ////////////////////////////////////////////////////////////// // // // Instrumentation // // // ////////////////////////////////////////////////////////////// /* What does instrumentation need to do? - at each Call transfer, generate a call to shadowStack_new_frame do this by manually inspecting the IR - at each sp change, if the sp change is negative, call shadowStack_unwind do this by asking for SP-change analysis - for each memory referencing instruction, call helperc__mem_access */ /* A complication: sg_ instrumentation and h_ instrumentation need to be interleaved. Since the latter is a lot more complex than the former, we split the sg_ instrumentation here into four functions and let the h_ instrumenter call the four functions as it goes. Hence the h_ instrumenter drives the sg_ instrumenter. To make this viable, the sg_ instrumenter carries what running state it needs in 'struct _SGEnv'. This is exported only abstractly from this file. */ struct _SGEnv { /* the current insn's IP */ Addr curr_IP; /* whether the above is actually known */ Bool curr_IP_known; /* if we find a mem ref, is it the first for this insn? Used for detecting insns which make more than one memory ref, a situation we basically can't really handle properly; and so we ignore all but the first ref. */ Bool firstRef; /* READONLY */ IRTemp (*newIRTemp_cb)(IRType,void*); void* newIRTemp_opaque; }; /* --- Helper fns for instrumentation --- */ static IRTemp gen_Get_SP ( struct _SGEnv* sge, IRSB* bbOut, const VexGuestLayout* layout, Int hWordTy_szB ) { IRExpr* sp_expr; IRTemp sp_temp; IRType sp_type; /* This in effect forces the host and guest word sizes to be the same. */ tl_assert(hWordTy_szB == layout->sizeof_SP); sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32; sp_expr = IRExpr_Get( layout->offset_SP, sp_type ); sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque ); addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) ); return sp_temp; } static IRTemp gen_Get_FP ( struct _SGEnv* sge, IRSB* bbOut, const VexGuestLayout* layout, Int hWordTy_szB ) { IRExpr* fp_expr; IRTemp fp_temp; IRType fp_type; /* This in effect forces the host and guest word sizes to be the same. */ tl_assert(hWordTy_szB == layout->sizeof_SP); fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32; fp_expr = IRExpr_Get( layout->offset_FP, fp_type ); fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque ); addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) ); return fp_temp; } static void instrument_mem_access ( struct _SGEnv* sge, IRSB* bbOut, IRExpr* addr, Int szB, Bool isStore, Int hWordTy_szB, Addr curr_IP, const VexGuestLayout* layout ) { IRType tyAddr = Ity_INVALID; XArray* frameBlocks = NULL; tl_assert(isIRAtom(addr)); tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8); tyAddr = typeOfIRExpr( bbOut->tyenv, addr ); tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64); #if defined(VGA_x86) { UChar* p = (UChar*)curr_IP; // pop %ebp; RET if (p[0] == 0xc3 && p[-1] == 0x5d) return; // pop %ebp; RET $imm16 if (p[0] == 0xc2 && p[-1] == 0x5d) return; // PUSH %EBP; mov %esp,%ebp if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return; } #endif /* First off, find or create the StackBlocks for this instruction. */ frameBlocks = get_StackBlocks_for_IP( curr_IP ); tl_assert(frameBlocks); //if (VG_(sizeXA)( frameBlocks ) == 0) // frameBlocks = NULL; /* Generate a call to "helperc__mem_access", passing: addr current_SP current_FP szB curr_IP frameBlocks */ { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB ); IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB ); IRExpr** args = mkIRExprVec_6( addr, IRExpr_RdTmp(t_SP), IRExpr_RdTmp(t_FP), mkIRExpr_HWord( isStore ? (-szB) : szB ), mkIRExpr_HWord( curr_IP ), mkIRExpr_HWord( (HWord)frameBlocks ) ); IRDirty* di = unsafeIRDirty_0_N( 3/*regparms*/, "helperc__mem_access", VG_(fnptr_to_fnentry)( &helperc__mem_access ), args ); addStmtToIRSB( bbOut, IRStmt_Dirty(di) ); } } /* --- Instrumentation main (4 fns) --- */ struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*), void* newIRTemp_opaque ) { struct _SGEnv * env = sg_malloc("di.sg_main.sii.1", sizeof(struct _SGEnv)); tl_assert(env); env->curr_IP = 0; env->curr_IP_known = False; env->firstRef = True; env->newIRTemp_cb = newIRTemp_cb; env->newIRTemp_opaque = newIRTemp_opaque; return env; } void sg_instrument_fini ( struct _SGEnv * env ) { sg_free(env); } /* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env' as required. This must be called before 'st' itself is added to 'sbOut'. */ void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env, /*MOD*/IRSB* sbOut, IRStmt* st, const VexGuestLayout* layout, IRType gWordTy, IRType hWordTy ) { if (!sg_clo_enable_sg_checks) return; tl_assert(st); tl_assert(isFlatIRStmt(st)); switch (st->tag) { case Ist_NoOp: case Ist_AbiHint: case Ist_Put: case Ist_PutI: case Ist_MBE: /* None of these can contain any memory references. */ break; case Ist_Exit: tl_assert(st->Ist.Exit.jk != Ijk_Call); /* else we must deal with a conditional call */ break; case Ist_IMark: env->curr_IP_known = True; env->curr_IP = st->Ist.IMark.addr; env->firstRef = True; break; case Ist_Store: tl_assert(env->curr_IP_known); if (env->firstRef) { instrument_mem_access( env, sbOut, st->Ist.Store.addr, sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)), True/*isStore*/, sizeofIRType(hWordTy), env->curr_IP, layout ); env->firstRef = False; } break; case Ist_WrTmp: { IRExpr* data = st->Ist.WrTmp.data; if (data->tag == Iex_Load) { tl_assert(env->curr_IP_known); if (env->firstRef) { instrument_mem_access( env, sbOut, data->Iex.Load.addr, sizeofIRType(data->Iex.Load.ty), False/*!isStore*/, sizeofIRType(hWordTy), env->curr_IP, layout ); env->firstRef = False; } } break; } case Ist_Dirty: { Int dataSize; IRDirty* d = st->Ist.Dirty.details; if (d->mFx != Ifx_None) { /* This dirty helper accesses memory. Collect the details. */ tl_assert(env->curr_IP_known); if (env->firstRef) { tl_assert(d->mAddr != NULL); tl_assert(d->mSize != 0); dataSize = d->mSize; if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) { instrument_mem_access( env, sbOut, d->mAddr, dataSize, False/*!isStore*/, sizeofIRType(hWordTy), env->curr_IP, layout ); } if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) { instrument_mem_access( env, sbOut, d->mAddr, dataSize, True/*isStore*/, sizeofIRType(hWordTy), env->curr_IP, layout ); } env->firstRef = False; } } else { tl_assert(d->mAddr == NULL); tl_assert(d->mSize == 0); } break; } case Ist_CAS: { /* We treat it as a read and a write of the location. I think that is the same behaviour as it was before IRCAS was introduced, since prior to that point, the Vex front ends would translate a lock-prefixed instruction into a (normal) read followed by a (normal) write. */ if (env->firstRef) { Int dataSize; IRCAS* cas = st->Ist.CAS.details; tl_assert(cas->addr != NULL); tl_assert(cas->dataLo != NULL); dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo)); if (cas->dataHi != NULL) dataSize *= 2; /* since it's a doubleword-CAS */ instrument_mem_access( env, sbOut, cas->addr, dataSize, False/*!isStore*/, sizeofIRType(hWordTy), env->curr_IP, layout ); instrument_mem_access( env, sbOut, cas->addr, dataSize, True/*isStore*/, sizeofIRType(hWordTy), env->curr_IP, layout ); env->firstRef = False; } break; } default: tl_assert(0); } /* switch (st->tag) */ } /* Add instrumentation for the final jump of an IRSB 'sbOut', and possibly modify 'env' as required. This must be the last instrumentation statement in the block. */ void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env, /*MOD*/IRSB* sbOut, IRExpr* next, IRJumpKind jumpkind, const VexGuestLayout* layout, IRType gWordTy, IRType hWordTy ) { if (!sg_clo_enable_sg_checks) return; if (jumpkind == Ijk_Call) { // Assumes x86 or amd64 IRTemp sp_post_call_insn, fp_post_call_insn; XArray* frameBlocks; IRExpr** args; IRDirty* di; sp_post_call_insn = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) ); fp_post_call_insn = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) ); tl_assert(env->curr_IP_known); frameBlocks = get_StackBlocks_for_IP( env->curr_IP ); tl_assert(frameBlocks); if (VG_(sizeXA)(frameBlocks) == 0) frameBlocks = NULL; args = mkIRExprVec_5( IRExpr_RdTmp(sp_post_call_insn), IRExpr_RdTmp(fp_post_call_insn), /* assume the call doesn't change FP */ next, mkIRExpr_HWord( (HWord)frameBlocks ), mkIRExpr_HWord( sizeofIRType(gWordTy) ) ); di = unsafeIRDirty_0_N( 3/*regparms*/, "helperc__new_frame", VG_(fnptr_to_fnentry)( &helperc__new_frame ), args ); addStmtToIRSB( sbOut, IRStmt_Dirty(di) ); } } ////////////////////////////////////////////////////////////// // // // end Instrumentation // // // ////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////// // // // misc // // // ////////////////////////////////////////////////////////////// /* Make a new empty stack frame that is suitable for being the outermost frame in a stack. It has a creation_sp of effectively infinity, so it can never be removed. */ static StackFrame* new_root_StackFrame ( void ) { StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame)); VG_(memset)( sframe, 0, sizeof(*sframe) ); sframe->creation_sp = ~0UL; /* This sets up .htab, .htab_size and .htab_used */ initialise_II_hash_table( sframe ); /* ->depth, ->outer, ->inner are 0, NULL, NULL */ return sframe; } /* Primary routine for setting up the shadow stack for a new thread. Note that this is used to create not only child thread stacks, but the root thread's stack too. We create a new stack with .creation_sp set to infinity, so that the outermost frame can never be removed (by shadowStack_unwind). The core calls this function as soon as a thread is created. We cannot yet get its SP value, since that may not yet be set. */ static void shadowStack_thread_create ( ThreadId parent, ThreadId child ) { tl_assert(is_sane_TId(child)); if (parent == VG_INVALID_THREADID) { /* creating the main thread's stack */ } else { tl_assert(is_sane_TId(parent)); tl_assert(parent != child); tl_assert(shadowStacks[parent] != NULL); tl_assert(siTrees[parent] != NULL); } /* Create the child's stack. Bear in mind we may be re-using it. */ if (shadowStacks[child] == NULL) { /* First use of this stack. Just allocate an initial frame. */ tl_assert(siTrees[child] == NULL); } else { StackFrame *frame, *frame2; /* re-using a stack. */ /* get rid of the interval tree */ tl_assert(siTrees[child] != NULL); delete_StackTree( siTrees[child] ); siTrees[child] = NULL; /* Throw away all existing frames. */ frame = shadowStacks[child]; while (frame->outer) frame = frame->outer; tl_assert(frame->depth == 0); while (frame) { frame2 = frame->inner; if (frame2) tl_assert(1 + frame->depth == frame2->depth); sg_free(frame); frame = frame2; } shadowStacks[child] = NULL; } tl_assert(shadowStacks[child] == NULL); tl_assert(siTrees[child] == NULL); /* Set up the initial stack frame. */ shadowStacks[child] = new_root_StackFrame(); /* and set up the child's stack block interval tree. */ siTrees[child] = new_StackTree(); } /* Once a thread is ready to go, the core calls here. We take the opportunity to push a second frame on its stack, with the presumably valid SP value that is going to be used for the thread's startup. Hence we should always wind up with a valid outermost frame for the thread. */ static void shadowStack_set_initial_SP ( ThreadId tid ) { StackFrame* sf; tl_assert(is_sane_TId(tid)); sf = shadowStacks[tid]; tl_assert(sf != NULL); tl_assert(sf->outer == NULL); tl_assert(sf->inner == NULL); tl_assert(sf->creation_sp == ~0UL); shadowStack_new_frame( tid, 0, VG_(get_SP)(tid), 0, VG_(get_IP)(tid), NULL ); } ////////////////////////////////////////////////////////////// // // // main-ish // // // ////////////////////////////////////////////////////////////// /* CALLED indirectly FROM GENERATED CODE. Calls here are created by sp-change analysis, as requested in pc_pre_clo_int(). */ void sg_die_mem_stack ( Addr old_SP, SizeT len ) { ThreadId tid = VG_(get_running_tid)(); shadowStack_unwind( tid, old_SP+len ); } void sg_pre_clo_init ( void ) { ourGlobals_init(); init_StackBlocks_set(); init_GlobalBlock_set(); } void sg_post_clo_init ( void ) { } void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) { shadowStack_thread_create(parent, child); } void sg_pre_thread_first_insn ( ThreadId tid ) { shadowStack_set_initial_SP(tid); } void sg_fini(Int exitcode) { if (VG_(clo_stats)) { VG_(message)(Vg_DebugMsg, " sg_: %'llu total accesses, of which:\n", stats__total_accesses); VG_(message)(Vg_DebugMsg, " sg_: stack0: %'12llu classify\n", stats__classify_Stack0); VG_(message)(Vg_DebugMsg, " sg_: stackN: %'12llu classify\n", stats__classify_StackN); VG_(message)(Vg_DebugMsg, " sg_: global: %'12llu classify\n", stats__classify_Global); VG_(message)(Vg_DebugMsg, " sg_: unknown: %'12llu classify\n", stats__classify_Unknown); VG_(message)(Vg_DebugMsg, " sg_: %'llu Invars preened, of which %'llu changed\n", stats__Invars_preened, stats__Invars_changed); VG_(message)(Vg_DebugMsg, " sg_: t_i_b_MT: %'12llu\n", stats__t_i_b_empty); VG_(message)(Vg_DebugMsg, " sg_: qcache: %'llu searches, %'llu probes, %'llu misses\n", stats__qcache_queries, stats__qcache_probes, stats__qcache_misses); VG_(message)(Vg_DebugMsg, " sg_: htab-fast: %'llu hits\n", stats__htab_fast); VG_(message)(Vg_DebugMsg, " sg_: htab-slow: %'llu searches, %'llu probes, %'llu resizes\n", stats__htab_searches, stats__htab_probes, stats__htab_resizes); } } /*--------------------------------------------------------------------*/ /*--- end sg_main.c ---*/ /*--------------------------------------------------------------------*/