1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- Segment name management                 aspacemgr-segnames.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of Valgrind, a dynamic binary instrumentation
9    framework.
10 
11    Copyright (C) 2015-2015  Florian Krohm
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 /* Segment names are stored in a string table.
32 
33    The string table is organised into slots of varying length. Slots are
34    adjacent and there are no holes between slots.
35    A slot consists of two parts:
36 
37    (1) a fixed size overhead of length 4 bytes
38    (2) a variable size payload of up to 65535 bytes
39 
40    The segment name is stored in the payload area. Therefore:
41    a segment name cannot be longer than 65535 bytes including the '\0'
42    terminator. This looks like a reasonable limitation.
43 
44    Overall slot layout:
45 
46        |          4 bytes            |    max 65535 bytes      |
47        +-----------------------------+-------------------------+
48        |          overhead           |         payload         |
49        +-----------------------------+-------------------------+
50        ^                             ^
51        |                             |
52       -4                             +----- seg->fnIdx
53 
54    Each slot is uniquely identified by an index which points to the first
55    byte of the payload area. It is this value that is stored in seg->fnIdx.
56    Note, that this value is at least 4.
57 
58    A slot either holds a string or it is free. The status of a slot is
59    identified by the leftmost bit in the overhead field, the so called F-bit.
60    F-bit == 1 means that slot is free; otherwise it is occupied and holds a
61    string.
62 
63    Slot containing a string (segment name):
64 
65   bits | 1 |      15      |    16    |
66        +---+--------------+----------+-------------------------+
67        | 0 |   refcount   | slotsize | the string including \0 |
68        +---+--------------+----------+-------------------------+
69        ^                  ^          ^
70        |                  |          |
71       -4                 -2          +----- seg->fnIdx
72 
73    Segment names are reference counted. 15 bits are available which allows
74    for up to 32767 references. If the string is referenced more than 32767
75    times, the reference count will be frozen and the slot can never
76    become free. I'm not unduly concerned.
77    Two bytes are reserved to hold the size of the slot. Well, it's actually
78    the size of the payload aread (i.e. the size of the slot minus the
79    overhead). Ah well -- the name sticks.
80    With two bytes to store the size, the payload area can be at most 65535
81    bytes large.
82 
83    A free slot looks like this:
84 
85   bits | 1 |           31            |    16    |
86        +---+-------------------------+----------+--------------+
87        | 1 | index of next free slot | slotsize | .. unused .. |
88        +---+-------------------------+----------+--------------+
89        ^                             ^
90        |                             |
91       -4                             +----- seg->fnIdx
92 
93    Free slots are chained together in a singly linked list. An index of
94    zero indicates the end of the chain. Note that zero cannot conflict
95    with an index into the string table as the minumum index is at least
96    four (see above).
97 
98    The typical way to traverse the segment names is:
99 
100    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
101       if (is_freeslot(ix))
102          do this
103       else
104          do that
105    }
106 
107    Important detail: there is a sentinel at the end of the list, namely a
108    slot with a zero-sized payload area.
109 
110    Whenever a new segment name needs to be stashed away, the list of free
111    slots is traversed and the first slot which is large enough is being taken
112    (first fit). There will be no splitting of slots, as that complicates
113    matters and without slot coalescing would lead to memory fragmentation.
114    So we leave it as is until a use case comes up that needs something better.
115 */
116 
117 #include "pub_core_basics.h"     // types
118 #include "priv_aspacemgr.h"
119 
120 // A few constants.
121 enum {
122    refcount_size = sizeof(UShort),
123    slotsize_size = sizeof(UShort),
124    overhead = refcount_size + slotsize_size,
125    max_refcount  = 0x7fff,      // 2 bytes - F-bit
126    max_slotsize  = 0xffff,      // 2 bytes
127    max_slotindex = 0x7fffffff,  // 4 bytes - F-bit
128    fbit_mask = 0x80,
129    end_of_chain = 0
130 };
131 
132 /* The old segname implementation allowed for 1000 names on Android and
133    6000 names on other platforms. Each name was allowed to be 1000 characters
134    long. That was very wasteful. */
135 #define VG_TABLE_SIZE 1000000
136 
137 /* String table for segment names */
138 
139 static HChar segnames[VG_TABLE_SIZE];  /* her majesty, the string table */
140 static SizeT segnames_used = 0;        /* number of bytes used */
141 static UInt  num_segnames = 0;         /* number of names in string table */
142 static UInt  num_slots = 0;            /* number of slots in string table */
143 static UInt  freeslot_chain = end_of_chain;
144 
145 static Bool
is_freeslot(UInt ix)146 is_freeslot(UInt ix)
147 {
148    aspacem_assert(ix >= overhead && ix <= segnames_used);
149    return (segnames[ix - 4] & fbit_mask) != 0;
150 }
151 
152 static void
put_slotindex(UInt ix,UInt slotindex)153 put_slotindex(UInt ix, UInt slotindex)
154 {
155    aspacem_assert(ix >= overhead && ix <= segnames_used);
156    if (slotindex != 0)
157       aspacem_assert(slotindex >= overhead && slotindex <= segnames_used);
158 
159    slotindex |= fbit_mask << 24;
160    segnames[ix - 1] = slotindex & 0xFF;   slotindex >>= 8;
161    segnames[ix - 2] = slotindex & 0xFF;   slotindex >>= 8;
162    segnames[ix - 3] = slotindex & 0xFF;   slotindex >>= 8;
163    segnames[ix - 4] = slotindex & 0xFF;
164 }
165 
166 static UInt
get_slotindex(UInt ix)167 get_slotindex(UInt ix)
168 {
169    aspacem_assert(ix >= overhead && ix <= segnames_used);
170    aspacem_assert(is_freeslot(ix));
171 
172    // Avoid unexpected sign extension
173    const UChar *unames = (const UChar *)segnames;
174 
175    UInt slotindex = 0;
176    slotindex |= unames[ix - 4];   slotindex <<= 8;
177    slotindex |= unames[ix - 3];   slotindex <<= 8;
178    slotindex |= unames[ix - 2];   slotindex <<= 8;
179    slotindex |= unames[ix - 1];
180 
181    return slotindex & max_slotindex;   // removes the F-bit
182 }
183 
184 static void
put_slotsize(UInt ix,UInt size)185 put_slotsize(UInt ix, UInt size)
186 {
187    aspacem_assert(ix >= overhead && ix <= segnames_used);
188    aspacem_assert(size <= max_slotsize);
189    segnames[ix - 1] = size & 0xff;
190    segnames[ix - 2] = size >> 8;
191 }
192 
193 static UInt
get_slotsize(UInt ix)194 get_slotsize(UInt ix)
195 {
196    aspacem_assert(ix >= overhead && ix <= segnames_used);
197 
198    // Avoid unexpected sign extension
199    const UChar *unames = (const UChar *)segnames;
200    if (is_freeslot(ix))
201       return (unames[ix] << 8) | unames[ix+1];
202    else
203       return (unames[ix - 2] << 8) | unames[ix - 1];
204 }
205 
206 static void
put_refcount(UInt ix,UInt rc)207 put_refcount(UInt ix, UInt rc)
208 {
209    aspacem_assert(ix >= overhead && ix <= segnames_used);
210    aspacem_assert(rc <= max_refcount);
211    // rc <= max_refcount ensures that the F-bit is zero
212    segnames[ix - 3] = rc & 0xff;
213    segnames[ix - 4] = rc >> 8;
214 }
215 
216 static UInt
get_refcount(UInt ix)217 get_refcount(UInt ix)
218 {
219    aspacem_assert(ix >= overhead && ix <= segnames_used);
220    // must not be a free slot
221    aspacem_assert(! is_freeslot(ix));
222 
223    // Avoid unexpected sign extension
224    const UChar *unames = (const UChar *)segnames;
225    return (unames[ix - 4] << 8) | unames[ix - 3];
226 }
227 
228 static void
inc_refcount(UInt ix)229 inc_refcount(UInt ix)
230 {
231    aspacem_assert(ix >= overhead && ix <= segnames_used);
232    UInt rc = get_refcount(ix);
233    if (rc != max_refcount)
234       put_refcount(ix, rc + 1);
235 }
236 
237 static void
dec_refcount(UInt ix)238 dec_refcount(UInt ix)
239 {
240    aspacem_assert(ix >= overhead && ix <= segnames_used);
241    UInt rc = get_refcount(ix);
242    aspacem_assert(rc > 0);
243    if (rc != max_refcount) {
244       --rc;
245       if (rc != 0) {
246          put_refcount(ix, rc);
247       } else {
248          UInt size = get_slotsize(ix);
249          /* Chain this slot in the freelist */
250          put_slotindex(ix, freeslot_chain);
251          get_slotindex(ix);
252          put_slotsize(ix + slotsize_size, size);
253          get_slotindex(ix);
254          freeslot_chain = ix;
255          --num_segnames;
256          if (0) VG_(am_show_nsegments)(0, "AFTER DECREASE rc -> 0");
257       }
258    }
259 }
260 
261 static void
put_sentinel(UInt ix)262 put_sentinel(UInt ix)
263 {
264    aspacem_assert(ix >= overhead && ix <= segnames_used);
265 
266    put_refcount(ix, 0);
267    put_slotsize(ix, 0);
268 }
269 
270 
271 /* Searches the string table to find an index for the given name.
272    If none is found, an index is allocated and the name stored.
273    If running ouf of memory, return -1. */
274 Int
ML_(am_allocate_segname)275 ML_(am_allocate_segname)(const HChar *name)
276 {
277    UInt len, ix, size, next_freeslot;
278 
279    aspacem_assert(name);
280 
281    if (0) VG_(debugLog)(0, "aspacem", "allocate_segname %s\n", name);
282 
283    len = VG_(strlen)(name);
284 
285    /* First see if we already have the name. */
286    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
287       if (is_freeslot(ix)) continue;
288       if (VG_(strcmp)(name, segnames + ix) == 0) {
289          inc_refcount(ix);
290          return ix;
291       }
292    }
293 
294    /* Is there a free slot in the string table from a previously "freed"
295       segment name ? */
296    Int prev;
297    for (prev = -1, ix = freeslot_chain; ix != end_of_chain;
298         prev = ix, ix = next_freeslot) {
299       next_freeslot = get_slotindex(ix);  // next in chain
300       size = get_slotsize(ix);
301 
302       if (size >= len + 1) {
303          /* Note, if the size of the slot is a lot larger than the length
304             of the string we're about to store in it, we could split the
305             slot into two. But that complicates matters and as we're not
306             doing any coalescing of adjacent free slots this could lead to
307             fragmentation. */
308          if (prev == -1)
309             freeslot_chain = next_freeslot;
310          else
311             put_slotindex(prev, next_freeslot);
312          put_refcount(ix, 1);
313          put_slotsize(ix, size);
314          VG_(strcpy)(segnames + ix, name);
315          ++num_segnames;
316          return ix;
317       }
318    }
319 
320    /* We need to add a new name. */
321 
322    /* Note, that we need at least two bytes in the payload. The reason is
323       that the payload area will be used to store the size of the slot when
324       the slot is on the freelist. */
325    if (len == 0) len = 1;
326 
327    /* Is there enough room in the string table? The OVERHEAD is for the
328       sentinel following the payload of new slot. */
329    SizeT need = len + 1 + overhead;
330    if (need > (sizeof segnames) - segnames_used) {
331       return -1;
332    }
333 
334    ++num_segnames;
335    ++num_slots;
336 
337    /* copy it in */
338    ix = segnames_used;
339    put_refcount(ix, 1);
340    put_slotsize(ix, len + 1);
341    VG_(strcpy)(segnames + ix, name);
342    segnames_used += need;
343 
344    /* Add sentinel at end of segment name list */
345    put_sentinel(segnames_used);
346 
347    return ix;
348 }
349 
350 /* Debugging output */
351 void
ML_(am_show_segnames)352 ML_(am_show_segnames)(Int logLevel, const HChar *prefix)
353 {
354    UInt size, ix, i;
355 
356    VG_(debugLog)(logLevel, "aspacem", "%u segment names in %u slots\n",
357                  num_segnames, num_slots);
358 
359    if (freeslot_chain == end_of_chain)
360       VG_(debugLog)(logLevel, "aspacem", "freelist is empty\n");
361    else
362       VG_(debugLog)(logLevel, "aspacem", "freelist begins at %u\n",
363                     freeslot_chain);
364    for (i = 0, ix = overhead; (size = get_slotsize(ix)) != 0;
365         ix += size + overhead, ++i) {
366       if (is_freeslot(ix))
367          VG_(debugLog)(logLevel, "aspacem",
368                        "(%u,%u,0) [free slot: size=%u  next=%u]\n", i, ix,
369                        get_slotsize(ix), get_slotindex(ix));
370       else
371          VG_(debugLog)(logLevel, "aspacem",
372                        "(%u,%u,%u) %s\n", i, ix, get_refcount(ix),
373                        segnames + ix);
374    }
375 }
376 
377 /* Returns a sequence number for the fnIdx position in segnames.
378    Used in aspacemgr debug output to associate a segment with
379    a segment name. */
380 Int
ML_(am_segname_get_seqnr)381 ML_(am_segname_get_seqnr)(Int fnIdx)
382 {
383    SizeT ix, size;
384    Int seqnr = -1;
385 
386    if (fnIdx == -1) return -1;   // shortcut
387 
388    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
389       seqnr++;
390       if (ix == fnIdx)
391          return seqnr;
392    }
393 
394    // We should always find the given index; something's busted
395    aspacem_assert(0);
396    return -1;
397 }
398 
399 /* Initialise the string table for segment names. It contains an empty
400    string which is not referenced. */
401 void
ML_(am_segnames_init)402 ML_(am_segnames_init)(void)
403 {
404    aspacem_assert(sizeof segnames >= overhead);
405 
406    segnames_used = overhead;
407    put_sentinel(segnames_used);
408 }
409 
410 /* Increase reference count of segment name identified by IX. */
411 void
ML_(am_inc_refcount)412 ML_(am_inc_refcount)(Int ix)
413 {
414    if (ix != -1)
415       inc_refcount(ix);
416 }
417 
418 /* Decrease reference count of segment name identified by IX. */
419 void
ML_(am_dec_refcount)420 ML_(am_dec_refcount)(Int ix)
421 {
422    if (ix != -1)
423       dec_refcount(ix);
424 }
425 
426 Bool
ML_(am_sane_segname)427 ML_(am_sane_segname)(Int ix)
428 {
429    return ix == -1 || (ix >= overhead && ix < segnames_used);
430 }
431 
432 const HChar *
ML_(am_get_segname)433 ML_(am_get_segname)(Int ix)
434 {
435    return (ix == -1) ? NULL : segnames + ix;
436 }
437 
438 /*--------------------------------------------------------------------*/
439 /*--- end                                                          ---*/
440 /*--------------------------------------------------------------------*/
441