1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- Segment name management                 aspacemgr-segnames.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of Valgrind, a dynamic binary instrumentation
9    framework.
10 
11    Copyright (C) 2015-2015  Florian Krohm
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 /* Segment names are stored in a string table.
32 
33    The string table is organised into slots of varying length. Slots are
34    adjacent and there are no holes between slots.
35    A slot consists of two parts:
36 
37    (1) a fixed size overhead of length 4 bytes
38    (2) a variable size payload of up to 65535 bytes
39 
40    The segment name is stored in the payload area. Therefore:
41    a segment name cannot be longer than 65535 bytes including the '\0'
42    terminator. This looks like a reasonable limitation.
43 
44    Overall slot layout:
45 
46        |          4 bytes            |    max 65535 bytes      |
47        +-----------------------------+-------------------------+
48        |          overhead           |         payload         |
49        +-----------------------------+-------------------------+
50        ^                             ^
51        |                             |
52       -4                             +----- seg->fnIdx
53 
54    Each slot is uniquely identified by an index which points to the first
55    byte of the payload area. It is this value that is stored in seg->fnIdx.
56    Note, that this value is at least 4.
57 
58    A slot either holds a string or it is free. The status of a slot is
59    identified by the leftmost bit in the overhead field, the so called F-bit.
60    F-bit == 1 means that slot is free; otherwise it is occupied and holds a
61    string.
62 
63    Slot containing a string (segment name):
64 
65   bits | 1 |      15      |    16    |
66        +---+--------------+----------+-------------------------+
67        | 0 |   refcount   | slotsize | the string including \0 |
68        +---+--------------+----------+-------------------------+
69        ^                  ^          ^
70        |                  |          |
71       -4                 -2          +----- seg->fnIdx
72 
73    Segment names are reference counted. 15 bits are available which allows
74    for up to 32767 references. If the string is referenced more than 32767
75    times, the reference count will be frozen and the slot can never
76    become free. I'm not unduly concerned.
77    Two bytes are reserved to hold the size of the slot. Well, it's actually
78    the size of the payload aread (i.e. the size of the slot minus the
79    overhead). Ah well -- the name sticks.
80    With two bytes to store the size, the payload area can be at most 65535
81    bytes large.
82 
83    A free slot looks like this:
84 
85   bits | 1 |           31            |    16    |
86        +---+-------------------------+----------+--------------+
87        | 1 | index of next free slot | slotsize | .. unused .. |
88        +---+-------------------------+----------+--------------+
89        ^                             ^
90        |                             |
91       -4                             +----- seg->fnIdx
92 
93    Free slots are chained together in a singly linked list. An index of
94    zero indicates the end of the chain. Note that zero cannot conflict
95    with an index into the string table as the minimum index is at least
96    four (see above).
97 
98    The typical way to traverse the segment names is:
99 
100    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
101       if (is_freeslot(ix))
102          do this
103       else
104          do that
105    }
106 
107    Important detail: there is a sentinel at the end of the list, namely a
108    slot with a zero-sized payload area.
109 
110    Whenever a new segment name needs to be stashed away, the list of free
111    slots is traversed and the first slot which is large enough is being taken
112    (first fit). There will be no splitting of slots, as that complicates
113    matters and without slot coalescing would lead to memory fragmentation.
114    So we leave it as is until a use case comes up that needs something better.
115 */
116 
117 #include "pub_core_basics.h"     // types
118 #include "priv_aspacemgr.h"
119 
120 // A few constants.
121 enum {
122    refcount_size = sizeof(UShort),
123    slotsize_size = sizeof(UShort),
124    overhead = refcount_size + slotsize_size,
125    max_refcount  = 0x7fff,      // 2 bytes - F-bit
126    max_slotsize  = 0xffff,      // 2 bytes
127    max_slotindex = 0x7fffffff,  // 4 bytes - F-bit
128    fbit_mask_value = 0x80,
129    end_of_chain = 0
130 };
131 
132 static const UInt fbit_mask = fbit_mask_value;
133 
134 /* The old segname implementation allowed for 1000 names on Android and
135    6000 names on other platforms. Each name was allowed to be 1000 characters
136    long. That was very wasteful. */
137 #define VG_TABLE_SIZE 1000000
138 
139 /* String table for segment names */
140 
141 static HChar segnames[VG_TABLE_SIZE];  /* her majesty, the string table */
142 static SizeT segnames_used = 0;        /* number of bytes used */
143 static UInt  num_segnames = 0;         /* number of names in string table */
144 static UInt  num_slots = 0;            /* number of slots in string table */
145 static UInt  freeslot_chain = end_of_chain;
146 
147 static Bool
is_freeslot(UInt ix)148 is_freeslot(UInt ix)
149 {
150    aspacem_assert(ix >= overhead && ix <= segnames_used);
151    return (segnames[ix - 4] & fbit_mask) != 0;
152 }
153 
154 static void
put_slotindex(UInt ix,UInt slotindex)155 put_slotindex(UInt ix, UInt slotindex)
156 {
157    aspacem_assert(ix >= overhead && ix <= segnames_used);
158    if (slotindex != 0)
159       aspacem_assert(slotindex >= overhead && slotindex <= segnames_used);
160 
161    slotindex |= fbit_mask << 24;
162    segnames[ix - 1] = slotindex & 0xFF;   slotindex >>= 8;
163    segnames[ix - 2] = slotindex & 0xFF;   slotindex >>= 8;
164    segnames[ix - 3] = slotindex & 0xFF;   slotindex >>= 8;
165    segnames[ix - 4] = slotindex & 0xFF;
166 }
167 
168 static UInt
get_slotindex(UInt ix)169 get_slotindex(UInt ix)
170 {
171    aspacem_assert(ix >= overhead && ix <= segnames_used);
172    aspacem_assert(is_freeslot(ix));
173 
174    // Avoid unexpected sign extension
175    const UChar *unames = (const UChar *)segnames;
176 
177    UInt slotindex = 0;
178    slotindex |= unames[ix - 4];   slotindex <<= 8;
179    slotindex |= unames[ix - 3];   slotindex <<= 8;
180    slotindex |= unames[ix - 2];   slotindex <<= 8;
181    slotindex |= unames[ix - 1];
182 
183    return slotindex & max_slotindex;   // removes the F-bit
184 }
185 
186 static void
put_slotsize(UInt ix,UInt size)187 put_slotsize(UInt ix, UInt size)
188 {
189    aspacem_assert(ix >= overhead && ix <= segnames_used);
190    aspacem_assert(size <= max_slotsize);
191    segnames[ix - 1] = size & 0xff;
192    segnames[ix - 2] = size >> 8;
193 }
194 
195 static UInt
get_slotsize(UInt ix)196 get_slotsize(UInt ix)
197 {
198    aspacem_assert(ix >= overhead && ix <= segnames_used);
199 
200    // Avoid unexpected sign extension
201    const UChar *unames = (const UChar *)segnames;
202    if (is_freeslot(ix))
203       return (unames[ix] << 8) | unames[ix+1];
204    else
205       return (unames[ix - 2] << 8) | unames[ix - 1];
206 }
207 
208 static void
put_refcount(UInt ix,UInt rc)209 put_refcount(UInt ix, UInt rc)
210 {
211    aspacem_assert(ix >= overhead && ix <= segnames_used);
212    aspacem_assert(rc <= max_refcount);
213    // rc <= max_refcount ensures that the F-bit is zero
214    segnames[ix - 3] = rc & 0xff;
215    segnames[ix - 4] = rc >> 8;
216 }
217 
218 static UInt
get_refcount(UInt ix)219 get_refcount(UInt ix)
220 {
221    aspacem_assert(ix >= overhead && ix <= segnames_used);
222    // must not be a free slot
223    aspacem_assert(! is_freeslot(ix));
224 
225    // Avoid unexpected sign extension
226    const UChar *unames = (const UChar *)segnames;
227    return (unames[ix - 4] << 8) | unames[ix - 3];
228 }
229 
230 static void
inc_refcount(UInt ix)231 inc_refcount(UInt ix)
232 {
233    aspacem_assert(ix >= overhead && ix <= segnames_used);
234    UInt rc = get_refcount(ix);
235    if (rc != max_refcount)
236       put_refcount(ix, rc + 1);
237 }
238 
239 static void
dec_refcount(UInt ix)240 dec_refcount(UInt ix)
241 {
242    aspacem_assert(ix >= overhead && ix <= segnames_used);
243    UInt rc = get_refcount(ix);
244    aspacem_assert(rc > 0);
245    if (rc != max_refcount) {
246       --rc;
247       if (rc != 0) {
248          put_refcount(ix, rc);
249       } else {
250          UInt size = get_slotsize(ix);
251          /* Chain this slot in the freelist */
252          put_slotindex(ix, freeslot_chain);
253          get_slotindex(ix);
254          put_slotsize(ix + slotsize_size, size);
255          get_slotindex(ix);
256          freeslot_chain = ix;
257          --num_segnames;
258          if (0) VG_(am_show_nsegments)(0, "AFTER DECREASE rc -> 0");
259       }
260    }
261 }
262 
263 static void
put_sentinel(UInt ix)264 put_sentinel(UInt ix)
265 {
266    aspacem_assert(ix >= overhead && ix <= segnames_used);
267 
268    put_refcount(ix, 0);
269    put_slotsize(ix, 0);
270 }
271 
272 
273 /* Searches the string table to find an index for the given name.
274    If none is found, an index is allocated and the name stored.
275    If running ouf of memory, return -1. */
276 Int
ML_(am_allocate_segname)277 ML_(am_allocate_segname)(const HChar *name)
278 {
279    UInt len, ix, size, next_freeslot;
280 
281    aspacem_assert(name);
282 
283    if (0) VG_(debugLog)(0, "aspacem", "allocate_segname %s\n", name);
284 
285    len = VG_(strlen)(name);
286 
287    /* First see if we already have the name. */
288    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
289       if (is_freeslot(ix)) continue;
290       if (VG_(strcmp)(name, segnames + ix) == 0) {
291          inc_refcount(ix);
292          return ix;
293       }
294    }
295 
296    /* Is there a free slot in the string table from a previously "freed"
297       segment name ? */
298    Int prev;
299    for (prev = -1, ix = freeslot_chain; ix != end_of_chain;
300         prev = ix, ix = next_freeslot) {
301       next_freeslot = get_slotindex(ix);  // next in chain
302       size = get_slotsize(ix);
303 
304       if (size >= len + 1) {
305          /* Note, if the size of the slot is a lot larger than the length
306             of the string we're about to store in it, we could split the
307             slot into two. But that complicates matters and as we're not
308             doing any coalescing of adjacent free slots this could lead to
309             fragmentation. */
310          if (prev == -1)
311             freeslot_chain = next_freeslot;
312          else
313             put_slotindex(prev, next_freeslot);
314          put_refcount(ix, 1);
315          put_slotsize(ix, size);
316          VG_(strcpy)(segnames + ix, name);
317          ++num_segnames;
318          return ix;
319       }
320    }
321 
322    /* We need to add a new name. */
323 
324    /* Note, that we need at least two bytes in the payload. The reason is
325       that the payload area will be used to store the size of the slot when
326       the slot is on the freelist. */
327    if (len == 0) len = 1;
328 
329    /* Is there enough room in the string table? The OVERHEAD is for the
330       sentinel following the payload of new slot. */
331    SizeT need = len + 1 + overhead;
332    if (need > (sizeof segnames) - segnames_used) {
333       return -1;
334    }
335 
336    ++num_segnames;
337    ++num_slots;
338 
339    /* copy it in */
340    ix = segnames_used;
341    put_refcount(ix, 1);
342    put_slotsize(ix, len + 1);
343    VG_(strcpy)(segnames + ix, name);
344    segnames_used += need;
345 
346    /* Add sentinel at end of segment name list */
347    put_sentinel(segnames_used);
348 
349    return ix;
350 }
351 
352 /* Debugging output */
353 void
ML_(am_show_segnames)354 ML_(am_show_segnames)(Int logLevel, const HChar *prefix)
355 {
356    UInt size, ix, i;
357 
358    VG_(debugLog)(logLevel, "aspacem", "%u segment names in %u slots\n",
359                  num_segnames, num_slots);
360 
361    if (freeslot_chain == end_of_chain)
362       VG_(debugLog)(logLevel, "aspacem", "freelist is empty\n");
363    else
364       VG_(debugLog)(logLevel, "aspacem", "freelist begins at %u\n",
365                     freeslot_chain);
366    for (i = 0, ix = overhead; (size = get_slotsize(ix)) != 0;
367         ix += size + overhead, ++i) {
368       if (is_freeslot(ix))
369          VG_(debugLog)(logLevel, "aspacem",
370                        "(%u,%u,0) [free slot: size=%u  next=%u]\n", i, ix,
371                        get_slotsize(ix), get_slotindex(ix));
372       else
373          VG_(debugLog)(logLevel, "aspacem",
374                        "(%u,%u,%u) %s\n", i, ix, get_refcount(ix),
375                        segnames + ix);
376    }
377 }
378 
379 /* Returns a sequence number for the fnIdx position in segnames.
380    Used in aspacemgr debug output to associate a segment with
381    a segment name. */
382 Int
ML_(am_segname_get_seqnr)383 ML_(am_segname_get_seqnr)(Int fnIdx)
384 {
385    SizeT ix, size;
386    Int seqnr = -1;
387 
388    if (fnIdx == -1) return -1;   // shortcut
389 
390    for (ix = overhead; (size = get_slotsize(ix)) != 0; ix += size + overhead) {
391       seqnr++;
392       if (ix == fnIdx)
393          return seqnr;
394    }
395 
396    // We should always find the given index; something's busted
397    aspacem_assert(0);
398    return -1;
399 }
400 
401 /* Initialise the string table for segment names. It contains an empty
402    string which is not referenced. */
403 void
ML_(am_segnames_init)404 ML_(am_segnames_init)(void)
405 {
406    aspacem_assert(sizeof segnames >= overhead);
407 
408    segnames_used = overhead;
409    put_sentinel(segnames_used);
410 }
411 
412 /* Increase reference count of segment name identified by IX. */
413 void
ML_(am_inc_refcount)414 ML_(am_inc_refcount)(Int ix)
415 {
416    if (ix != -1)
417       inc_refcount(ix);
418 }
419 
420 /* Decrease reference count of segment name identified by IX. */
421 void
ML_(am_dec_refcount)422 ML_(am_dec_refcount)(Int ix)
423 {
424    if (ix != -1)
425       dec_refcount(ix);
426 }
427 
428 Bool
ML_(am_sane_segname)429 ML_(am_sane_segname)(Int ix)
430 {
431    return ix == -1 || (ix >= overhead && ix < segnames_used);
432 }
433 
434 const HChar *
ML_(am_get_segname)435 ML_(am_get_segname)(Int ix)
436 {
437    return (ix == -1) ? NULL : segnames + ix;
438 }
439 
440 /*--------------------------------------------------------------------*/
441 /*--- end                                                          ---*/
442 /*--------------------------------------------------------------------*/
443