1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- An abstraction that provides a file-reading mechanism.       ---*/
5 /*---                                                      image.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2013-2013 Mozilla Foundation
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27    02111-1307, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 */
31 
32 /* Contributed by Julian Seward <jseward@acm.org> */
33 
34 /* See the corresponding auxprogs/valgrind-di-server.c for a list of
35    cleanups for this file and itself. */
36 
37 #include "pub_core_basics.h"
38 #include "pub_core_vki.h"
39 #include "pub_core_libcbase.h"
40 #include "pub_core_libcassert.h"
41 #include "pub_core_libcprint.h"
42 #include "pub_core_libcproc.h"     /* VG_(read_millisecond_timer) */
43 #include "pub_core_libcfile.h"
44 #include "priv_misc.h"             /* dinfo_zalloc/free/strdup */
45 #include "priv_image.h"            /* self */
46 
47 #include "minilzo.h"
48 
/* These values (1024 entries of 8192 bytes each) give a cache
   size of 8MB. */
51 #define CACHE_ENTRY_SIZE_BITS (12+1)
52 #define CACHE_N_ENTRIES       1024
53 
54 #define CACHE_ENTRY_SIZE      (1 << CACHE_ENTRY_SIZE_BITS)
55 
/* An entry in the cache: a copy of one contiguous chunk of the image,
   holding at most CACHE_ENTRY_SIZE bytes. */
typedef
   struct {
      DiOffT off; // image offset of data[0]
      SizeT  used; // number of valid bytes in data[]: 1 .. sizeof(data),
                   // or 0 to denote not-in-use
      UChar  data[CACHE_ENTRY_SIZE];
   }
   CEnt;
64 
/* Source for files: describes where the bytes of an image come from,
   either a file in the local filesystem or a debuginfo server. */
typedef
   struct {
      // True: img is of local file.  False: img is from a server.
      Bool  is_local;
      // The fd for the local file, or sd for a remote server.
      Int   fd;
      // The name.  Heap allocated; released with ML_(dinfo_free) when
      // the image is torn down.  Used only for printing error messages;
      // hence it doesn't really matter what this contains.
      HChar* name;
      // The remaining field is only meaningful when using remote files
      // (that is, using a debuginfo server; hence when is_local==False).
      // Session ID allocated to us by the server.  Cannot be zero for a
      // remote image; stays zero for a local one.
      ULong session_id;
   }
   Source;
81 
/* An image: a file (local or remote) viewed as a flat array of bytes,
   together with a cache of recently used chunks of it. */
struct _DiImage {
   // The source -- how to get hold of the file we are reading
   Source source;
   // Total size of the image, in bytes.
   SizeT size;
   // The number of entries used.  0 .. CACHE_N_ENTRIES
   UInt  ces_used;
   // Pointers to the entries.  ces[0 .. ces_used-1] are non-NULL.
   // ces[ces_used .. CACHE_N_ENTRIES-1] are NULL.
   // The non-NULL entries may be arranged arbitrarily.  The lookup
   // code moves whichever entry it uses to slot 0, which gives a
   // pseudo-LRU ordering.
   CEnt* ces[CACHE_N_ENTRIES];
};
95 
/* A frame: the unit of communication with the debuginfo server.  The
   first 4 bytes of |data| give the kind of the frame, and the rest of
   it is kind-specific data.  |n_data| is the total number of bytes in
   |data|, including the 4 kind bytes. */
typedef  struct { UChar* data; SizeT n_data; }  Frame;
99 
/* Store the 32-bit value |n| into dst[0 .. 3] in little-endian order,
   that is, least significant byte first. */
static void write_UInt_le ( /*OUT*/UChar* dst, UInt n )
{
   dst[0] = (UChar)(n & 0xFF);
   dst[1] = (UChar)((n >> 8) & 0xFF);
   dst[2] = (UChar)((n >> 16) & 0xFF);
   dst[3] = (UChar)((n >> 24) & 0xFF);
}
108 
read_UInt_le(const UChar * src)109 static UInt read_UInt_le ( const UChar* src )
110 {
111    UInt r = 0;
112    Int i;
113    for (i = 3; i >= 0; i--) {
114       r <<= 8;
115       r += (UInt)src[i];
116    }
117    return r;
118 }
119 
/* Store the 64-bit value |n| into dst[0 .. 7] in little-endian order,
   that is, least significant byte first. */
static void write_ULong_le ( /*OUT*/UChar* dst, ULong n )
{
   UInt byteNo;
   for (byteNo = 0; byteNo < 8; byteNo++)
      dst[byteNo] = (UChar)((n >> (8 * byteNo)) & 0xFF);
}
128 
read_ULong_le(const UChar * src)129 static ULong read_ULong_le ( const UChar* src )
130 {
131    ULong r = 0;
132    Int i;
133    for (i = 7; i >= 0; i--) {
134       r <<= 8;
135       r += (ULong)src[i];
136    }
137    return r;
138 }
139 
140 
141 /* Set |sd| to be blocking.  Returns True on success. */
set_blocking(int sd)142 static Bool set_blocking ( int sd )
143 {
144    Int res;
145    res = VG_(fcntl)(sd, VKI_F_GETFL, 0/*ignored*/);
146    if (res != -1)
147       res = VG_(fcntl)(sd, VKI_F_SETFL, res & ~VKI_O_NONBLOCK);
148    return (res != -1);
149 }
150 
151 /* Tries to read 'len' bytes from fd, blocking if necessary.  Assumes
152    fd has been set in blocking mode.  If it returns with the number of
153    bytes read < len, it means that either fd was closed, or there was
154    an error on it. */
my_read(Int fd,UChar * buf,Int len)155 static Int my_read ( Int fd, UChar* buf, Int len )
156 {
157    Int nRead = 0;
158    while (1) {
159       if (nRead == len) return nRead;
160       vg_assert(nRead < len);
161       Int nNeeded = len - nRead;
162       vg_assert(nNeeded > 0);
163       Int n = VG_(read)(fd, &buf[nRead], nNeeded);
164       if (n <= 0) return nRead; /* error or EOF */
165       nRead += n;
166    }
167 }
168 
169 /* Tries to write 'len' bytes to fd, blocking if necessary.  Assumes
170    fd has been set in blocking mode.  If it returns with the number of
171    bytes written < len, it means that either fd was closed, or there was
172    an error on it. */
my_write(Int fd,const UChar * buf,Int len)173 static Int my_write ( Int fd, const UChar* buf, Int len )
174 {
175    Int nWritten = 0;
176    while (1) {
177       if (nWritten == len) return nWritten;
178       vg_assert(nWritten < len);
179       Int nStillToDo = len - nWritten;
180       vg_assert(nStillToDo > 0);
181       Int n = VG_(write_socket)(fd, &buf[nWritten], nStillToDo);
182       if (n < 0) return nWritten; /* error or EOF */
183       nWritten += n;
184    }
185 }
186 
/* If we lost communication with the remote server, just give up.
   Recovering is too difficult.  Prints an explanatory message for the
   user and then terminates the process with exit code 1. */
static void give_up__comms_lost(void)
{
   VG_(umsg)("\n");
   VG_(umsg)(
      "Valgrind: debuginfo reader: Lost communication with the remote\n");
   VG_(umsg)(
      "Valgrind: debuginfo server.  I can't recover.  Giving up.  Sorry.\n");
   VG_(umsg)("\n");
   VG_(exit)(1);
   /*NOTREACHED*/
}
200 
/* Called when a read outside the bounds of an image was requested.
   Tells the user that the debuginfo file is possibly corrupted and
   then terminates the process with exit code 1. */
static void give_up__image_overrun(void)
{
   VG_(umsg)("\n");
   VG_(umsg)(
      "Valgrind: debuginfo reader: Possibly corrupted debuginfo file.\n");
   VG_(umsg)(
      "Valgrind: I can't recover.  Giving up.  Sorry.\n");
   VG_(umsg)("\n");
   VG_(exit)(1);
   /*NOTREACHED*/
}
212 
213 /* "Do" a transaction: that is, send the given frame to the server and
214    return the frame it sends back.  Caller owns the resulting frame
215    and must free it.  A NULL return means the transaction failed for
216    some reason. */
do_transaction(Int sd,const Frame * req)217 static Frame* do_transaction ( Int sd, const Frame* req )
218 {
219    if (0) VG_(printf)("CLIENT: send %c%c%c%c\n",
220                       req->data[0], req->data[1], req->data[2], req->data[3]);
221 
222    /* What goes on the wire is:
223          adler(le32) n_data(le32) data[0 .. n_data-1]
224       where the checksum covers n_data as well as data[].
225    */
226    /* The initial Adler-32 value */
227    UInt adler = VG_(adler32)(0, NULL, 0);
228 
229    /* Fold in the length field, encoded as le32. */
230    UChar wr_first8[8];
231    write_UInt_le(&wr_first8[4], req->n_data);
232    adler = VG_(adler32)(adler, &wr_first8[4], 4);
233    /* Fold in the data values */
234    adler = VG_(adler32)(adler, req->data, req->n_data);
235    write_UInt_le(&wr_first8[0], adler);
236 
237    Int r = my_write(sd, &wr_first8[0], 8);
238    if (r != 8) return NULL;
239    vg_assert(req->n_data >= 4); // else ill formed -- no KIND field
240    r = my_write(sd, req->data, req->n_data);
241    if (r != req->n_data) return NULL;
242 
243    /* So, the request is sent.  Now get a request of the same format
244       out of the channel. */
245    UChar rd_first8[8];  // adler32; length32
246    r = my_read(sd, &rd_first8[0], 8);
247    if (r != 8) return NULL;
248    UInt rd_adler = read_UInt_le(&rd_first8[0]);
249    UInt rd_len   = read_UInt_le(&rd_first8[4]);
250    /* Allocate a Frame to hold the result data, and read into it. */
251    // Reject obviously-insane length fields.
252    if (rd_len < 4 || rd_len > 4*1024*1024) return NULL;
253    Frame* res = ML_(dinfo_zalloc)("di.do_transaction.1", sizeof(Frame));
254    res->n_data = rd_len;
255    res->data = ML_(dinfo_zalloc)("di.do_transaction.2", rd_len);
256    r = my_read(sd, res->data, res->n_data);
257    if (r != rd_len) return NULL;
258 
259    if (0) VG_(printf)("CLIENT: recv %c%c%c%c\n",
260                       res->data[0], res->data[1], res->data[2], res->data[3]);
261 
262    /* Compute the checksum for the received data, and check it. */
263    adler = VG_(adler32)(0, NULL, 0); // initial value
264    adler = VG_(adler32)(adler, &rd_first8[4], 4);
265    if (res->n_data > 0)
266       adler = VG_(adler32)(adler, res->data, res->n_data);
267 
268    if (adler/*computed*/ != rd_adler/*expected*/) return NULL;
269    return res;
270 }
271 
/* Release a frame: first its payload, then the Frame itself.  Both
   |fr| and its payload must be non-NULL. */
static void free_Frame ( Frame* fr )
{
   vg_assert(fr && fr->data);
   UChar* payload = fr->data;
   ML_(dinfo_free)(payload);
   ML_(dinfo_free)(fr);
}
278 
mk_Frame_noargs(const HChar * tag)279 static Frame* mk_Frame_noargs ( const HChar* tag )
280 {
281    vg_assert(VG_(strlen)(tag) == 4);
282    Frame* f = ML_(dinfo_zalloc)("di.mFn.1", sizeof(Frame));
283    f->n_data = 4;
284    f->data = ML_(dinfo_zalloc)("di.mFn.2", f->n_data);
285    VG_(memcpy)(&f->data[0], tag, 4);
286    return f;
287 }
288 
mk_Frame_le64_le64_le64(const HChar * tag,ULong n1,ULong n2,ULong n3)289 static Frame* mk_Frame_le64_le64_le64 ( const HChar* tag,
290                                         ULong n1, ULong n2, ULong n3 )
291 {
292    vg_assert(VG_(strlen)(tag) == 4);
293    Frame* f = ML_(dinfo_zalloc)("di.mFlll.1", sizeof(Frame));
294    f->n_data = 4 + 3*8;
295    f->data = ML_(dinfo_zalloc)("di.mFlll.2", f->n_data);
296    VG_(memcpy)(&f->data[0], tag, 4);
297    write_ULong_le(&f->data[4 + 0*8], n1);
298    write_ULong_le(&f->data[4 + 1*8], n2);
299    write_ULong_le(&f->data[4 + 2*8], n3);
300    return f;
301 }
302 
mk_Frame_asciiz(const HChar * tag,const HChar * str)303 static Frame* mk_Frame_asciiz ( const HChar* tag, const HChar* str )
304 {
305    vg_assert(VG_(strlen)(tag) == 4);
306    Frame* f = ML_(dinfo_zalloc)("di.mFa.1", sizeof(Frame));
307    SizeT n_str = VG_(strlen)(str);
308    f->n_data = 4 + n_str + 1;
309    f->data = ML_(dinfo_zalloc)("di.mFa.2", f->n_data);
310    VG_(memcpy)(&f->data[0], tag, 4);
311    VG_(memcpy)(&f->data[4], str, n_str);
312    vg_assert(f->data[4 + n_str] == 0);
313    return f;
314 }
315 
parse_Frame_le64(const Frame * fr,const HChar * tag,ULong * n1)316 static Bool parse_Frame_le64 ( const Frame* fr, const HChar* tag,
317                                /*OUT*/ULong* n1 )
318 {
319    vg_assert(VG_(strlen)(tag) == 4);
320    if (!fr || !fr->data) return False;
321    if (fr->n_data < 4) return False;
322    if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
323    if (fr->n_data != 4 + 1*8) return False;
324    *n1 = read_ULong_le(&fr->data[4 + 0*8]);
325    return True;
326 }
327 
parse_Frame_le64_le64(const Frame * fr,const HChar * tag,ULong * n1,ULong * n2)328 static Bool parse_Frame_le64_le64 ( const Frame* fr, const HChar* tag,
329                                     /*OUT*/ULong* n1, /*OUT*/ULong* n2 )
330 {
331    vg_assert(VG_(strlen)(tag) == 4);
332    if (!fr || !fr->data) return False;
333    if (fr->n_data < 4) return False;
334    if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
335    if (fr->n_data != 4 + 2*8) return False;
336    *n1 = read_ULong_le(&fr->data[4 + 0*8]);
337    *n2 = read_ULong_le(&fr->data[4 + 1*8]);
338    return True;
339 }
340 
parse_Frame_asciiz(const Frame * fr,const HChar * tag,UChar ** str)341 static Bool parse_Frame_asciiz ( const Frame* fr, const HChar* tag,
342                                  /*OUT*/UChar** str )
343 {
344    vg_assert(VG_(strlen)(tag) == 4);
345    if (!fr || !fr->data) return False;
346    if (fr->n_data < 4) return False;
347    if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
348    if (fr->n_data < 5) return False; // else there isn't even enough
349                                      // space for the terminating zero
350    /* Find the terminating zero and ensure it's right at the end
351       of the data.  If not, the frame is malformed. */
352    SizeT i = 4;
353    while (True) {
354       if (i >= fr->n_data) break;
355       if (fr->data[i] == 0) break;
356       i++;
357    }
358    vg_assert(i <= fr->n_data);
359    if (i == fr->n_data-1 && fr->data[i] == 0) {
360       *str = &fr->data[4];
361       return True;
362    } else {
363       return False;
364    }
365 }
366 
parse_Frame_le64_le64_le64_bytes(const Frame * fr,const HChar * tag,ULong * n1,ULong * n2,ULong * n3,UChar ** data,ULong * n_data)367 static Bool parse_Frame_le64_le64_le64_bytes (
368                const Frame* fr, const HChar* tag,
369                /*OUT*/ULong* n1, /*OUT*/ULong* n2, /*OUT*/ULong* n3,
370                /*OUT*/UChar** data, /*OUT*/ULong* n_data
371             )
372 {
373    vg_assert(VG_(strlen)(tag) == 4);
374    if (!fr || !fr->data) return False;
375    if (fr->n_data < 4) return False;
376    if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
377    if (fr->n_data < 4 + 3*8) return False;
378    *n1 = read_ULong_le(&fr->data[4 + 0*8]);
379    *n2 = read_ULong_le(&fr->data[4 + 1*8]);
380    *n3 = read_ULong_le(&fr->data[4 + 2*8]);
381    *data   = &fr->data[4 + 3*8];
382    *n_data = fr->n_data - (4 + 3*8);
383    vg_assert(fr->n_data >= 4 + 3*8);
384    return True;
385 }
386 
/* Round |i| down to the nearest multiple of CACHE_ENTRY_SIZE, by
   clearing its low CACHE_ENTRY_SIZE_BITS bits. */
static DiOffT block_round_down ( DiOffT i )
{
   const DiOffT low_bits = (DiOffT)CACHE_ENTRY_SIZE - 1;
   return i & ~low_bits;
}
391 
/* Is this offset inside this CEnt?  That is, does |off| fall within
   the |cent->used| bytes of the image, starting at |cent->off|, that
   the entry currently holds? */
static inline Bool is_in_CEnt ( const CEnt* cent, DiOffT off )
{
   /* This assertion is checked by set_CEnt, so checking it here has
      no benefit, whereas skipping it does remove it from the hottest
      path. */
   /* vg_assert(cent->used > 0 && cent->used <= CACHE_ENTRY_SIZE); */
   /* What we want to return is:
        cent->off <= off && off < cent->off + cent->used;
      This is however a very hot path, so here's an alternative that
      uses only one conditional branch, using the following
      transformation, where all quantities are unsigned:
              x >= LO && x < LO+N
         -->  x-LO >= 0 && x-LO < LO+N-LO
         -->  x-LO >= 0 && x-LO < N
         -->  x-LO < N
      The last step works because, when x < LO, the unsigned
      subtraction x-LO wraps around to a huge value, which then fails
      the "< N" test.
      This is however only valid when the original bounds, that is, LO
      .. LO+N-1, do not wrap around the end of the address space.  That
      is, we require that LO <= LO+N-1.  But that's OK .. we don't
      expect wraparounds in CEnts or for that matter any object
      allocated from C-land.  See Hacker's Delight, Chapter 4.1,
      "Checking Bounds of Integers", for more details.
   */
   return off - cent->off < cent->used;
}
417 
418 /* Allocate a new CEnt, connect it to |img|, and return its index. */
alloc_CEnt(DiImage * img)419 static UInt alloc_CEnt ( DiImage* img )
420 {
421    vg_assert(img);
422    vg_assert(img->ces_used < CACHE_N_ENTRIES);
423    UInt entNo = img->ces_used;
424    img->ces_used++;
425    vg_assert(img->ces[entNo] == NULL);
426    img->ces[entNo] = ML_(dinfo_zalloc)("di.alloc_CEnt.1", sizeof(CEnt));
427    return entNo;
428 }
429 
430 /* Move the given entry to the top and slide those above it down by 1,
431    to make space. */
move_CEnt_to_top(DiImage * img,UInt entNo)432 static void move_CEnt_to_top ( DiImage* img, UInt entNo )
433 {
434    vg_assert(img->ces_used <= CACHE_N_ENTRIES);
435    vg_assert(entNo > 0 && entNo < img->ces_used);
436    CEnt* tmp = img->ces[entNo];
437    while (entNo > 0) {
438       img->ces[entNo] = img->ces[entNo-1];
439       entNo--;
440    }
441    img->ces[0] = tmp;
442 }
443 
/* Set the given entry so that it has a chunk of the file containing
   the given offset.  It is this function that brings data into the
   cache, either by reading the local file or pulling it from the
   remote server.

   |entNo| must be an in-use slot of |img| and |off| must lie inside
   the image.  The chunk loaded starts at |off| rounded down to a
   multiple of CACHE_ENTRY_SIZE and is CACHE_ENTRY_SIZE bytes long,
   or shorter if the image ends first.

   For a remote image, any failure to get the data from the server is
   fatal: a message is printed and give_up__comms_lost is called. */
static void set_CEnt ( const DiImage* img, UInt entNo, DiOffT off )
{
   SizeT len;
   DiOffT off_orig = off;
   vg_assert(img);
   vg_assert(img->ces_used <= CACHE_N_ENTRIES);
   /* NOTE: entNo is unsigned, so the 'entNo >= 0' half is always
      true; the effective check is the upper bound. */
   vg_assert(entNo >= 0 && entNo < img->ces_used);
   vg_assert(off < img->size);
   vg_assert(img->ces[entNo] != NULL);
   /* Compute [off, +len) as the slice we are going to read. */
   off = block_round_down(off);
   len = img->size - off;
   if (len > CACHE_ENTRY_SIZE) len = CACHE_ENTRY_SIZE;
   /* It is conceivable that the 'len > 0' bit could fail if we make
      an image with a zero sized file.  But then no 'get' request on
      that image would be valid. */
   vg_assert(len > 0 && len <= CACHE_ENTRY_SIZE);
   vg_assert(off + len <= img->size);
   vg_assert(off <= off_orig && off_orig < off+len);
   /* So, read  off .. off+len-1  into the entry. */
   CEnt* ce = img->ces[entNo];

   /* Disabled debugging aid: prints the size and offset of each
      chunk loaded and the time since the previous load. */
   if (0) {
      static UInt t_last = 0;
      static ULong nread = 0;
      UInt now = VG_(read_millisecond_timer)();
      UInt delay = now - t_last;
      t_last = now;
      nread += len;
      VG_(printf)("XXXXXXXX (tot %'lld)  read %'ld  offset %'lld  delay %'u\n",
                  nread, len, off, delay);
   }

   if (img->source.is_local) {
      // Simple: just read it
      // NOTE(review): only an error return is checked for here; the
      // number of bytes actually read is not compared against len.
      SysRes sr = VG_(pread)(img->source.fd, &ce->data[0], (Int)len, off);
      vg_assert(!sr_isError(sr));
   } else {
      // Not so simple: poke the server
      vg_assert(img->source.session_id > 0);
      Frame* req
         = mk_Frame_le64_le64_le64("READ", img->source.session_id, off, len);
      Frame* res = do_transaction(img->source.fd, req);
      free_Frame(req); req = NULL;
      if (!res) goto server_fail;
      ULong  rx_session_id = 0, rx_off = 0, rx_len = 0, rx_zdata_len = 0;
      UChar* rx_data = NULL;
      /* Pretty confusing.  rx_session_id, rx_off and rx_len are copies
         of the values that we requested in the READ frame just above,
         so we can be sure that the server is responding to the right
         request.  It just copies them from the request into the
         response.  rx_data is the actual data, and rx_zdata_len is
         its compressed length.  Hence rx_len must equal len, but
         rx_zdata_len can be different -- smaller, hopefully.. */
      if (!parse_Frame_le64_le64_le64_bytes
          (res, "RDOK", &rx_session_id, &rx_off,
                        &rx_len, &rx_data, &rx_zdata_len))
         goto server_fail;
      if (rx_session_id != img->source.session_id
          || rx_off != off || rx_len != len || rx_data == NULL)
         goto server_fail;

      //VG_(memcpy)(&ce->data[0], rx_data, len);
      // Decompress into the destination buffer
      // Tell the lib the max number of output bytes it can write.
      // After the call, this holds the number of bytes actually written,
      // and it's an error if it is different.
      lzo_uint out_len = len;
      Int lzo_rc = lzo1x_decompress_safe(rx_data, rx_zdata_len,
                                         &ce->data[0], &out_len,
                                         NULL);
      Bool ok = lzo_rc == LZO_E_OK && out_len == len;
      if (!ok) goto server_fail;

      free_Frame(res); res = NULL;
      goto end_of_else_clause;
     server_fail:
      /* The server screwed up somehow.  Report why, if the response
         (when there is one) is a FAIL frame carrying a reason, and
         then give up. */
      if (res) {
         UChar* reason = NULL;
         if (parse_Frame_asciiz(res, "FAIL", &reason)) {
            VG_(umsg)("set_CEnt (reading data from DI server): fail: "
                      "%s\n", reason);
         } else {
            VG_(umsg)("set_CEnt (reading data from DI server): fail: "
                      "unknown reason\n");
         }
         free_Frame(res); res = NULL;
      } else {
         VG_(umsg)("set_CEnt (reading data from DI server): fail: "
                   "server unexpectedly closed the connection\n");
      }
      give_up__comms_lost();
      /* NOTREACHED */
      vg_assert(0);
     end_of_else_clause:
      {}
   }

   /* The data is in place; record which part of the image it is. */
   ce->off  = off;
   ce->used = len;
   vg_assert(ce->used > 0 && ce->used <= CACHE_ENTRY_SIZE);
}
551 
552 __attribute__((noinline))
get_slowcase(DiImage * img,DiOffT off)553 static UChar get_slowcase ( DiImage* img, DiOffT off )
554 {
555    /* Stay sane .. */
556    vg_assert(off < img->size);
557    vg_assert(img->ces_used <= CACHE_N_ENTRIES);
558    UInt i;
559    /* Start the search at entry 1, since the fast-case function
560       checked slot zero already. */
561    for (i = 1; i < img->ces_used; i++) {
562       vg_assert(img->ces[i]);
563       if (is_in_CEnt(img->ces[i], off))
564          break;
565    }
566    vg_assert(i <= img->ces_used);
567    if (i == img->ces_used) {
568       /* It's not in any entry.  Either allocate a new entry or
569          recycle the LRU one. */
570       if (img->ces_used == CACHE_N_ENTRIES) {
571          /* All entries in use.  Recycle the (ostensibly) LRU one. */
572          set_CEnt(img, CACHE_N_ENTRIES-1, off);
573          i = CACHE_N_ENTRIES-1;
574       } else {
575          /* Allocate a new one, and fill it in. */
576          UInt entNo = alloc_CEnt(img);
577          set_CEnt(img, entNo, off);
578          i = entNo;
579       }
580    } else {
581       /* We found it at position 'i'. */
582       vg_assert(i > 0);
583    }
584    if (i > 0) {
585       move_CEnt_to_top(img, i);
586       i = 0;
587    }
588    vg_assert(is_in_CEnt(img->ces[i], off));
589    return img->ces[i]->data[ off - img->ces[i]->off ];
590 }
591 
592 // This is called a lot, so do the usual fast/slow split stuff on it. */
get(DiImage * img,DiOffT off)593 static inline UChar get ( DiImage* img, DiOffT off )
594 {
595    /* Most likely case is, it's in the ces[0] position. */
596    /* ML_(img_from_local_file) requests a read for ces[0] when
597       creating the image.  Hence slot zero is always non-NULL, so we
598       can skip this test. */
599    if (LIKELY(/* img->ces[0] != NULL && */
600               is_in_CEnt(img->ces[0], off))) {
601       return img->ces[0]->data[ off - img->ces[0]->off ];
602    }
603    /* Else we'll have to fish around for it. */
604    return get_slowcase(img, off);
605 }
606 
607 /* Create an image from a file in the local filesystem.  This is
608    relatively straightforward. */
ML_(img_from_local_file)609 DiImage* ML_(img_from_local_file)(const HChar* fullpath)
610 {
611    SysRes         fd;
612    struct vg_stat stat_buf;
613    DiOffT         size;
614 
615    fd = VG_(open)(fullpath, VKI_O_RDONLY, 0);
616    if (sr_isError(fd))
617       return NULL;
618 
619    if (VG_(fstat)(sr_Res(fd), &stat_buf) != 0) {
620       VG_(close)(sr_Res(fd));
621       return NULL;
622    }
623 
624    size = stat_buf.size;
625    if (size == 0 || size == DiOffT_INVALID
626        || /* size is unrepresentable as a SizeT */
627           size != (DiOffT)(SizeT)(size)) {
628       VG_(close)(sr_Res(fd));
629       return NULL;
630    }
631 
632    DiImage* img = ML_(dinfo_zalloc)("di.image.ML_iflf.1", sizeof(DiImage));
633    img->source.is_local = True;
634    img->source.fd       = sr_Res(fd);
635    img->size            = size;
636    img->ces_used        = 0;
637    img->source.name     = ML_(dinfo_strdup)("di.image.ML_iflf.2", fullpath);
638    /* img->ces is already zeroed out */
639    vg_assert(img->source.fd >= 0);
640 
641    /* Force the zeroth entry to be the first chunk of the file.
642       That's likely to be the first part that's requested anyway, and
643       loading it at this point forcing img->cent[0] to always be
644       non-empty, thereby saving us an is-it-empty check on the fast
645       path in get(). */
646    UInt entNo = alloc_CEnt(img);
647    vg_assert(entNo == 0);
648    set_CEnt(img, 0, 0);
649 
650    return img;
651 }
652 
653 
654 /* Create an image from a file on a remote debuginfo server.  This is
655    more complex.  There are lots of ways in which it can fail. */
ML_(img_from_di_server)656 DiImage* ML_(img_from_di_server)(const HChar* filename,
657                                  const HChar* serverAddr)
658 {
659    if (filename == NULL || serverAddr == NULL)
660       return NULL;
661 
662    /* The filename must be a plain filename -- no slashes at all. */
663    if (VG_(strchr)(filename, '/') != NULL)
664       return NULL;
665 
666    /* Try to connect to the server.  A side effect of this is to parse
667       and reject, if syntactically invalid, |serverAddr|.  Reasons why
668       this could fail:
669       - serverAddr is not of the form d.d.d.d:d or d.d.d.d
670       - attempt to connect to that address:port failed
671    */
672    Int sd = VG_(connect_via_socket)(serverAddr);
673    if (sd < 0)
674       return NULL;
675    if (!set_blocking(sd))
676       return NULL;
677    Int one = 1;
678    Int sr = VG_(setsockopt)(sd, VKI_IPPROTO_TCP, VKI_TCP_NODELAY,
679                             &one, sizeof(one));
680    vg_assert(sr == 0);
681 
682    /* Ok, we got a connection.  Ask it for version string, so as to be
683       reasonably sure we're talking to an instance of
684       auxprogs/valgrind-di-server and not to some other random program
685       that happens to be listening on that port. */
686    Frame* req = mk_Frame_noargs("VERS");
687    Frame* res = do_transaction(sd, req);
688    if (res == NULL)
689       goto fail; // do_transaction failed?!
690    UChar* vstr = NULL;
691    if (!parse_Frame_asciiz(res, "VEOK", &vstr))
692       goto fail; // unexpected response kind, or invalid ID string
693    vg_assert(vstr);
694    if (VG_(strcmp)("Valgrind Debuginfo Server, Version 1",
695                    (const HChar*)vstr) != 0)
696       goto fail; // wrong version string
697    free_Frame(req);
698    free_Frame(res);
699    req = NULL;
700    res = NULL;
701 
702    /* Server seems plausible.  Present it with the name of the file we
703       want and see if it'll give us back a session ID for it. */
704    req = mk_Frame_asciiz("OPEN", filename);
705    res = do_transaction(sd, req);
706    if (res == NULL)
707       goto fail;
708    ULong session_id = 0, size = 0;
709    if (!parse_Frame_le64_le64(res, "OPOK", &session_id, &size))
710       goto fail;
711    free_Frame(req);
712    free_Frame(res);
713    req = NULL;
714    res = NULL;
715 
716    /* We have a session ID.  We're ready to roll. */
717    DiImage* img = ML_(dinfo_zalloc)("di.image.ML_ifds.1", sizeof(DiImage));
718    img->source.is_local   = False;
719    img->source.fd         = sd;
720    img->source.session_id = session_id;
721    img->size              = size;
722    img->ces_used          = 0;
723    img->source.name       = ML_(dinfo_zalloc)("di.image.ML_ifds.2",
724                                               20 + VG_(strlen)(filename)
725                                                  + VG_(strlen)(serverAddr));
726    VG_(sprintf)(img->source.name, "%s at %s", filename, serverAddr);
727 
728    /* img->ces is already zeroed out */
729    vg_assert(img->source.fd >= 0);
730 
731    /* See comment on equivalent bit in ML_(img_from_local_file) for
732       rationale. */
733    UInt entNo = alloc_CEnt(img);
734    vg_assert(entNo == 0);
735    set_CEnt(img, 0, 0);
736 
737    return img;
738 
739   fail:
740    free_Frame(req);
741    if (res) {
742       UChar* reason = NULL;
743       if (parse_Frame_asciiz(res, "FAIL", &reason)) {
744          // HACK: if it's just telling us that the file can't
745          // be opened, don't print it, else we'll get flooded with
746          // such complaints, one for each main object for which there
747          // isn't a debuginfo file on the server.
748          if (0 != VG_(strcmp)((const HChar*)reason, "OPEN: cannot open file"))
749             VG_(umsg)("ML_(img_from_di_server): fail: %s\n", reason);
750       } else {
751          VG_(umsg)("ML_(img_from_di_server): fail: unknown reason\n");
752       }
753       free_Frame(res);
754    }
755    VG_(close)(sd);
756    return NULL;
757 }
758 
ML_(img_done)759 void ML_(img_done)(DiImage* img)
760 {
761    vg_assert(img);
762    if (img->source.is_local) {
763       /* Close the file; nothing else to do. */
764       vg_assert(img->source.session_id == 0);
765       VG_(close)(img->source.fd);
766    } else {
767       /* Close the socket.  The server can detect this and will scrub
768          the connection when it happens, so there's no need to tell it
769          explicitly by sending it a "CLOSE" message, or any such. */
770       vg_assert(img->source.session_id != 0);
771       VG_(close)(img->source.fd);
772    }
773 
774    /* Free up the cache entries, ultimately |img| itself. */
775    UInt i;
776    vg_assert(img->ces_used <= CACHE_N_ENTRIES);
777    for (i = 0; i < img->ces_used; i++) {
778       ML_(dinfo_free)(img->ces[i]);
779    }
780    /* Take the opportunity to sanity check the rest. */
781    for (i = i; i < img->ces_used; i++) {
782       vg_assert(img->ces[i] == NULL);
783    }
784    ML_(dinfo_free)(img->source.name);
785    ML_(dinfo_free)(img);
786 }
787 
ML_(img_size)788 DiOffT ML_(img_size)(const DiImage* img)
789 {
790    vg_assert(img);
791    return img->size;
792 }
793 
ML_(img_valid)794 inline Bool ML_(img_valid)(const DiImage* img, DiOffT offset, SizeT size)
795 {
796    vg_assert(img);
797    vg_assert(offset != DiOffT_INVALID);
798    return img->size > 0 && offset + size <= (DiOffT)img->size;
799 }
800 
801 /* Check the given range is valid, and if not, shut down the system.
802    An invalid range would imply that we're trying to read outside the
803    image, which normally means the image is corrupted somehow, or the
804    caller is buggy.  Recovering is too complex, and we have
805    probably-corrupt debuginfo, so just give up. */
ensure_valid(const DiImage * img,DiOffT offset,SizeT size,const HChar * caller)806 static void ensure_valid(const DiImage* img, DiOffT offset, SizeT size,
807                          const HChar* caller)
808 {
809    if (LIKELY(ML_(img_valid)(img, offset, size)))
810       return;
811    VG_(umsg)("Valgrind: debuginfo reader: ensure_valid failed:\n");
812    VG_(umsg)("Valgrind:   during call to %s\n", caller);
813    VG_(umsg)("Valgrind:   request for range [%llu, +%llu) exceeds\n",
814              (ULong)offset, (ULong)size);
815    VG_(umsg)("Valgrind:   valid image size of %llu for image:\n",
816              (ULong)img->size);
817    VG_(umsg)("Valgrind:   \"%s\"\n", img->source.name);
818    give_up__image_overrun();
819 }
820 
821 
ML_(img_get)822 void ML_(img_get)(/*OUT*/void* dst,
823                   DiImage* img, DiOffT offset, SizeT size)
824 {
825    vg_assert(img);
826    vg_assert(size > 0);
827    ensure_valid(img, offset, size, "ML_(img_get)");
828    SizeT i;
829    for (i = 0; i < size; i++) {
830       ((UChar*)dst)[i] = get(img, offset + i);
831    }
832 }
833 
ML_(img_get_some)834 SizeT ML_(img_get_some)(/*OUT*/void* dst,
835                         DiImage* img, DiOffT offset, SizeT size)
836 {
837    vg_assert(img);
838    vg_assert(size > 0);
839    ensure_valid(img, offset, size, "ML_(img_get_some)");
840    UChar* dstU = (UChar*)dst;
841    /* Use |get| in the normal way to get the first byte of the range.
842       This guarantees to put the cache entry containing |offset| in
843       position zero. */
844    dstU[0] = get(img, offset);
845    /* Now just read as many bytes as we can (or need) directly out of
846       entry zero, without bothering to call |get| each time. */
847    const CEnt* ce = img->ces[0];
848    vg_assert(ce && ce->used >= 1);
849    vg_assert(is_in_CEnt(ce, offset));
850    SizeT nToCopy = size - 1;
851    SizeT nAvail  = (SizeT)(ce->used - (offset + 1 - ce->off));
852    vg_assert(nAvail >= 0 && nAvail <= ce->used-1);
853    if (nAvail < nToCopy) nToCopy = nAvail;
854    VG_(memcpy)(&dstU[1], &ce->data[offset + 1 - ce->off], nToCopy);
855    return nToCopy + 1;
856 }
857 
858 
ML_(img_strlen)859 SizeT ML_(img_strlen)(DiImage* img, DiOffT off)
860 {
861    ensure_valid(img, off, 1, "ML_(img_strlen)");
862    SizeT i = 0;
863    while (get(img, off + i) != 0) i++;
864    return i;
865 }
866 
ML_(img_strdup)867 HChar* ML_(img_strdup)(DiImage* img, const HChar* cc, DiOffT offset)
868 {
869    ensure_valid(img, offset, 1, "ML_(img_strdup)");
870    SizeT  len = ML_(img_strlen)(img, offset);
871    HChar* res = ML_(dinfo_zalloc)(cc, len+1);
872    SizeT  i;
873    for (i = 0; i < len; i++) {
874       res[i] = get(img, offset+i);
875    }
876    vg_assert(res[len] == 0);
877    return res;
878 }
879 
ML_(img_strcmp)880 Int ML_(img_strcmp)(DiImage* img, DiOffT off1, DiOffT off2)
881 {
882    ensure_valid(img, off1, 1, "ML_(img_strcmp)(first arg)");
883    ensure_valid(img, off2, 1, "ML_(img_strcmp)(second arg)");
884    while (True) {
885       UChar c1 = get(img, off1);
886       UChar c2 = get(img, off2);
887       if (c1 < c2) return -1;
888       if (c1 > c2) return 1;
889       if (c1 == 0) return 0;
890       off1++; off2++;
891    }
892 }
893 
ML_(img_strcmp_c)894 Int ML_(img_strcmp_c)(DiImage* img, DiOffT off1, const HChar* str2)
895 {
896    ensure_valid(img, off1, 1, "ML_(img_strcmp_c)");
897    while (True) {
898       UChar c1 = get(img, off1);
899       UChar c2 = *(const UChar*)str2;
900       if (c1 < c2) return -1;
901       if (c1 > c2) return 1;
902       if (c1 == 0) return 0;
903       off1++; str2++;
904    }
905 }
906 
ML_(img_get_UChar)907 UChar ML_(img_get_UChar)(DiImage* img, DiOffT offset)
908 {
909    ensure_valid(img, offset, 1, "ML_(img_get_UChar)");
910    return get(img, offset);
911 }
912 
ML_(img_get_UShort)913 UShort ML_(img_get_UShort)(DiImage* img, DiOffT offset)
914 {
915    UShort r;
916    ML_(img_get)(&r, img, offset, sizeof(r));
917    return r;
918 }
919 
ML_(img_get_UInt)920 UInt ML_(img_get_UInt)(DiImage* img, DiOffT offset)
921 {
922    UInt r;
923    ML_(img_get)(&r, img, offset, sizeof(r));
924    return r;
925 }
926 
ML_(img_get_ULong)927 ULong ML_(img_get_ULong)(DiImage* img, DiOffT offset)
928 {
929    ULong r;
930    ML_(img_get)(&r, img, offset, sizeof(r));
931    return r;
932 }
933 
934 
935 /*
936  * This routine for calculating the CRC for a separate debug file
937  * is GPLed code borrowed from GNU binutils.
938  */
ML_(img_calc_gnu_debuglink_crc32)939 UInt ML_(img_calc_gnu_debuglink_crc32)(DiImage* img)
940 {
941   static const UInt crc32_table[256] =
942     {
943       0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
944       0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
945       0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
946       0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
947       0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
948       0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
949       0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
950       0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
951       0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
952       0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
953       0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
954       0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
955       0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
956       0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
957       0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
958       0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
959       0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
960       0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
961       0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
962       0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
963       0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
964       0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
965       0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
966       0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
967       0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
968       0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
969       0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
970       0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
971       0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
972       0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
973       0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
974       0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
975       0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
976       0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
977       0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
978       0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
979       0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
980       0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
981       0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
982       0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
983       0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
984       0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
985       0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
986       0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
987       0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
988       0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
989       0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
990       0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
991       0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
992       0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
993       0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
994       0x2d02ef8d
995     };
996 
997    vg_assert(img);
998 
999    /* If the image is local, calculate the CRC here directly.  If it's
1000       remote, forward the request to the server. */
1001    if (img->source.is_local) {
1002       /* Work through the image in 1 KB chunks. */
1003       UInt   crc      = 0xFFFFFFFF;
1004       DiOffT img_szB  = ML_(img_size)(img);
1005       DiOffT curr_off = 0;
1006       while (1) {
1007          vg_assert(curr_off >= 0 && curr_off <= img_szB);
1008          if (curr_off == img_szB) break;
1009          DiOffT avail = img_szB - curr_off;
1010          vg_assert(avail > 0 && avail <= img_szB);
1011          if (avail > 1024) avail = 1024;
1012          UChar buf[1024];
1013          SizeT nGot = ML_(img_get_some)(buf, img, curr_off, avail);
1014          vg_assert(nGot >= 1 && nGot <= avail);
1015          UInt i;
1016          for (i = 0; i < (UInt)nGot; i++)
1017             crc = crc32_table[(crc ^ buf[i]) & 0xff] ^ (crc >> 8);
1018          curr_off += nGot;
1019       }
1020       return ~crc & 0xFFFFFFFF;
1021    } else {
1022       Frame* req = mk_Frame_noargs("CRC3");
1023       Frame* res = do_transaction(img->source.fd, req);
1024       if (!res) goto remote_crc_fail;
1025       ULong crc32 = 0;
1026       if (!parse_Frame_le64(res, "CROK", &crc32)) goto remote_crc_fail;
1027       if ((crc32 & ~0xFFFFFFFFULL) != 0) goto remote_crc_fail;
1028       free_Frame(req);
1029       free_Frame(res);
1030       return (UInt)crc32;
1031      remote_crc_fail:
1032 
1033       // XXXX common this up with the READ diagnostic cases
1034       if (res) {
1035          UChar* reason = NULL;
1036          if (parse_Frame_asciiz(res, "FAIL", &reason)) {
1037             VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1038                       "%s\n", reason);
1039          } else {
1040             VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1041                       "unknown reason\n");
1042          }
1043       } else {
1044          VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1045                    "server unexpectedly closed the connection\n");
1046       }
1047 
1048       if (req) free_Frame(req);
1049       if (res) free_Frame(res);
1050       // FIXME: now what?
1051       give_up__comms_lost();
1052       /* NOTREACHED */
1053       vg_assert(0);
1054    }
1055    /*NOTREACHED*/
1056    vg_assert(0);
1057 }
1058 
1059 ////////////////////////////////////////////////////
1060 #include "minilzo-inl.c"
1061 
1062 /*--------------------------------------------------------------------*/
1063 /*--- end                                                  image.c ---*/
1064 /*--------------------------------------------------------------------*/
1065