1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- An abstraction that provides a file-reading mechanism.       ---*/
5 /*---                                                      image.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2013-2015 Mozilla Foundation
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27    02111-1307, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 */
31 
32 /* Contributed by Julian Seward <jseward@acm.org> */
33 
34 /* See the corresponding auxprogs/valgrind-di-server.c for a list of
35    cleanups for this file and itself. */
36 
37 #include "pub_core_basics.h"
38 #include "pub_core_vki.h"
39 #include "pub_core_libcbase.h"
40 #include "pub_core_libcassert.h"
41 #include "pub_core_libcprint.h"
42 #include "pub_core_libcproc.h"     /* VG_(read_millisecond_timer) */
43 #include "pub_core_libcfile.h"
44 #include "priv_misc.h"             /* dinfo_zalloc/free/strdup */
45 #include "priv_image.h"            /* self */
46 
47 #include "minilzo.h"
48 
/* These values (1024 entries of 8192 bytes each) give a cache
   size of 8MB. */
#define CACHE_ENTRY_SIZE_BITS (12+1)
#define CACHE_N_ENTRIES       1024

/* Size in bytes of the data area of one cache entry. */
#define CACHE_ENTRY_SIZE      (1 << CACHE_ENTRY_SIZE_BITS)
55 
56 /* An entry in the cache. */
57 typedef
58    struct {
59       DiOffT off; // file offset for data[0]
60       SizeT  used; // 1 .. sizeof(data), or 0 to denote not-in-use
61       UChar  data[CACHE_ENTRY_SIZE];
62    }
63    CEnt;
64 
65 /* Source for files */
66 typedef
67    struct {
68       // True: img is of local file.  False: img is from a server.
69       Bool  is_local;
70       // The fd for the local file, or sd for a remote server.
71       Int   fd;
72       // The name.  In ML_(dinfo_zalloc)'d space.  Used only for printing
73       // error messages; hence it doesn't really matter what this contains.
74       HChar* name;
75       // The rest of these fields are only valid when using remote files
76       // (that is, using a debuginfo server; hence when is_local==False)
77       // Session ID allocated to us by the server.  Cannot be zero.
78       ULong session_id;
79    }
80    Source;
81 
82 struct _DiImage {
83    // The source -- how to get hold of the file we are reading
84    Source source;
85    // Total size of the image.
86    SizeT size;
87    // The number of entries used.  0 .. CACHE_N_ENTRIES
88    UInt  ces_used;
89    // Pointers to the entries.  ces[0 .. ces_used-1] are non-NULL.
90    // ces[ces_used .. CACHE_N_ENTRIES-1] are NULL.
91    // The non-NULL entries may be arranged arbitrarily.  We expect to use
92    // a pseudo-LRU scheme though.
93    CEnt* ces[CACHE_N_ENTRIES];
94 };
95 
96 /* A frame.  The first 4 bytes of |data| give the kind of the frame,
97    and the rest of it is kind-specific data. */
98 typedef  struct { UChar* data; SizeT n_data; }  Frame;
99 
/* Store the 32-bit value |n| into dst[0 .. 3], least significant byte
   first. */
static void write_UInt_le ( /*OUT*/UChar* dst, UInt n )
{
   dst[0] = (UChar)(n & 0xFF);
   dst[1] = (UChar)((n >> 8) & 0xFF);
   dst[2] = (UChar)((n >> 16) & 0xFF);
   dst[3] = (UChar)((n >> 24) & 0xFF);
}
108 
read_UInt_le(const UChar * src)109 static UInt read_UInt_le ( const UChar* src )
110 {
111    UInt r = 0;
112    Int i;
113    for (i = 3; i >= 0; i--) {
114       r <<= 8;
115       r += (UInt)src[i];
116    }
117    return r;
118 }
119 
/* Store the 64-bit value |n| into dst[0 .. 7], least significant byte
   first. */
static void write_ULong_le ( /*OUT*/UChar* dst, ULong n )
{
   UInt byteNo;
   for (byteNo = 0; byteNo < 8; byteNo++) {
      dst[byteNo] = (UChar)((n >> (8 * byteNo)) & 0xFF);
   }
}
128 
read_ULong_le(const UChar * src)129 static ULong read_ULong_le ( const UChar* src )
130 {
131    ULong r = 0;
132    Int i;
133    for (i = 7; i >= 0; i--) {
134       r <<= 8;
135       r += (ULong)src[i];
136    }
137    return r;
138 }
139 
140 
141 /* Set |sd| to be blocking.  Returns True on success. */
set_blocking(int sd)142 static Bool set_blocking ( int sd )
143 {
144    Int res;
145    res = VG_(fcntl)(sd, VKI_F_GETFL, 0/*ignored*/);
146    if (res != -1)
147       res = VG_(fcntl)(sd, VKI_F_SETFL, res & ~VKI_O_NONBLOCK);
148    return (res != -1);
149 }
150 
151 /* Tries to read 'len' bytes from fd, blocking if necessary.  Assumes
152    fd has been set in blocking mode.  If it returns with the number of
153    bytes read < len, it means that either fd was closed, or there was
154    an error on it. */
my_read(Int fd,UChar * buf,Int len)155 static Int my_read ( Int fd, UChar* buf, Int len )
156 {
157    Int nRead = 0;
158    while (1) {
159       if (nRead == len) return nRead;
160       vg_assert(nRead < len);
161       Int nNeeded = len - nRead;
162       vg_assert(nNeeded > 0);
163       Int n = VG_(read)(fd, &buf[nRead], nNeeded);
164       if (n <= 0) return nRead; /* error or EOF */
165       nRead += n;
166    }
167 }
168 
169 /* Tries to write 'len' bytes to fd, blocking if necessary.  Assumes
170    fd has been set in blocking mode.  If it returns with the number of
171    bytes written < len, it means that either fd was closed, or there was
172    an error on it. */
my_write(Int fd,const UChar * buf,Int len)173 static Int my_write ( Int fd, const UChar* buf, Int len )
174 {
175    Int nWritten = 0;
176    while (1) {
177       if (nWritten == len) return nWritten;
178       vg_assert(nWritten < len);
179       Int nStillToDo = len - nWritten;
180       vg_assert(nStillToDo > 0);
181       Int n = VG_(write_socket)(fd, &buf[nWritten], nStillToDo);
182       if (n < 0) return nWritten; /* error or EOF */
183       nWritten += n;
184    }
185 }
186 
187 /* If we lost communication with the remote server, just give up.
188    Recovering is too difficult. */
give_up__comms_lost(void)189 static void give_up__comms_lost(void)
190 {
191    VG_(umsg)("\n");
192    VG_(umsg)(
193       "Valgrind: debuginfo reader: Lost communication with the remote\n");
194    VG_(umsg)(
195       "Valgrind: debuginfo server.  I can't recover.  Giving up.  Sorry.\n");
196    VG_(umsg)("\n");
197    VG_(exit)(1);
198    /*NOTREACHED*/
199 }
200 
give_up__image_overrun(void)201 static void give_up__image_overrun(void)
202 {
203    VG_(umsg)("\n");
204    VG_(umsg)(
205       "Valgrind: debuginfo reader: Possibly corrupted debuginfo file.\n");
206    VG_(umsg)(
207       "Valgrind: I can't recover.  Giving up.  Sorry.\n");
208    VG_(umsg)("\n");
209    VG_(exit)(1);
210    /*NOTREACHED*/
211 }
212 
213 /* "Do" a transaction: that is, send the given frame to the server and
214    return the frame it sends back.  Caller owns the resulting frame
215    and must free it.  A NULL return means the transaction failed for
216    some reason. */
do_transaction(Int sd,const Frame * req)217 static Frame* do_transaction ( Int sd, const Frame* req )
218 {
219    if (0) VG_(printf)("CLIENT: send %c%c%c%c\n",
220                       req->data[0], req->data[1], req->data[2], req->data[3]);
221 
222    /* What goes on the wire is:
223          adler(le32) n_data(le32) data[0 .. n_data-1]
224       where the checksum covers n_data as well as data[].
225    */
226    /* The initial Adler-32 value */
227    UInt adler = VG_(adler32)(0, NULL, 0);
228 
229    /* Fold in the length field, encoded as le32. */
230    UChar wr_first8[8];
231    write_UInt_le(&wr_first8[4], req->n_data);
232    adler = VG_(adler32)(adler, &wr_first8[4], 4);
233    /* Fold in the data values */
234    adler = VG_(adler32)(adler, req->data, req->n_data);
235    write_UInt_le(&wr_first8[0], adler);
236 
237    Int r = my_write(sd, &wr_first8[0], 8);
238    if (r != 8) return NULL;
239    vg_assert(req->n_data >= 4); // else ill formed -- no KIND field
240    r = my_write(sd, req->data, req->n_data);
241    if (r != req->n_data) return NULL;
242 
243    /* So, the request is sent.  Now get a request of the same format
244       out of the channel. */
245    UChar rd_first8[8];  // adler32; length32
246    r = my_read(sd, &rd_first8[0], 8);
247    if (r != 8) return NULL;
248    UInt rd_adler = read_UInt_le(&rd_first8[0]);
249    UInt rd_len   = read_UInt_le(&rd_first8[4]);
250    /* Allocate a Frame to hold the result data, and read into it. */
251    // Reject obviously-insane length fields.
252    if (rd_len < 4 || rd_len > 4*1024*1024) return NULL;
253    Frame* res = ML_(dinfo_zalloc)("di.do_transaction.1", sizeof(Frame));
254    res->n_data = rd_len;
255    res->data = ML_(dinfo_zalloc)("di.do_transaction.2", rd_len);
256    r = my_read(sd, res->data, res->n_data);
257    if (r != rd_len) return NULL;
258 
259    if (0) VG_(printf)("CLIENT: recv %c%c%c%c\n",
260                       res->data[0], res->data[1], res->data[2], res->data[3]);
261 
262    /* Compute the checksum for the received data, and check it. */
263    adler = VG_(adler32)(0, NULL, 0); // initial value
264    adler = VG_(adler32)(adler, &rd_first8[4], 4);
265    if (res->n_data > 0)
266       adler = VG_(adler32)(adler, res->data, res->n_data);
267 
268    if (adler/*computed*/ != rd_adler/*expected*/) return NULL;
269    return res;
270 }
271 
/* Release a frame: first its payload, then the Frame structure
   itself.  Neither |fr| nor |fr->data| may be NULL. */
static void free_Frame ( Frame* fr )
{
   vg_assert(fr && fr->data);
   UChar* payload = fr->data;
   ML_(dinfo_free)(payload);
   ML_(dinfo_free)(fr);
}
278 
mk_Frame_noargs(const HChar * tag)279 static Frame* mk_Frame_noargs ( const HChar* tag )
280 {
281    vg_assert(VG_(strlen)(tag) == 4);
282    Frame* f = ML_(dinfo_zalloc)("di.mFn.1", sizeof(Frame));
283    f->n_data = 4;
284    f->data = ML_(dinfo_zalloc)("di.mFn.2", f->n_data);
285    VG_(memcpy)(&f->data[0], tag, 4);
286    return f;
287 }
288 
mk_Frame_le64_le64_le64(const HChar * tag,ULong n1,ULong n2,ULong n3)289 static Frame* mk_Frame_le64_le64_le64 ( const HChar* tag,
290                                         ULong n1, ULong n2, ULong n3 )
291 {
292    vg_assert(VG_(strlen)(tag) == 4);
293    Frame* f = ML_(dinfo_zalloc)("di.mFlll.1", sizeof(Frame));
294    f->n_data = 4 + 3*8;
295    f->data = ML_(dinfo_zalloc)("di.mFlll.2", f->n_data);
296    VG_(memcpy)(&f->data[0], tag, 4);
297    write_ULong_le(&f->data[4 + 0*8], n1);
298    write_ULong_le(&f->data[4 + 1*8], n2);
299    write_ULong_le(&f->data[4 + 2*8], n3);
300    return f;
301 }
302 
mk_Frame_asciiz(const HChar * tag,const HChar * str)303 static Frame* mk_Frame_asciiz ( const HChar* tag, const HChar* str )
304 {
305    vg_assert(VG_(strlen)(tag) == 4);
306    Frame* f = ML_(dinfo_zalloc)("di.mFa.1", sizeof(Frame));
307    SizeT n_str = VG_(strlen)(str);
308    f->n_data = 4 + n_str + 1;
309    f->data = ML_(dinfo_zalloc)("di.mFa.2", f->n_data);
310    VG_(memcpy)(&f->data[0], tag, 4);
311    VG_(memcpy)(&f->data[4], str, n_str);
312    vg_assert(f->data[4 + n_str] == 0);
313    return f;
314 }
315 
parse_Frame_le64(const Frame * fr,const HChar * tag,ULong * n1)316 static Bool parse_Frame_le64 ( const Frame* fr, const HChar* tag,
317                                /*OUT*/ULong* n1 )
318 {
319    vg_assert(VG_(strlen)(tag) == 4);
320    if (!fr || !fr->data) return False;
321    if (fr->n_data < 4) return False;
322    if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
323    if (fr->n_data != 4 + 1*8) return False;
324    *n1 = read_ULong_le(&fr->data[4 + 0*8]);
325    return True;
326 }
327 
parse_Frame_le64_le64(const Frame * fr,const HChar * tag,ULong * n1,ULong * n2)328 static Bool parse_Frame_le64_le64 ( const Frame* fr, const HChar* tag,
329                                     /*OUT*/ULong* n1, /*OUT*/ULong* n2 )
330 {
331    vg_assert(VG_(strlen)(tag) == 4);
332    if (!fr || !fr->data) return False;
333    if (fr->n_data < 4) return False;
334    if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
335    if (fr->n_data != 4 + 2*8) return False;
336    *n1 = read_ULong_le(&fr->data[4 + 0*8]);
337    *n2 = read_ULong_le(&fr->data[4 + 1*8]);
338    return True;
339 }
340 
parse_Frame_asciiz(const Frame * fr,const HChar * tag,UChar ** str)341 static Bool parse_Frame_asciiz ( const Frame* fr, const HChar* tag,
342                                  /*OUT*/UChar** str )
343 {
344    vg_assert(VG_(strlen)(tag) == 4);
345    if (!fr || !fr->data) return False;
346    if (fr->n_data < 4) return False;
347    if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
348    if (fr->n_data < 5) return False; // else there isn't even enough
349                                      // space for the terminating zero
350    /* Find the terminating zero and ensure it's right at the end
351       of the data.  If not, the frame is malformed. */
352    SizeT i = 4;
353    while (True) {
354       if (i >= fr->n_data) break;
355       if (fr->data[i] == 0) break;
356       i++;
357    }
358    vg_assert(i <= fr->n_data);
359    if (i == fr->n_data-1 && fr->data[i] == 0) {
360       *str = &fr->data[4];
361       return True;
362    } else {
363       return False;
364    }
365 }
366 
parse_Frame_le64_le64_le64_bytes(const Frame * fr,const HChar * tag,ULong * n1,ULong * n2,ULong * n3,UChar ** data,ULong * n_data)367 static Bool parse_Frame_le64_le64_le64_bytes (
368                const Frame* fr, const HChar* tag,
369                /*OUT*/ULong* n1, /*OUT*/ULong* n2, /*OUT*/ULong* n3,
370                /*OUT*/UChar** data, /*OUT*/ULong* n_data
371             )
372 {
373    vg_assert(VG_(strlen)(tag) == 4);
374    if (!fr || !fr->data) return False;
375    if (fr->n_data < 4) return False;
376    if (VG_(memcmp)(&fr->data[0], tag, 4) != 0) return False;
377    if (fr->n_data < 4 + 3*8) return False;
378    *n1 = read_ULong_le(&fr->data[4 + 0*8]);
379    *n2 = read_ULong_le(&fr->data[4 + 1*8]);
380    *n3 = read_ULong_le(&fr->data[4 + 2*8]);
381    *data   = &fr->data[4 + 3*8];
382    *n_data = fr->n_data - (4 + 3*8);
383    vg_assert(fr->n_data >= 4 + 3*8);
384    return True;
385 }
386 
/* Round |i| down to a multiple of CACHE_ENTRY_SIZE, by clearing its
   low CACHE_ENTRY_SIZE_BITS bits. */
static DiOffT block_round_down ( DiOffT i )
{
   const DiOffT low_bits = (DiOffT)CACHE_ENTRY_SIZE - 1;
   return i & ~low_bits;
}
391 
392 /* Is this offset inside this CEnt? */
is_in_CEnt(const CEnt * cent,DiOffT off)393 static inline Bool is_in_CEnt ( const CEnt* cent, DiOffT off )
394 {
395    /* This assertion is checked by set_CEnt, so checking it here has
396       no benefit, whereas skipping it does remove it from the hottest
397       path. */
398    /* vg_assert(cent->used > 0 && cent->used <= CACHE_ENTRY_SIZE); */
399    /* What we want to return is:
400         cent->off <= off && off < cent->off + cent->used;
401       This is however a very hot path, so here's alternative that uses
402       only one conditional branch, using the following transformation,
403       where all quantities are unsigned:
404               x >= LO && x < LO+N
405          -->  x-LO >= 0 && x-LO < LO+N-LO
406          -->  x-LO >= 0 && x-LO < N
407          -->  x-LO < N
408       This is however only valid when the original bounds, that is, LO
409       .. LO+N-1, do not wrap around the end of the address space.  That
410       is, we require that LO <= LO+N-1.  But that's OK .. we don't
411       expect wraparounds in CEnts or for that matter any object
412       allocated from C-land.  See Hacker's Delight, Chapter 4.1,
413       "Checking Bounds of Integers", for more details.
414    */
415    return off - cent->off < cent->used;
416 }
417 
418 /* Allocate a new CEnt, connect it to |img|, and return its index. */
alloc_CEnt(DiImage * img)419 static UInt alloc_CEnt ( DiImage* img )
420 {
421    vg_assert(img);
422    vg_assert(img->ces_used < CACHE_N_ENTRIES);
423    UInt entNo = img->ces_used;
424    img->ces_used++;
425    vg_assert(img->ces[entNo] == NULL);
426    img->ces[entNo] = ML_(dinfo_zalloc)("di.alloc_CEnt.1", sizeof(CEnt));
427    return entNo;
428 }
429 
430 /* Move the given entry to the top and slide those above it down by 1,
431    to make space. */
move_CEnt_to_top(DiImage * img,UInt entNo)432 static void move_CEnt_to_top ( DiImage* img, UInt entNo )
433 {
434    vg_assert(img->ces_used <= CACHE_N_ENTRIES);
435    vg_assert(entNo > 0 && entNo < img->ces_used);
436    CEnt* tmp = img->ces[entNo];
437    while (entNo > 0) {
438       img->ces[entNo] = img->ces[entNo-1];
439       entNo--;
440    }
441    img->ces[0] = tmp;
442 }
443 
444 /* Set the given entry so that it has a chunk of the file containing
445    the given offset.  It is this function that brings data into the
446    cache, either by reading the local file or pulling it from the
447    remote server. */
set_CEnt(const DiImage * img,UInt entNo,DiOffT off)448 static void set_CEnt ( const DiImage* img, UInt entNo, DiOffT off )
449 {
450    SizeT len;
451    DiOffT off_orig = off;
452    vg_assert(img);
453    vg_assert(img->ces_used <= CACHE_N_ENTRIES);
454    vg_assert(entNo >= 0 && entNo < img->ces_used);
455    vg_assert(off < img->size);
456    vg_assert(img->ces[entNo] != NULL);
457    /* Compute [off, +len) as the slice we are going to read. */
458    off = block_round_down(off);
459    len = img->size - off;
460    if (len > CACHE_ENTRY_SIZE) len = CACHE_ENTRY_SIZE;
461    /* It is conceivable that the 'len > 0' bit could fail if we make
462       an image with a zero sized file.  But then no 'get' request on
463       that image would be valid. */
464    vg_assert(len > 0 && len <= CACHE_ENTRY_SIZE);
465    vg_assert(off + len <= img->size);
466    vg_assert(off <= off_orig && off_orig < off+len);
467    /* So, read  off .. off+len-1  into the entry. */
468    CEnt* ce = img->ces[entNo];
469 
470    if (0) {
471       static UInt t_last = 0;
472       static ULong nread = 0;
473       UInt now = VG_(read_millisecond_timer)();
474       UInt delay = now - t_last;
475       t_last = now;
476       nread += len;
477       VG_(printf)("XXXXXXXX (tot %'llu)  read %'lu  offset %'llu  delay %'u\n",
478                   nread, len, off, delay);
479    }
480 
481    if (img->source.is_local) {
482       // Simple: just read it
483       SysRes sr = VG_(pread)(img->source.fd, &ce->data[0], (Int)len, off);
484       vg_assert(!sr_isError(sr));
485    } else {
486       // Not so simple: poke the server
487       vg_assert(img->source.session_id > 0);
488       Frame* req
489          = mk_Frame_le64_le64_le64("READ", img->source.session_id, off, len);
490       Frame* res = do_transaction(img->source.fd, req);
491       free_Frame(req); req = NULL;
492       if (!res) goto server_fail;
493       ULong  rx_session_id = 0, rx_off = 0, rx_len = 0, rx_zdata_len = 0;
494       UChar* rx_data = NULL;
495       /* Pretty confusing.  rx_sessionid, rx_off and rx_len are copies
496          of the values that we requested in the READ frame just above,
497          so we can be sure that the server is responding to the right
498          request.  It just copies them from the request into the
499          response.  rx_data is the actual data, and rx_zdata_len is
500          its compressed length.  Hence rx_len must equal len, but
501          rx_zdata_len can be different -- smaller, hopefully.. */
502       if (!parse_Frame_le64_le64_le64_bytes
503           (res, "RDOK", &rx_session_id, &rx_off,
504                         &rx_len, &rx_data, &rx_zdata_len))
505          goto server_fail;
506       if (rx_session_id != img->source.session_id
507           || rx_off != off || rx_len != len || rx_data == NULL)
508          goto server_fail;
509 
510       //VG_(memcpy)(&ce->data[0], rx_data, len);
511       // Decompress into the destination buffer
512       // Tell the lib the max number of output bytes it can write.
513       // After the call, this holds the number of bytes actually written,
514       // and it's an error if it is different.
515       lzo_uint out_len = len;
516       Int lzo_rc = lzo1x_decompress_safe(rx_data, rx_zdata_len,
517                                          &ce->data[0], &out_len,
518                                          NULL);
519       Bool ok = lzo_rc == LZO_E_OK && out_len == len;
520       if (!ok) goto server_fail;
521 
522       free_Frame(res); res = NULL;
523       goto end_of_else_clause;
524      server_fail:
525       /* The server screwed up somehow.  Now what? */
526       if (res) {
527          UChar* reason = NULL;
528          if (parse_Frame_asciiz(res, "FAIL", &reason)) {
529             VG_(umsg)("set_CEnt (reading data from DI server): fail: "
530                       "%s\n", reason);
531          } else {
532             VG_(umsg)("set_CEnt (reading data from DI server): fail: "
533                       "unknown reason\n");
534          }
535          free_Frame(res); res = NULL;
536       } else {
537          VG_(umsg)("set_CEnt (reading data from DI server): fail: "
538                    "server unexpectedly closed the connection\n");
539       }
540       give_up__comms_lost();
541       /* NOTREACHED */
542       vg_assert(0);
543      end_of_else_clause:
544       {}
545    }
546 
547    ce->off  = off;
548    ce->used = len;
549    vg_assert(ce->used > 0 && ce->used <= CACHE_ENTRY_SIZE);
550 }
551 
552 __attribute__((noinline))
get_slowcase(DiImage * img,DiOffT off)553 static UChar get_slowcase ( DiImage* img, DiOffT off )
554 {
555    /* Stay sane .. */
556    vg_assert(off < img->size);
557    vg_assert(img->ces_used <= CACHE_N_ENTRIES);
558    UInt i;
559    /* Start the search at entry 1, since the fast-case function
560       checked slot zero already. */
561    for (i = 1; i < img->ces_used; i++) {
562       vg_assert(img->ces[i]);
563       if (is_in_CEnt(img->ces[i], off))
564          break;
565    }
566    vg_assert(i <= img->ces_used);
567    if (i == img->ces_used) {
568       /* It's not in any entry.  Either allocate a new entry or
569          recycle the LRU one. */
570       if (img->ces_used == CACHE_N_ENTRIES) {
571          /* All entries in use.  Recycle the (ostensibly) LRU one. */
572          set_CEnt(img, CACHE_N_ENTRIES-1, off);
573          i = CACHE_N_ENTRIES-1;
574       } else {
575          /* Allocate a new one, and fill it in. */
576          UInt entNo = alloc_CEnt(img);
577          set_CEnt(img, entNo, off);
578          i = entNo;
579       }
580    } else {
581       /* We found it at position 'i'. */
582       vg_assert(i > 0);
583    }
584    if (i > 0) {
585       move_CEnt_to_top(img, i);
586       i = 0;
587    }
588    vg_assert(is_in_CEnt(img->ces[i], off));
589    return img->ces[i]->data[ off - img->ces[i]->off ];
590 }
591 
592 // This is called a lot, so do the usual fast/slow split stuff on it. */
get(DiImage * img,DiOffT off)593 static inline UChar get ( DiImage* img, DiOffT off )
594 {
595    /* Most likely case is, it's in the ces[0] position. */
596    /* ML_(img_from_local_file) requests a read for ces[0] when
597       creating the image.  Hence slot zero is always non-NULL, so we
598       can skip this test. */
599    if (LIKELY(/* img->ces[0] != NULL && */
600               is_in_CEnt(img->ces[0], off))) {
601       return img->ces[0]->data[ off - img->ces[0]->off ];
602    }
603    /* Else we'll have to fish around for it. */
604    return get_slowcase(img, off);
605 }
606 
607 /* Create an image from a file in the local filesystem.  This is
608    relatively straightforward. */
ML_(img_from_local_file)609 DiImage* ML_(img_from_local_file)(const HChar* fullpath)
610 {
611    SysRes         fd;
612    struct vg_stat stat_buf;
613    DiOffT         size;
614 
615    fd = VG_(open)(fullpath, VKI_O_RDONLY, 0);
616    if (sr_isError(fd))
617       return NULL;
618 
619    if (VG_(fstat)(sr_Res(fd), &stat_buf) != 0) {
620       VG_(close)(sr_Res(fd));
621       return NULL;
622    }
623 
624    size = stat_buf.size;
625    if (size == 0 || size == DiOffT_INVALID
626        || /* size is unrepresentable as a SizeT */
627           size != (DiOffT)(SizeT)(size)) {
628       VG_(close)(sr_Res(fd));
629       return NULL;
630    }
631 
632    DiImage* img = ML_(dinfo_zalloc)("di.image.ML_iflf.1", sizeof(DiImage));
633    img->source.is_local = True;
634    img->source.fd       = sr_Res(fd);
635    img->size            = size;
636    img->ces_used        = 0;
637    img->source.name     = ML_(dinfo_strdup)("di.image.ML_iflf.2", fullpath);
638    /* img->ces is already zeroed out */
639    vg_assert(img->source.fd >= 0);
640 
641    /* Force the zeroth entry to be the first chunk of the file.
642       That's likely to be the first part that's requested anyway, and
643       loading it at this point forcing img->cent[0] to always be
644       non-empty, thereby saving us an is-it-empty check on the fast
645       path in get(). */
646    UInt entNo = alloc_CEnt(img);
647    vg_assert(entNo == 0);
648    set_CEnt(img, 0, 0);
649 
650    return img;
651 }
652 
653 
654 /* Create an image from a file on a remote debuginfo server.  This is
655    more complex.  There are lots of ways in which it can fail. */
ML_(img_from_di_server)656 DiImage* ML_(img_from_di_server)(const HChar* filename,
657                                  const HChar* serverAddr)
658 {
659    if (filename == NULL || serverAddr == NULL)
660       return NULL;
661 
662    /* The filename must be a plain filename -- no slashes at all. */
663    if (VG_(strchr)(filename, '/') != NULL)
664       return NULL;
665 
666    /* Try to connect to the server.  A side effect of this is to parse
667       and reject, if syntactically invalid, |serverAddr|.  Reasons why
668       this could fail:
669       - serverAddr is not of the form d.d.d.d:d or d.d.d.d
670       - attempt to connect to that address:port failed
671    */
672    Int sd = VG_(connect_via_socket)(serverAddr);
673    if (sd < 0)
674       return NULL;
675    if (!set_blocking(sd))
676       return NULL;
677    Int one = 1;
678    Int sr = VG_(setsockopt)(sd, VKI_IPPROTO_TCP, VKI_TCP_NODELAY,
679                             &one, sizeof(one));
680    vg_assert(sr == 0);
681 
682    /* Ok, we got a connection.  Ask it for version string, so as to be
683       reasonably sure we're talking to an instance of
684       auxprogs/valgrind-di-server and not to some other random program
685       that happens to be listening on that port. */
686    Frame* req = mk_Frame_noargs("VERS");
687    Frame* res = do_transaction(sd, req);
688    if (res == NULL)
689       goto fail; // do_transaction failed?!
690    UChar* vstr = NULL;
691    if (!parse_Frame_asciiz(res, "VEOK", &vstr))
692       goto fail; // unexpected response kind, or invalid ID string
693    vg_assert(vstr);
694    if (VG_(strcmp)("Valgrind Debuginfo Server, Version 1",
695                    (const HChar*)vstr) != 0)
696       goto fail; // wrong version string
697    free_Frame(req);
698    free_Frame(res);
699    req = NULL;
700    res = NULL;
701 
702    /* Server seems plausible.  Present it with the name of the file we
703       want and see if it'll give us back a session ID for it. */
704    req = mk_Frame_asciiz("OPEN", filename);
705    res = do_transaction(sd, req);
706    if (res == NULL)
707       goto fail;
708    ULong session_id = 0, size = 0;
709    if (!parse_Frame_le64_le64(res, "OPOK", &session_id, &size))
710       goto fail;
711    free_Frame(req);
712    free_Frame(res);
713    req = NULL;
714    res = NULL;
715 
716    /* We have a session ID.  We're ready to roll. */
717    DiImage* img = ML_(dinfo_zalloc)("di.image.ML_ifds.1", sizeof(DiImage));
718    img->source.is_local   = False;
719    img->source.fd         = sd;
720    img->source.session_id = session_id;
721    img->size              = size;
722    img->ces_used          = 0;
723    img->source.name       = ML_(dinfo_zalloc)("di.image.ML_ifds.2",
724                                               20 + VG_(strlen)(filename)
725                                                  + VG_(strlen)(serverAddr));
726    VG_(sprintf)(img->source.name, "%s at %s", filename, serverAddr);
727 
728    /* img->ces is already zeroed out */
729    vg_assert(img->source.fd >= 0);
730 
731    /* See comment on equivalent bit in ML_(img_from_local_file) for
732       rationale. */
733    UInt entNo = alloc_CEnt(img);
734    vg_assert(entNo == 0);
735    set_CEnt(img, 0, 0);
736 
737    return img;
738 
739   fail:
740    free_Frame(req);
741    if (res) {
742       UChar* reason = NULL;
743       if (parse_Frame_asciiz(res, "FAIL", &reason)) {
744          // HACK: if it's just telling us that the file can't
745          // be opened, don't print it, else we'll get flooded with
746          // such complaints, one for each main object for which there
747          // isn't a debuginfo file on the server.
748          if (0 != VG_(strcmp)((const HChar*)reason, "OPEN: cannot open file"))
749             VG_(umsg)("ML_(img_from_di_server): fail: %s\n", reason);
750       } else {
751          VG_(umsg)("ML_(img_from_di_server): fail: unknown reason\n");
752       }
753       free_Frame(res);
754    }
755    VG_(close)(sd);
756    return NULL;
757 }
758 
ML_(img_done)759 void ML_(img_done)(DiImage* img)
760 {
761    vg_assert(img);
762    if (img->source.is_local) {
763       /* Close the file; nothing else to do. */
764       vg_assert(img->source.session_id == 0);
765       VG_(close)(img->source.fd);
766    } else {
767       /* Close the socket.  The server can detect this and will scrub
768          the connection when it happens, so there's no need to tell it
769          explicitly by sending it a "CLOSE" message, or any such. */
770       vg_assert(img->source.session_id != 0);
771       VG_(close)(img->source.fd);
772    }
773 
774    /* Free up the cache entries, ultimately |img| itself. */
775    UInt i;
776    vg_assert(img->ces_used <= CACHE_N_ENTRIES);
777    for (i = 0; i < img->ces_used; i++) {
778       ML_(dinfo_free)(img->ces[i]);
779    }
780    /* Take the opportunity to sanity check the rest. */
781    for (i = i; i < img->ces_used; i++) {
782       vg_assert(img->ces[i] == NULL);
783    }
784    ML_(dinfo_free)(img->source.name);
785    ML_(dinfo_free)(img);
786 }
787 
ML_(img_size)788 DiOffT ML_(img_size)(const DiImage* img)
789 {
790    vg_assert(img);
791    return img->size;
792 }
793 
ML_(img_valid)794 inline Bool ML_(img_valid)(const DiImage* img, DiOffT offset, SizeT size)
795 {
796    vg_assert(img);
797    vg_assert(offset != DiOffT_INVALID);
798    return img->size > 0 && offset + size <= (DiOffT)img->size;
799 }
800 
801 __attribute__((noinline))
ensure_valid_failed(const DiImage * img,DiOffT offset,SizeT size,const HChar * caller)802 static void ensure_valid_failed (const DiImage* img, DiOffT offset, SizeT size,
803                                  const HChar* caller)
804 {
805    VG_(umsg)("Valgrind: debuginfo reader: ensure_valid failed:\n");
806    VG_(umsg)("Valgrind:   during call to %s\n", caller);
807    VG_(umsg)("Valgrind:   request for range [%llu, +%lu) exceeds\n",
808              offset, size);
809    VG_(umsg)("Valgrind:   valid image size of %lu for image:\n",
810              img->size);
811    VG_(umsg)("Valgrind:   \"%s\"\n", img->source.name);
812    give_up__image_overrun();
813 }
814 
815 /* Check the given range is valid, and if not, shut down the system.
816    An invalid range would imply that we're trying to read outside the
817    image, which normally means the image is corrupted somehow, or the
818    caller is buggy.  Recovering is too complex, and we have
819    probably-corrupt debuginfo, so just give up. */
ensure_valid(const DiImage * img,DiOffT offset,SizeT size,const HChar * caller)820 static void ensure_valid(const DiImage* img, DiOffT offset, SizeT size,
821                          const HChar* caller)
822 {
823    if (LIKELY(ML_(img_valid)(img, offset, size)))
824       return;
825    else
826       ensure_valid_failed(img, offset, size, caller);
827 }
828 
829 
ML_(img_get)830 void ML_(img_get)(/*OUT*/void* dst,
831                   DiImage* img, DiOffT offset, SizeT size)
832 {
833    vg_assert(img);
834    vg_assert(size > 0);
835    ensure_valid(img, offset, size, "ML_(img_get)");
836    SizeT i;
837    for (i = 0; i < size; i++) {
838       ((UChar*)dst)[i] = get(img, offset + i);
839    }
840 }
841 
ML_(img_get_some)842 SizeT ML_(img_get_some)(/*OUT*/void* dst,
843                         DiImage* img, DiOffT offset, SizeT size)
844 {
845    vg_assert(img);
846    vg_assert(size > 0);
847    ensure_valid(img, offset, size, "ML_(img_get_some)");
848    UChar* dstU = (UChar*)dst;
849    /* Use |get| in the normal way to get the first byte of the range.
850       This guarantees to put the cache entry containing |offset| in
851       position zero. */
852    dstU[0] = get(img, offset);
853    /* Now just read as many bytes as we can (or need) directly out of
854       entry zero, without bothering to call |get| each time. */
855    const CEnt* ce = img->ces[0];
856    vg_assert(ce && ce->used >= 1);
857    vg_assert(is_in_CEnt(ce, offset));
858    SizeT nToCopy = size - 1;
859    SizeT nAvail  = (SizeT)(ce->used - (offset + 1 - ce->off));
860    vg_assert(nAvail >= 0 && nAvail <= ce->used-1);
861    if (nAvail < nToCopy) nToCopy = nAvail;
862    VG_(memcpy)(&dstU[1], &ce->data[offset + 1 - ce->off], nToCopy);
863    return nToCopy + 1;
864 }
865 
866 
ML_(img_strlen)867 SizeT ML_(img_strlen)(DiImage* img, DiOffT off)
868 {
869    ensure_valid(img, off, 1, "ML_(img_strlen)");
870    SizeT i = 0;
871    while (get(img, off + i) != 0) i++;
872    return i;
873 }
874 
ML_(img_strdup)875 HChar* ML_(img_strdup)(DiImage* img, const HChar* cc, DiOffT offset)
876 {
877    ensure_valid(img, offset, 1, "ML_(img_strdup)");
878    SizeT  len = ML_(img_strlen)(img, offset);
879    HChar* res = ML_(dinfo_zalloc)(cc, len+1);
880    SizeT  i;
881    for (i = 0; i < len; i++) {
882       res[i] = get(img, offset+i);
883    }
884    vg_assert(res[len] == 0);
885    return res;
886 }
887 
ML_(img_strcmp)888 Int ML_(img_strcmp)(DiImage* img, DiOffT off1, DiOffT off2)
889 {
890    ensure_valid(img, off1, 1, "ML_(img_strcmp)(first arg)");
891    ensure_valid(img, off2, 1, "ML_(img_strcmp)(second arg)");
892    while (True) {
893       UChar c1 = get(img, off1);
894       UChar c2 = get(img, off2);
895       if (c1 < c2) return -1;
896       if (c1 > c2) return 1;
897       if (c1 == 0) return 0;
898       off1++; off2++;
899    }
900 }
901 
ML_(img_strcmp_c)902 Int ML_(img_strcmp_c)(DiImage* img, DiOffT off1, const HChar* str2)
903 {
904    ensure_valid(img, off1, 1, "ML_(img_strcmp_c)");
905    while (True) {
906       UChar c1 = get(img, off1);
907       UChar c2 = *(const UChar*)str2;
908       if (c1 < c2) return -1;
909       if (c1 > c2) return 1;
910       if (c1 == 0) return 0;
911       off1++; str2++;
912    }
913 }
914 
ML_(img_get_UChar)915 UChar ML_(img_get_UChar)(DiImage* img, DiOffT offset)
916 {
917    ensure_valid(img, offset, 1, "ML_(img_get_UChar)");
918    return get(img, offset);
919 }
920 
ML_(img_get_UShort)921 UShort ML_(img_get_UShort)(DiImage* img, DiOffT offset)
922 {
923    UShort r;
924    ML_(img_get)(&r, img, offset, sizeof(r));
925    return r;
926 }
927 
ML_(img_get_UInt)928 UInt ML_(img_get_UInt)(DiImage* img, DiOffT offset)
929 {
930    UInt r;
931    ML_(img_get)(&r, img, offset, sizeof(r));
932    return r;
933 }
934 
ML_(img_get_ULong)935 ULong ML_(img_get_ULong)(DiImage* img, DiOffT offset)
936 {
937    ULong r;
938    ML_(img_get)(&r, img, offset, sizeof(r));
939    return r;
940 }
941 
942 
943 /*
944  * This routine for calculating the CRC for a separate debug file
945  * is GPLed code borrowed from GNU binutils.
946  */
ML_(img_calc_gnu_debuglink_crc32)947 UInt ML_(img_calc_gnu_debuglink_crc32)(DiImage* img)
948 {
949   static const UInt crc32_table[256] =
950     {
951       0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
952       0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
953       0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
954       0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
955       0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
956       0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
957       0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
958       0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
959       0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
960       0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
961       0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
962       0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
963       0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
964       0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
965       0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
966       0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
967       0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
968       0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
969       0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
970       0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
971       0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
972       0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
973       0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
974       0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
975       0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
976       0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
977       0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
978       0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
979       0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
980       0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
981       0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
982       0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
983       0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
984       0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
985       0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
986       0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
987       0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
988       0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
989       0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
990       0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
991       0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
992       0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
993       0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
994       0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
995       0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
996       0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
997       0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
998       0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
999       0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
1000       0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
1001       0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
1002       0x2d02ef8d
1003     };
1004 
1005    vg_assert(img);
1006 
1007    /* If the image is local, calculate the CRC here directly.  If it's
1008       remote, forward the request to the server. */
1009    if (img->source.is_local) {
1010       /* Work through the image in 1 KB chunks. */
1011       UInt   crc      = 0xFFFFFFFF;
1012       DiOffT img_szB  = ML_(img_size)(img);
1013       DiOffT curr_off = 0;
1014       while (1) {
1015          vg_assert(curr_off >= 0 && curr_off <= img_szB);
1016          if (curr_off == img_szB) break;
1017          DiOffT avail = img_szB - curr_off;
1018          vg_assert(avail > 0 && avail <= img_szB);
1019          if (avail > 1024) avail = 1024;
1020          UChar buf[1024];
1021          SizeT nGot = ML_(img_get_some)(buf, img, curr_off, avail);
1022          vg_assert(nGot >= 1 && nGot <= avail);
1023          UInt i;
1024          for (i = 0; i < (UInt)nGot; i++)
1025             crc = crc32_table[(crc ^ buf[i]) & 0xff] ^ (crc >> 8);
1026          curr_off += nGot;
1027       }
1028       return ~crc & 0xFFFFFFFF;
1029    } else {
1030       Frame* req = mk_Frame_noargs("CRC3");
1031       Frame* res = do_transaction(img->source.fd, req);
1032       if (!res) goto remote_crc_fail;
1033       ULong crc32 = 0;
1034       if (!parse_Frame_le64(res, "CROK", &crc32)) goto remote_crc_fail;
1035       if ((crc32 & ~0xFFFFFFFFULL) != 0) goto remote_crc_fail;
1036       free_Frame(req);
1037       free_Frame(res);
1038       return (UInt)crc32;
1039      remote_crc_fail:
1040 
1041       // XXXX common this up with the READ diagnostic cases
1042       if (res) {
1043          UChar* reason = NULL;
1044          if (parse_Frame_asciiz(res, "FAIL", &reason)) {
1045             VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1046                       "%s\n", reason);
1047          } else {
1048             VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1049                       "unknown reason\n");
1050          }
1051       } else {
1052          VG_(umsg)("img_calc_gnu_debuglink_crc32: fail: "
1053                    "server unexpectedly closed the connection\n");
1054       }
1055 
1056       if (req) free_Frame(req);
1057       if (res) free_Frame(res);
1058       // FIXME: now what?
1059       give_up__comms_lost();
1060       /* NOTREACHED */
1061       vg_assert(0);
1062    }
1063    /*NOTREACHED*/
1064    vg_assert(0);
1065 }
1066 
1067 ////////////////////////////////////////////////////
1068 #include "minilzo-inl.c"
1069 
1070 /*--------------------------------------------------------------------*/
1071 /*--- end                                                  image.c ---*/
1072 /*--------------------------------------------------------------------*/
1073