// merge.h -- handle section merging for gold  -*- C++ -*-

// Copyright (C) 2006-2014 Free Software Foundation, Inc.
// Written by Ian Lance Taylor <iant@google.com>.

// This file is part of gold.

// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
// MA 02110-1301, USA.

23 #ifndef GOLD_MERGE_H
24 #define GOLD_MERGE_H
25 
26 #include <climits>
27 #include <map>
28 #include <vector>
29 
30 #include "stringpool.h"
31 #include "output.h"
32 
33 namespace gold
34 {
35 
36 class Merge_map;
37 
38 // For each object with merge sections, we store an Object_merge_map.
39 // This is used to map locations in input sections to a merged output
40 // section.  The output section itself is not recorded here--it can be
41 // found in the output_sections_ field of the Object.
42 
43 class Object_merge_map
44 {
45  public:
Object_merge_map()46   Object_merge_map()
47     : first_shnum_(-1U), first_map_(),
48       second_shnum_(-1U), second_map_(),
49       section_merge_maps_()
50   { }
51 
52   ~Object_merge_map();
53 
54   // Add a mapping for MERGE_MAP, for the bytes from OFFSET to OFFSET
55   // + LENGTH in the input section SHNDX to OUTPUT_OFFSET in the
56   // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
57   // discarded.  OUTPUT_OFFSET is relative to the start of the merged
58   // data in the output section.
59   void
60   add_mapping(const Merge_map*, unsigned int shndx, section_offset_type offset,
61 	      section_size_type length, section_offset_type output_offset);
62 
63   // Get the output offset for an input address.  MERGE_MAP is the map
64   // we are looking for, or NULL if we don't care.  The input address
65   // is at offset OFFSET in section SHNDX.  This sets *OUTPUT_OFFSET
66   // to the offset in the output section; this will be -1 if the bytes
67   // are not being copied to the output.  This returns true if the
68   // mapping is known, false otherwise.  *OUTPUT_OFFSET is relative to
69   // the start of the merged data in the output section.
70   bool
71   get_output_offset(const Merge_map*, unsigned int shndx,
72 		    section_offset_type offset,
73 		    section_offset_type* output_offset);
74 
75   // Return whether this is the merge map for section SHNDX.
76   bool
77   is_merge_section_for(const Merge_map*, unsigned int shndx);
78 
79   // Initialize an mapping from input offsets to output addresses for
80   // section SHNDX.  STARTING_ADDRESS is the output address of the
81   // merged section.
82   template<int size>
83   void
84   initialize_input_to_output_map(
85       unsigned int shndx,
86       typename elfcpp::Elf_types<size>::Elf_Addr starting_address,
87       Unordered_map<section_offset_type,
88 		    typename elfcpp::Elf_types<size>::Elf_Addr>*);
89 
90  private:
91   // Map input section offsets to a length and an output section
92   // offset.  An output section offset of -1 means that this part of
93   // the input section is being discarded.
94   struct Input_merge_entry
95   {
96     // The offset in the input section.
97     section_offset_type input_offset;
98     // The length.
99     section_size_type length;
100     // The offset in the output section.
101     section_offset_type output_offset;
102   };
103 
104   // A less-than comparison routine for Input_merge_entry.
105   struct Input_merge_compare
106   {
107     bool
operatorInput_merge_compare108     operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
109     { return i1.input_offset < i2.input_offset; }
110   };
111 
112   // A list of entries for a particular input section.
113   struct Input_merge_map
114   {
115     typedef std::vector<Input_merge_entry> Entries;
116 
117     // We store these with the Relobj, and we look them up by input
118     // section.  It is possible to have two different merge maps
119     // associated with a single output section.  For example, this
120     // happens routinely with .rodata, when merged string constants
121     // and merged fixed size constants are both put into .rodata.  The
122     // output offset that we store is not the offset from the start of
123     // the output section; it is the offset from the start of the
124     // merged data in the output section.  That means that the caller
125     // is going to add the offset of the merged data within the output
126     // section, which means that the caller needs to know which set of
127     // merged data it found the entry in.  So it's not enough to find
128     // this data based on the input section and the output section; we
129     // also have to find it based on a set of merged data in the
130     // output section.  In order to verify that we are looking at the
131     // right data, we store a pointer to the Merge_map here, and we
132     // pass in a pointer when looking at the data.  If we are asked to
133     // look up information for a different Merge_map, we report that
134     // we don't have it, rather than trying a lookup and returning an
135     // answer which will receive the wrong offset.
136     const Merge_map* merge_map;
137     // The list of mappings.
138     Entries entries;
139     // Whether the ENTRIES field is sorted by input_offset.
140     bool sorted;
141 
Input_merge_mapInput_merge_map142     Input_merge_map()
143       : merge_map(NULL), entries(), sorted(true)
144     { }
145   };
146 
147   // Map input section indices to merge maps.
148   typedef std::map<unsigned int, Input_merge_map*> Section_merge_maps;
149 
150   // Return a pointer to the Input_merge_map to use for the input
151   // section SHNDX, or NULL.
152   Input_merge_map*
153   get_input_merge_map(unsigned int shndx);
154 
155   // Get or make the Input_merge_map to use for the section SHNDX
156   // with MERGE_MAP.
157   Input_merge_map*
158   get_or_make_input_merge_map(const Merge_map* merge_map, unsigned int shndx);
159 
160   // Any given object file will normally only have a couple of input
161   // sections with mergeable contents.  So we keep the first two input
162   // section numbers inline, and push any further ones into a map.  A
163   // value of -1U in first_shnum_ or second_shnum_ means that we don't
164   // have a corresponding entry.
165   unsigned int first_shnum_;
166   Input_merge_map first_map_;
167   unsigned int second_shnum_;
168   Input_merge_map second_map_;
169   Section_merge_maps section_merge_maps_;
170 };
171 
172 // This class manages mappings from input sections to offsets in an
173 // output section.  This is used where input sections are merged.  The
174 // actual data is stored in fields in Object.
175 
176 class Merge_map
177 {
178  public:
Merge_map()179   Merge_map()
180   { }
181 
182   // Add a mapping for the bytes from OFFSET to OFFSET + LENGTH in the
183   // input section SHNDX in object OBJECT to OUTPUT_OFFSET in the
184   // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
185   // discarded.  OUTPUT_OFFSET is not the offset from the start of the
186   // output section, it is the offset from the start of the merged
187   // data within the output section.
188   void
189   add_mapping(Relobj* object, unsigned int shndx,
190 	      section_offset_type offset, section_size_type length,
191 	      section_offset_type output_offset);
192 
193   // Return the output offset for an input address.  The input address
194   // is at offset OFFSET in section SHNDX in OBJECT.  This sets
195   // *OUTPUT_OFFSET to the offset in the output section; this will be
196   // -1 if the bytes are not being copied to the output.  This returns
197   // true if the mapping is known, false otherwise.  This returns the
198   // value stored by add_mapping, namely the offset from the start of
199   // the merged data within the output section.
200   bool
201   get_output_offset(const Relobj* object, unsigned int shndx,
202 		    section_offset_type offset,
203 		    section_offset_type* output_offset) const;
204 
205   // Return whether this is the merge mapping for section SHNDX in
206   // OBJECT.  This should return true when get_output_offset would
207   // return true for some input offset.
208   bool
209   is_merge_section_for(const Relobj* object, unsigned int shndx) const;
210 };
211 
212 // A general class for SHF_MERGE data, to hold functions shared by
213 // fixed-size constant data and string data.
214 
215 class Output_merge_base : public Output_section_data
216 {
217  public:
Output_merge_base(uint64_t entsize,uint64_t addralign)218   Output_merge_base(uint64_t entsize, uint64_t addralign)
219     : Output_section_data(addralign), merge_map_(), entsize_(entsize),
220       keeps_input_sections_(false), first_relobj_(NULL), first_shndx_(-1),
221       input_sections_()
222   { }
223 
224   // Return the entry size.
225   uint64_t
entsize()226   entsize() const
227   { return this->entsize_; }
228 
229   // Whether this is a merge string section.  This is only true of
230   // Output_merge_string.
231   bool
is_string()232   is_string()
233   { return this->do_is_string(); }
234 
235   // Whether this keeps input sections.
236   bool
keeps_input_sections()237   keeps_input_sections() const
238   { return this->keeps_input_sections_; }
239 
240   // Set the keeps-input-sections flag.  This is virtual so that sub-classes
241   // can perform additional checks.
242   void
set_keeps_input_sections()243   set_keeps_input_sections()
244   { this->do_set_keeps_input_sections(); }
245 
246   // Return the object of the first merged input section.  This used
247   // for script processing.  This is NULL if merge section is empty.
248   Relobj*
first_relobj()249   first_relobj() const
250   { return this->first_relobj_; }
251 
252   // Return the section index of the first merged input section.  This
253   // is used for script processing.  This is valid only if merge section
254   // is not valid.
255   unsigned int
first_shndx()256   first_shndx() const
257   {
258     gold_assert(this->first_relobj_ != NULL);
259     return this->first_shndx_;
260   }
261 
262   // Set of merged input sections.
263   typedef Unordered_set<Section_id, Section_id_hash> Input_sections;
264 
265   // Beginning of merged input sections.
266   Input_sections::const_iterator
input_sections_begin()267   input_sections_begin() const
268   {
269     gold_assert(this->keeps_input_sections_);
270     return this->input_sections_.begin();
271   }
272 
273   // Beginning of merged input sections.
274   Input_sections::const_iterator
input_sections_end()275   input_sections_end() const
276   {
277     gold_assert(this->keeps_input_sections_);
278     return this->input_sections_.end();
279   }
280 
281  protected:
282   // Return the output offset for an input offset.
283   bool
284   do_output_offset(const Relobj* object, unsigned int shndx,
285 		   section_offset_type offset,
286 		   section_offset_type* poutput) const;
287 
288   // Return whether this is the merge section for an input section.
289   bool
290   do_is_merge_section_for(const Relobj*, unsigned int shndx) const;
291 
292   // Add a mapping from an OFFSET in input section SHNDX in object
293   // OBJECT to an OUTPUT_OFFSET in the output section.  OUTPUT_OFFSET
294   // is the offset from the start of the merged data in the output
295   // section.
296   void
add_mapping(Relobj * object,unsigned int shndx,section_offset_type offset,section_size_type length,section_offset_type output_offset)297   add_mapping(Relobj* object, unsigned int shndx, section_offset_type offset,
298 	      section_size_type length, section_offset_type output_offset)
299   {
300     this->merge_map_.add_mapping(object, shndx, offset, length, output_offset);
301   }
302 
303   // This may be overridden by the child class.
304   virtual bool
do_is_string()305   do_is_string()
306   { return false; }
307 
308   // This may be overridden by the child class.
309   virtual void
do_set_keeps_input_sections()310   do_set_keeps_input_sections()
311   { this->keeps_input_sections_ = true; }
312 
313   // Record the merged input section for script processing.
314   void
315   record_input_section(Relobj* relobj, unsigned int shndx);
316 
317  private:
318   // A mapping from input object/section/offset to offset in output
319   // section.
320   Merge_map merge_map_;
321   // The entry size.  For fixed-size constants, this is the size of
322   // the constants.  For strings, this is the size of a character.
323   uint64_t entsize_;
324   // Whether we keep input sections.
325   bool keeps_input_sections_;
326   // Object of the first merged input section.  We use this for script
327   // processing.
328   Relobj* first_relobj_;
329   // Section index of the first merged input section.
330   unsigned int first_shndx_;
331   // Input sections.  We only keep them is keeps_input_sections_ is true.
332   Input_sections input_sections_;
333 };
334 
335 // Handle SHF_MERGE sections with fixed-size constant data.
336 
337 class Output_merge_data : public Output_merge_base
338 {
339  public:
Output_merge_data(uint64_t entsize,uint64_t addralign)340   Output_merge_data(uint64_t entsize, uint64_t addralign)
341     : Output_merge_base(entsize, addralign), p_(NULL), len_(0), alc_(0),
342       input_count_(0),
343       hashtable_(128, Merge_data_hash(this), Merge_data_eq(this))
344   { }
345 
346  protected:
347   // Add an input section.
348   bool
349   do_add_input_section(Relobj* object, unsigned int shndx);
350 
351   // Set the final data size.
352   void
353   set_final_data_size();
354 
355   // Write the data to the file.
356   void
357   do_write(Output_file*);
358 
359   // Write the data to a buffer.
360   void
361   do_write_to_buffer(unsigned char*);
362 
363   // Write to a map file.
364   void
do_print_to_mapfile(Mapfile * mapfile)365   do_print_to_mapfile(Mapfile* mapfile) const
366   { mapfile->print_output_data(this, _("** merge constants")); }
367 
368   // Print merge stats to stderr.
369   void
370   do_print_merge_stats(const char* section_name);
371 
372   // Set keeps-input-sections flag.
373   void
do_set_keeps_input_sections()374   do_set_keeps_input_sections()
375   {
376     gold_assert(this->input_count_ == 0);
377     Output_merge_base::do_set_keeps_input_sections();
378   }
379 
380  private:
381   // We build a hash table of the fixed-size constants.  Each constant
382   // is stored as a pointer into the section data we are accumulating.
383 
384   // A key in the hash table.  This is an offset in the section
385   // contents we are building.
386   typedef section_offset_type Merge_data_key;
387 
388   // Compute the hash code.  To do this we need a pointer back to the
389   // object holding the data.
390   class Merge_data_hash
391   {
392    public:
Merge_data_hash(const Output_merge_data * pomd)393     Merge_data_hash(const Output_merge_data* pomd)
394       : pomd_(pomd)
395     { }
396 
397     size_t
398     operator()(Merge_data_key) const;
399 
400    private:
401     const Output_merge_data* pomd_;
402   };
403 
404   friend class Merge_data_hash;
405 
406   // Compare two entries in the hash table for equality.  To do this
407   // we need a pointer back to the object holding the data.  Note that
408   // we now have a pointer to the object stored in two places in the
409   // hash table.  Fixing this would require specializing the hash
410   // table, which would be hard to do portably.
411   class Merge_data_eq
412   {
413    public:
Merge_data_eq(const Output_merge_data * pomd)414     Merge_data_eq(const Output_merge_data* pomd)
415       : pomd_(pomd)
416     { }
417 
418     bool
419     operator()(Merge_data_key k1, Merge_data_key k2) const;
420 
421    private:
422     const Output_merge_data* pomd_;
423   };
424 
425   friend class Merge_data_eq;
426 
427   // The type of the hash table.
428   typedef Unordered_set<Merge_data_key, Merge_data_hash, Merge_data_eq>
429     Merge_data_hashtable;
430 
431   // Given a hash table key, which is just an offset into the section
432   // data, return a pointer to the corresponding constant.
433   const unsigned char*
constant(Merge_data_key k)434   constant(Merge_data_key k) const
435   {
436     gold_assert(k >= 0 && k < static_cast<section_offset_type>(this->len_));
437     return this->p_ + k;
438   }
439 
440   // Add a constant to the output.
441   void
442   add_constant(const unsigned char*);
443 
444   // The accumulated data.
445   unsigned char* p_;
446   // The length of the accumulated data.
447   section_size_type len_;
448   // The size of the allocated buffer.
449   section_size_type alc_;
450   // The number of entries seen in input files.
451   size_t input_count_;
452   // The hash table.
453   Merge_data_hashtable hashtable_;
454 };
455 
456 // Handle SHF_MERGE sections with string data.  This is a template
457 // based on the type of the characters in the string.
458 
459 template<typename Char_type>
460 class Output_merge_string : public Output_merge_base
461 {
462  public:
Output_merge_string(uint64_t addralign)463   Output_merge_string(uint64_t addralign)
464     : Output_merge_base(sizeof(Char_type), addralign), stringpool_(addralign),
465       merged_strings_lists_(), input_count_(0), input_size_(0)
466   {
467     this->stringpool_.set_no_zero_null();
468   }
469 
470  protected:
471   // Add an input section.
472   bool
473   do_add_input_section(Relobj* object, unsigned int shndx);
474 
475   // Do all the final processing after the input sections are read in.
476   // Returns the final data size.
477   section_size_type
478   finalize_merged_data();
479 
480   // Set the final data size.
481   void
482   set_final_data_size();
483 
484   // Write the data to the file.
485   void
486   do_write(Output_file*);
487 
488   // Write the data to a buffer.
489   void
490   do_write_to_buffer(unsigned char*);
491 
492   // Write to a map file.
493   void
do_print_to_mapfile(Mapfile * mapfile)494   do_print_to_mapfile(Mapfile* mapfile) const
495   { mapfile->print_output_data(this, _("** merge strings")); }
496 
497   // Print merge stats to stderr.
498   void
499   do_print_merge_stats(const char* section_name);
500 
501   // Writes the stringpool to a buffer.
502   void
stringpool_to_buffer(unsigned char * buffer,section_size_type buffer_size)503   stringpool_to_buffer(unsigned char* buffer, section_size_type buffer_size)
504   { this->stringpool_.write_to_buffer(buffer, buffer_size); }
505 
506   // Clears all the data in the stringpool, to save on memory.
507   void
clear_stringpool()508   clear_stringpool()
509   { this->stringpool_.clear(); }
510 
511   // Whether this is a merge string section.
512   virtual bool
do_is_string()513   do_is_string()
514   { return true; }
515 
516   // Set keeps-input-sections flag.
517   void
do_set_keeps_input_sections()518   do_set_keeps_input_sections()
519   {
520     gold_assert(this->input_count_ == 0);
521     Output_merge_base::do_set_keeps_input_sections();
522   }
523 
524  private:
525   // The name of the string type, for stats.
526   const char*
527   string_name();
528 
529   // As we see input sections, we build a mapping from object, section
530   // index and offset to strings.
531   struct Merged_string
532   {
533     // The offset in the input section.
534     section_offset_type offset;
535     // The key in the Stringpool.
536     Stringpool::Key stringpool_key;
537 
Merged_stringMerged_string538     Merged_string(section_offset_type offseta, Stringpool::Key stringpool_keya)
539       : offset(offseta), stringpool_key(stringpool_keya)
540     { }
541   };
542 
543   typedef std::vector<Merged_string> Merged_strings;
544 
545   struct Merged_strings_list
546   {
547     // The input object where the strings were found.
548     Relobj* object;
549     // The input section in the input object.
550     unsigned int shndx;
551     // The list of merged strings.
552     Merged_strings merged_strings;
553 
Merged_strings_listMerged_strings_list554     Merged_strings_list(Relobj* objecta, unsigned int shndxa)
555       : object(objecta), shndx(shndxa), merged_strings()
556     { }
557   };
558 
559   typedef std::vector<Merged_strings_list*> Merged_strings_lists;
560 
561   // As we see the strings, we add them to a Stringpool.
562   Stringpool_template<Char_type> stringpool_;
563   // Map from a location in an input object to an entry in the
564   // Stringpool.
565   Merged_strings_lists merged_strings_lists_;
566   // The number of entries seen in input files.
567   size_t input_count_;
568   // The total size of input sections.
569   size_t input_size_;
570 };
571 
572 } // End namespace gold.
573 
574 #endif // !defined(GOLD_MERGE_H)
575