1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2020 Google, Inc.
5 //
6 // Author: Matthias Maennich
7 
8 /// @file
9 ///
10 /// This contains the declarations for the symtab reader.
11 
12 #ifndef __ABG_SYMTAB_READER_H__
13 #define __ABG_SYMTAB_READER_H__
14 
15 #include <gelf.h>
16 
17 #include <functional>
18 #include <iterator>
19 #include <memory>
20 #include <unordered_map>
21 #include <vector>
22 
23 #include "abg-cxx-compat.h"  // for abg_compat::optional
24 #include "abg-ir.h"
25 
26 namespace abigail
27 {
28 namespace symtab_reader
29 {
30 
31 /// The symtab filter is the object passed to the symtab object in order to
32 /// iterate over the symbols in the symtab while applying filters.
33 ///
34 /// The general idea is that it consists of a set of optionally enforced flags,
35 /// such as 'functions' or 'variables'. If not set, those are not filtered for,
36 /// neither inclusive nor exclusive. If set they are all ANDed together.
37 class symtab_filter
38 {
39 public:
40   // Default constructor disabling all features.
symtab_filter()41   symtab_filter() {}
42 
43   bool
44   matches(const elf_symbol& symbol) const;
45 
46   /// Enable or disable function filtering
47   ///
48   /// @param new_value whether to filter for functions
49   void
50   set_functions(bool new_value = true)
51   {functions_ = new_value;};
52 
53   /// Enable or disable variable filtering
54   ///
55   /// @param new_value whether to filter for variables
56   void
57   set_variables(bool new_value = true)
58   {variables_ = new_value;};
59 
60   /// Enable or disable public symbol filtering
61   ///
62   /// @param new_value whether to filter for public symbols
63   void
64   set_public_symbols(bool new_value = true)
65   {public_symbols_ = new_value;};
66 
67   /// Enable or disable undefined symbol filtering
68   ///
69   /// @param new_value whether to filter for undefined symbols
70   void
71   set_undefined_symbols(bool new_value = true)
72   {undefined_symbols_ = new_value;};
73 
74   /// Enable or disable kernel symbol filtering
75   ///
76   /// @param new_value whether to filter for kernel symbols
77   void
78   set_kernel_symbols(bool new_value = true)
79   {kernel_symbols_ = new_value;};
80 
81 private:
82   // The symbol is a function (FUNC)
83   abg_compat::optional<bool> functions_;
84 
85   // The symbol is a variables (OBJECT)
86   abg_compat::optional<bool> variables_;
87 
88   // The symbol is publicly accessible (global/weak with default/protected
89   // visibility)
90   abg_compat::optional<bool> public_symbols_;
91 
92   // The symbols is not defined (declared)
93   abg_compat::optional<bool> undefined_symbols_;
94 
95   // The symbol is listed in the ksymtab (for Linux Kernel binaries).
96   abg_compat::optional<bool> kernel_symbols_;
97 };
98 
99 /// Base iterator for our custom iterator based on whatever the const_iterator
100 /// is for a vector of symbols.
101 /// As of writing this, std::vector<elf_symbol_sptr>::const_iterator.
102 typedef elf_symbols::const_iterator base_iterator;
103 
104 /// An iterator to walk a vector of elf_symbols filtered by symtab_filter.
105 ///
106 /// The implementation inherits all properties from the vector's
107 /// const_iterator, but intercepts where necessary to allow effective
108 /// filtering. This makes it a STL compatible iterator for general purpose
109 /// usage.
110 class symtab_iterator : public base_iterator
111 {
112 public:
113   typedef base_iterator::value_type	 value_type;
114   typedef base_iterator::reference	 reference;
115   typedef base_iterator::pointer	 pointer;
116   typedef base_iterator::difference_type difference_type;
117   typedef std::forward_iterator_tag	 iterator_category;
118 
119   /// Construct the iterator based on a pair of underlying iterators and a
120   /// symtab_filter object. Immediately fast forward to the next element that
121   /// matches the criteria (if any).
122   ///
123   /// @param begin the underlying begin iterator
124   ///
125   /// @param begin the underlying end iterator
126   ///
127   /// @param filter the symtab_filter to apply
128   symtab_iterator(base_iterator	       begin,
129 		  base_iterator	       end,
130 		  const symtab_filter& filter = symtab_filter())
base_iterator(begin)131     : base_iterator(begin), end_(end), filter_(filter)
132   {skip_to_next();}
133 
134   /// Pre-increment operator to advance to the next matching element.
135   ///
136   /// @return itself after incrementing
137   symtab_iterator&
138   operator++()
139   {
140     base_iterator::operator++();
141     skip_to_next();
142     return *this;
143   }
144 
145   /// Post-increment operator to advance to the next matching element.
146   ///
147   /// @return a copy of the iterator before incrementing
148   symtab_iterator
149   operator++(int)
150   {
151     symtab_iterator result(*this);
152     ++(*this);
153     return result;
154   }
155 
156 private:
157   /// The end of the underlying iterator.
158   const base_iterator end_;
159 
160   /// The symtab_filter used to determine when to advance.
161   const symtab_filter& filter_;
162 
163   /// Skip to the next element that matches the filter criteria (if any). Hold
164   /// off when reaching the end of the underlying iterator.
165   void
skip_to_next()166   skip_to_next()
167   {
168     while (*this != end_ && !filter_.matches(***this))
169       ++(*this);
170   }
171 };
172 
class symtab;

/// Convenience declaration of a unique_ptr<symtab>
using symtab_ptr = std::unique_ptr<symtab>;
176 
177 /// symtab is the actual data container of the symtab_reader implementation.
178 ///
179 /// The symtab is instantiated either via an Elf handle (from binary) or from a
180 /// set of existing symbol maps (usually when instantiated from XML). It will
181 /// then discover the symtab, possibly the ksymtab (for Linux Kernel binaries)
182 /// and setup the data containers and lookup maps for later perusal.
183 ///
184 /// The symtab is supposed to be used in a const context as all information is
185 /// already computed at construction time. Symbols are stored sorted to allow
186 /// deterministic reading of the entries.
187 ///
188 /// An example use of the symtab class is
189 ///
190 /// const auto symtab    = symtab::load(elf_handle, env);
191 /// symtab_filter filter = symtab->make_filter();
192 /// filter.set_public_symbols();
193 /// filter.set_functions();
194 ///
195 /// for (const auto& symbol : filtered_symtab(*symtab, filter))
196 ///   {
197 ///     std::cout << symbol->get_name() << "\n";
198 ///   }
199 ///
200 /// This uses the filtered_symtab proxy object to capture the filter.
201 class symtab
202 {
203 public:
204   typedef std::function<bool(const elf_symbol_sptr&)> symbol_predicate;
205 
206   /// Indicate whether any (kernel) symbols have been seen at construction.
207   ///
208   /// @return true if there are symbols detected earlier.
209   bool
has_symbols()210   has_symbols() const
211   {return is_kernel_binary_ ? has_ksymtab_entries_ : !symbols_.empty();}
212 
213   symtab_filter
214   make_filter() const;
215 
216   /// The (only) iterator type we offer is a const_iterator implemented by the
217   /// symtab_iterator.
218   typedef symtab_iterator const_iterator;
219 
220   /// Obtain an iterator to the beginning of the symtab according to the filter
221   /// criteria. Whenever this iterator advances, it skips elements that do not
222   /// match the filter criteria.
223   ///
224   /// @param filter the symtab_filter to match symbols against
225   ///
226   /// @return a filtering const_iterator of the underlying type
227   const_iterator
begin(const symtab_filter & filter)228   begin(const symtab_filter& filter) const
229   {return symtab_iterator(symbols_.begin(), symbols_.end(), filter);}
230 
231   /// Obtain an iterator to the end of the symtab.
232   ///
233   /// @return an end iterator
234   const_iterator
end()235   end() const
236   {return symtab_iterator(symbols_.end(), symbols_.end());}
237 
238   const elf_symbols&
239   lookup_symbol(const std::string& name) const;
240 
241   const elf_symbol_sptr&
242   lookup_symbol(GElf_Addr symbol_addr) const;
243 
244   static symtab_ptr
245   load(Elf*		elf_handle,
246        ir::environment* env,
247        symbol_predicate is_suppressed = NULL);
248 
249   static symtab_ptr
250   load(string_elf_symbols_map_sptr function_symbol_map,
251        string_elf_symbols_map_sptr variables_symbol_map);
252 
253   void
254   update_main_symbol(GElf_Addr addr, const std::string& name);
255 
256 private:
257   /// Default constructor. Private to enforce creation by factory methods.
258   symtab();
259 
260   /// The vector of symbols we discovered.
261   elf_symbols symbols_;
262 
263   /// Whether this is a Linux Kernel binary
264   bool is_kernel_binary_;
265 
266   /// Whether this kernel_binary has ksymtab entries
267   ///
268   /// A kernel module might not have a ksymtab if it does not export any
269   /// symbols. In order to quickly decide whether the symbol table is empty, we
270   /// remember whether we ever saw ksymtab entries.
271   bool has_ksymtab_entries_;
272 
273   /// Lookup map name->symbol(s)
274   typedef std::unordered_map<std::string, std::vector<elf_symbol_sptr>>
275 		       name_symbol_map_type;
276   name_symbol_map_type name_symbol_map_;
277 
278   /// Lookup map addr->symbol
279   typedef std::unordered_map<GElf_Addr, elf_symbol_sptr> addr_symbol_map_type;
280   addr_symbol_map_type addr_symbol_map_;
281 
282   /// Lookup map function entry address -> symbol
283   addr_symbol_map_type entry_addr_symbol_map_;
284 
285   bool
286   load_(Elf* elf_handle, ir::environment* env, symbol_predicate is_suppressed);
287 
288   bool
289   load_(string_elf_symbols_map_sptr function_symbol_map,
290        string_elf_symbols_map_sptr variables_symbol_map);
291 
292   void
293   update_function_entry_address_symbol_map(Elf*	     elf_handle,
294 					   GElf_Sym* native_symbol,
295 					   const elf_symbol_sptr& symbol_sptr);
296 };
297 
298 /// Helper class to allow range-for loops on symtabs for C++11 and later code.
299 /// It serves as a proxy for the symtab iterator and provides a begin() method
300 /// without arguments, as required for range-for loops (and possibly other
301 /// iterator based transformations).
302 ///
303 /// Example usage:
304 ///
305 ///   for (const auto& symbol : filtered_symtab(tab, filter))
306 ///     {
307 ///       std::cout << symbol->get_name() << "\n";
308 ///     }
309 ///
310 class filtered_symtab
311 {
312   const symtab&	      tab_;
313   const symtab_filter filter_;
314 
315 public:
316   /// Construct the proxy object keeping references to the underlying symtab
317   /// and the filter object.
filtered_symtab(const symtab & tab,const symtab_filter & filter)318   filtered_symtab(const symtab& tab, const symtab_filter& filter)
319     : tab_(tab), filter_(filter)
320   {}
321 
322   /// Pass through symtab.begin(), but also pass on the filter.
323   symtab::const_iterator
begin()324   begin() const
325   {return tab_.begin(filter_);}
326 
327   /// Pass through symtab.end().
328   symtab::const_iterator
end()329   end() const
330   {return tab_.end();}
331 };
332 
333 } // end namespace symtab_reader
334 } // end namespace abigail
335 
336 #endif // __ABG_SYMTAB_READER_H__
337