1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2016-2020 Red Hat, Inc.
5 //
6 // Author: Dodji Seketeli
7 
8 /// @file
9 ///
10 /// Declaration of types pertaining to the interned string pool used
11 /// throughout Libabigail, for performance reasons.
12 ///
13 /// For the record, the concept of the String Interning method is
14 /// explained at https://en.wikipedia.org/wiki/String_interning.
15 
16 #ifndef __ABG_INTERNED_STR_H__
17 #define __ABG_INTERNED_STR_H__
18 
19 #include <functional>
20 #include <memory>
21 #include <ostream>
22 #include <string>
23 #include <unordered_set>
24 
25 
26 namespace abigail
27 {
28 // Inject some std types into this namespace.
29 using std::unordered_set;
30 using std::shared_ptr;
31 using std::string;
32 using std::ostream;
33 
34 /// The abstraction of an interned string.
35 ///
36 /// It's a wrapper around a pointer to a std::string, along with a set
37 /// of method that helps make this string integrate with std::string
38 /// seamlessly.  For instance, the type provides equality operators
39 /// that help compare it against std::string.
40 ///
41 /// Note that this @ref interned_string type is design to have the
42 /// same size as a pointer to a string.
43 class interned_string
44 {
45   std::string* raw_;
46 
47   /// Constructor.
48   ///
49   /// @param raw the pointer to string that this interned_string
50   /// wraps.
interned_string(string * raw)51   interned_string(string* raw)
52     : raw_(raw)
53   {}
54 
55 public:
56 
57   /// Default constructor.
58   ///
59   /// Constructs an empty pointer to string.
interned_string()60   interned_string()
61     : raw_()
62   {}
63 
64   /// Copy constructor.
65   ///
66   /// @param o the other instance to copy from.
interned_string(const interned_string & o)67   interned_string(const interned_string& o)
68   {raw_ = o.raw_;}
69 
70   /// Assignment operator.
71   ///
72   /// @param o the other instance to assign to the current one.
73   interned_string&
74   operator=(const interned_string& o)
75   {
76     raw_ = o.raw_;
77     return *this;
78   }
79 
80   /// Clear the string.
81   void
clear()82   clear()
83   {raw_ = 0;}
84 
85   /// Test if the current instance of @ref interned_string is empty.
86   ///
87   /// @return true iff the currentisntance of @ref interned_string is
88   /// empty.
89   bool
empty()90   empty() const
91   {return !raw_;}
92 
93   /// Return the underlying pointer to std::string that this
94   /// interned_string wraps.
95   ///
96   /// @return a pointer to the underlying std::string, or 0 if this
97   /// interned_string is empty.
98   const string*
raw()99   raw() const
100   {return raw_;}
101 
102   /// Compare the current instance of @ref interned_string against
103   /// another instance of @ref interned_string.
104   ///
105   /// Note that this comparison is done in O(1), because it compares
106   /// the pointer values of the two underlying pointers to std::string
107   /// held by each instances of @ref interned_string.
108   ///
109   /// @param o the other @ref interned_string to compare against.
110   ///
111   /// @return true iff the current instance equals @p o.
112   bool
113   operator==(const interned_string& o) const
114   {return raw_ == o.raw_;}
115 
116   /// Inequality operator.
117   ///
118   /// @param o the other @ref interned_string to compare the current
119   /// instance against.
120   ///
121   /// @return true iff the current instance is different from the @p
122   /// o.
123   bool
124   operator!=(const interned_string& o) const
125   {return !operator==(o);}
126 
127   /// Compare the current instance of @ref interned_string against
128   /// an instance of std::string.
129   ///
130   /// Note that this comparison is done in O(N), N being the size (in
131   /// number of characters) of the strings being compared.
132   ///
133   /// @param o the instance of std::string to compare against.
134   ///
135   /// @return true iff the current instance equals @p o.
136   bool
137   operator==(const string& o) const
138   {
139     if (raw_)
140       return *raw_ == o;
141     return o.empty();
142   }
143 
144   /// Inequality operator.
145   ///
146   /// Takes the current instance of @ref interned_string and an
147   /// instance of std::string.
148   ///
149   /// @param o the instance of std::string to compare the current
150   /// instance of @ref interned_string against.
151   ///
152   /// @return true if the current instance of @ref interned_string is
153   /// different from @p o.
154   bool
155   operator!=(const string& o) const
156   {return ! operator==(o);}
157 
158   /// "Less than" operator.
159   ///
160   /// Lexicographically compares the current instance of @ref
161   /// interned_string against another instance.
162   ///
163   /// @param o the other instance of @ref interned_string to compare
164   /// against.
165   ///
166   /// @return true iff the current instance of interned_string is
167   /// lexicographycally less than the string @p o.
168   bool
169   operator<(const interned_string& o) const
170   {return static_cast<string>(*this) < static_cast<std::string>(o);}
171 
172   /// Conversion operator to string.
173   ///
174   /// @return the underlying string this instance refers too.
string()175   operator string() const
176   {
177     if (!raw_)
178       return "";
179     return *raw_;
180   }
181 
182   friend class interned_string_pool;
183 }; // end class interned_string
184 
185 bool
186 operator==(const string& l, const interned_string& r);
187 
188 bool
189 operator!=(const string& l, const interned_string& r);
190 
191 ostream&
192 operator<<(ostream& o, const interned_string& s);
193 
194 string
195 operator+(const interned_string& s1,const string& s2);
196 
197 string
198 operator+(const string& s1, const interned_string& s2);
199 
200 /// A functor to hash instances of @ref interned_string.
201 struct hash_interned_string
202 {
203   /// The hash operator.
204   ///
205   /// It's super fast because hashing an interned string amounts to
206   /// hashing the pointer to it's underlying string.  It's because
207   /// every distinct string is present only in one copy in the
208   /// environment.
209   ///
210   /// @param s the instance of @ref interned_string to hash.
211   ///
212   /// @return the returned hash value.
213   size_t
operatorhash_interned_string214   operator()(const interned_string& s) const
215   {
216     std::hash<size_t> hash_size_t;
217     return hash_size_t(reinterpret_cast<size_t>(s.raw()));
218   }
219 }; // end struct hash_interned_string
220 
221 
222 /// The interned string pool.
223 ///
224 /// This is where all the distinct strings represented by the interned
225 /// strings leave.  The pool is the actor responsible for creating
226 /// interned strings.
227 class interned_string_pool
228 {
229   struct priv;
230   typedef shared_ptr<priv> priv_sptr;
231 
232   priv_sptr priv_;
233 
234 public:
235 
236   interned_string_pool();
237 
238   interned_string
239   create_string(const std::string&);
240 
241   bool
242   has_string(const char* s) const;
243 
244   const char*
245   get_string(const char* s) const;
246 
247   ~interned_string_pool();
248 }; // end class interned_string_pool
249 
250 /// Convenience typedef for a set of @ref interned_string
251 typedef unordered_set<interned_string,
252 		      hash_interned_string> interned_string_set_type;
253 
254 } // end namespace abigail
255 
256 #endif // __ABG_INTERNED_STR_H__
257