1 //===- StringHash.h -------------------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #ifndef MCLD_ADT_STRINGHASH_H
10 #define MCLD_ADT_STRINGHASH_H
11 #include <llvm/ADT/StringRef.h>
12 #include <llvm/Support/DataTypes.h>
13 #include <cctype>
14 #include <cassert>
15 #include <functional>
16 
17 namespace mcld {
18 namespace hash {
19 
20 enum Type {
21   RS,
22   JS,
23   PJW,
24   ELF,
25   BKDR,
26   SDBM,
27   DJB,
28   DEK,
29   BP,
30   FNV,
31   AP,
32   ES
33 };
34 
35 /** \class template<uint32_t TYPE> StringHash
36  *  \brief the template StringHash class, for specification
37  */
38 template<uint32_t TYPE>
39 struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t>
40 {
operatorStringHash41   uint32_t operator()(const llvm::StringRef& pKey) const
42   {
43     assert(false && "Undefined StringHash function.\n");
44     return 0;
45   }
46 };
47 
48 /** \class StringHash<RSHash>
49  *  \brief RS StringHash funciton
50  */
51 template<>
52 struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t>
53 {
54   uint32_t operator()(const llvm::StringRef& pKey) const
55   {
56     const unsigned int b = 378551;
57     uint32_t a = 63689;
58     uint32_t hash_val = 0;
59 
60     for(unsigned int i = 0; i < pKey.size(); ++i) {
61       hash_val = hash_val * a + pKey[i];
62       a = a * b;
63     }
64     return hash_val;
65   }
66 };
67 
68 /** \class StringHash<JSHash>
69  *  \brief JS hash funciton
70  */
71 template<>
72 struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t>
73 {
74   uint32_t operator()(const llvm::StringRef& pKey) const
75   {
76     uint32_t hash_val = 1315423911;
77 
78     for(unsigned int i = 0; i < pKey.size(); ++i) {
79        hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
80     }
81     return hash_val;
82   }
83 };
84 
85 /** \class StringHash<PJW>
86  *  \brief P.J. Weinberger hash function
87  */
88 template<>
89 struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t>
90 {
91   uint32_t operator()(const llvm::StringRef& pKey) const
92   {
93     const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
94     const unsigned int ThreeQuarters     = (unsigned int)((BitsInUnsignedInt  * 3) / 4);
95     const unsigned int OneEighth         = (unsigned int)(BitsInUnsignedInt / 8);
96     const unsigned int HighBits          = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
97     uint32_t hash_val = 0;
98     uint32_t test = 0;
99 
100     for(unsigned int i = 0; i < pKey.size(); ++i) {
101       hash_val = (hash_val << OneEighth) + pKey[i];
102 
103       if((test = hash_val & HighBits) != 0) {
104         hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
105       }
106     }
107     return hash_val;
108   }
109 };
110 
111 /** \class StringHash<ELF>
112  *  \brief ELF hash function.
113  */
114 template<>
115 struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t>
116 {
117   uint32_t operator()(const llvm::StringRef& pKey) const
118   {
119     uint32_t hash_val = 0;
120     uint32_t x = 0;
121 
122     for (unsigned int i = 0; i < pKey.size(); ++i) {
123       hash_val = (hash_val << 4) + pKey[i];
124       if((x = hash_val & 0xF0000000L) != 0)
125         hash_val ^= (x >> 24);
126       hash_val &= ~x;
127     }
128     return hash_val;
129   }
130 };
131 
132 /** \class StringHash<BKDR>
133  *  \brief BKDR hash function
134  */
135 template<>
136 struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t>
137 {
138   uint32_t operator()(const llvm::StringRef& pKey) const
139   {
140     const uint32_t seed = 131;
141     uint32_t hash_val = 0;
142 
143     for(uint32_t i = 0; i < pKey.size(); ++i)
144       hash_val = (hash_val * seed) + pKey[i];
145     return hash_val;
146   }
147 };
148 
149 
150 /** \class StringHash<SDBM>
151  *  \brief SDBM hash function
152  *  0.049s in 100000 test
153  */
154 template<>
155 struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t>
156 {
157   uint32_t operator()(const llvm::StringRef& pKey) const
158   {
159     uint32_t hash_val = 0;
160 
161     for(uint32_t i = 0; i < pKey.size(); ++i)
162       hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
163     return hash_val;
164   }
165 };
166 
167 /** \class StringHash<DJB>
168  *  \brief DJB hash function
169  *  0.057s in 100000 test
170  */
171 template<>
172 struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t>
173 {
174   uint32_t operator()(const llvm::StringRef& pKey) const
175   {
176     uint32_t hash_val = 5381;
177 
178     for(uint32_t i = 0; i < pKey.size(); ++i)
179       hash_val = ((hash_val << 5) + hash_val) + pKey[i];
180 
181     return hash_val;
182   }
183 };
184 
185 /** \class StringHash<DEK>
186  *  \brief DEK hash function
187  *  0.60s
188  */
189 template<>
190 struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t>
191 {
192   uint32_t operator()(const llvm::StringRef& pKey) const
193   {
194     uint32_t hash_val = pKey.size();
195 
196     for(uint32_t i = 0; i < pKey.size(); ++i)
197       hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
198 
199     return hash_val;
200   }
201 };
202 
203 /** \class StringHash<BP>
204  *  \brief BP hash function
205  *  0.057s
206  */
207 template<>
208 struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t>
209 {
210   uint32_t operator()(const llvm::StringRef& pKey) const
211   {
212     uint32_t hash_val = 0;
213     for(uint32_t i = 0; i < pKey.size(); ++i)
214       hash_val = hash_val << 7 ^ pKey[i];
215 
216     return hash_val;
217   }
218 };
219 
220 /** \class StringHash<FNV>
221  *  \brief FNV hash function
222  *  0.058s
223  */
224 template<>
225 struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t>
226 {
227   uint32_t operator()(const llvm::StringRef& pKey) const
228   {
229     const uint32_t fnv_prime = 0x811C9DC5;
230     uint32_t hash_val = 0;
231     for(uint32_t i = 0; i < pKey.size(); ++i) {
232       hash_val *= fnv_prime;
233       hash_val ^= pKey[i];
234     }
235 
236     return hash_val;
237   }
238 };
239 
240 /** \class StringHash<AP>
241  *  \brief AP hash function
242  *  0.060s
243  */
244 template<>
245 struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t>
246 {
247   uint32_t operator()(const llvm::StringRef& pKey) const
248   {
249     unsigned int hash_val = 0xAAAAAAAA;
250 
251     for(uint32_t i = 0; i < pKey.size(); ++i) {
252       hash_val ^= ((i & 1) == 0)?
253                           ((hash_val <<  7) ^ pKey[i] * (hash_val >> 3)):
254                           (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
255     }
256 
257     return hash_val;
258   }
259 };
260 
261 /** \class StringHash<ES>
262  *  \brief This is a revision of Edward Sayers' string characteristic function.
263  *
264  *  31-28  27  26  25   -   0
265  *  +----+---+---+------------+
266  *  | .  | N | - | a/A  ~ z/Z |
267  *  +----+---+---+------------+
268  *
269  *  . (bit 31~28) - The number of '.' characters
270  *  N (bit 27)    - Are there any numbers in the string
271  *  - (bit 26)    - Does the string have '-' character
272  *  bit 25~0      - Bit 25 is set only if the string contains a 'a' or 'A', and
273  *                  Bit 24 is set only if ...                   'b' or 'B', ...
274  */
275 template<>
276 struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t>
277 {
278   uint32_t operator()(const llvm::StringRef& pString) const
279   {
280     uint32_t result = 0x0;
281     unsigned int dot = 0;
282     std::string::size_type idx;
283     for (idx = 0; idx < pString.size(); ++idx) {
284       int cur_char = pString[idx];
285 
286       if ('.' == cur_char) {
287         ++dot;
288         continue;
289       }
290 
291       if (isdigit(cur_char)) {
292         result |= (1 << 27);
293         continue;
294       }
295 
296       if ('_' == cur_char) {
297         result |= (1 << 26);
298         continue;
299       }
300 
301       if (isupper(cur_char)) {
302         result |= (1 << (cur_char - 'A'));
303         continue;
304       }
305 
306       if (islower(cur_char)) {
307         result |= (1 << (cur_char - 'a'));
308         continue;
309       }
310     }
311     result |= (dot << 28);
312     return result;
313   }
314 
315 
316   /** \func may_include
317    *  \brief is it possible that pRule is a sub-string of pInString
318    */
319   static bool may_include(uint32_t pRule, uint32_t pInString)
320   {
321     uint32_t in_c = pInString << 4;
322     uint32_t r_c  = pRule << 4;
323 
324     uint32_t res = (in_c ^ r_c) & r_c;
325     if (0 != res)
326       return false;
327 
328     uint32_t in_dot = pInString >> 28;
329     uint32_t r_dot  = pRule >> 28;
330     if (r_dot > in_dot)
331       return false;
332 
333     return true;
334   }
335 };
336 
337 /** \class template<uint32_t TYPE> StringCompare
338  *  \brief the template StringCompare class, for specification
339  */
340 template<typename STRING_TYPE>
341 struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool>
342 {
343   bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const
344   { return X == Y; }
345 };
346 
347 template<>
348 struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool>
349 {
350   bool operator()(const char* X, const char* Y) const
351   { return (0 == std::strcmp(X, Y)); }
352 };
353 
354 template<>
355 struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool>
356 {
357   bool operator()(const char* X, const char* Y) const
358   { return (0 == std::strcmp(X, Y)); }
359 };
360 
361 } // namespace of hash
362 } // namespace of mcld
363 
364 #endif
365 
366