1 //===- StringHash.h -------------------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #ifndef MCLD_ADT_STRINGHASH_H_
10 #define MCLD_ADT_STRINGHASH_H_
11 
12 #include <llvm/ADT/StringRef.h>
13 #include <llvm/Support/DataTypes.h>
14 
15 #include <cassert>
16 #include <cctype>
17 #include <functional>
18 
19 namespace mcld {
20 namespace hash {
21 
22 enum Type { RS, JS, PJW, ELF, BKDR, SDBM, DJB, DEK, BP, FNV, AP, ES };
23 
24 /** \class template<uint32_t TYPE> StringHash
25  *  \brief the template StringHash class, for specification
26  */
27 template <uint32_t TYPE>
28 struct StringHash
29     : public std::unary_function<const llvm::StringRef, uint32_t> {
operatorStringHash30   uint32_t operator()(const llvm::StringRef pKey) const {
31     assert(false && "Undefined StringHash function.\n");
32     return 0;
33   }
34 };
35 
36 /** \class StringHash<RSHash>
37  *  \brief RS StringHash funciton
38  */
39 template <>
40 struct StringHash<RS>
41     : public std::unary_function<const llvm::StringRef, uint32_t> {
42   uint32_t operator()(const llvm::StringRef pKey) const {
43     const unsigned int b = 378551;
44     uint32_t a = 63689;
45     uint32_t hash_val = 0;
46 
47     for (unsigned int i = 0; i < pKey.size(); ++i) {
48       hash_val = hash_val * a + pKey[i];
49       a = a * b;
50     }
51     return hash_val;
52   }
53 };
54 
55 /** \class StringHash<JSHash>
56  *  \brief JS hash funciton
57  */
58 template <>
59 struct StringHash<JS>
60     : public std::unary_function<const llvm::StringRef, uint32_t> {
61   uint32_t operator()(const llvm::StringRef pKey) const {
62     uint32_t hash_val = 1315423911;
63 
64     for (unsigned int i = 0; i < pKey.size(); ++i) {
65       hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
66     }
67     return hash_val;
68   }
69 };
70 
71 /** \class StringHash<PJW>
72  *  \brief P.J. Weinberger hash function
73  */
74 template <>
75 struct StringHash<PJW>
76     : public std::unary_function<const llvm::StringRef, uint32_t> {
77   uint32_t operator()(const llvm::StringRef pKey) const {
78     const unsigned int BitsInUnsignedInt =
79         (unsigned int)(sizeof(unsigned int) * 8);
80     const unsigned int ThreeQuarters =
81         (unsigned int)((BitsInUnsignedInt * 3) / 4);
82     const unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8);
83     const unsigned int HighBits = (unsigned int)(0xFFFFFFFF)
84                                   << (BitsInUnsignedInt - OneEighth);
85     uint32_t hash_val = 0;
86     uint32_t test = 0;
87 
88     for (unsigned int i = 0; i < pKey.size(); ++i) {
89       hash_val = (hash_val << OneEighth) + pKey[i];
90 
91       if ((test = hash_val & HighBits) != 0) {
92         hash_val = ((hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
93       }
94     }
95     return hash_val;
96   }
97 };
98 
99 /** \class StringHash<ELF>
100  *  \brief ELF hash function.
101  */
102 template <>
103 struct StringHash<ELF>
104     : public std::unary_function<const llvm::StringRef, uint32_t> {
105   uint32_t operator()(const llvm::StringRef pKey) const {
106     uint32_t hash_val = 0;
107     uint32_t x = 0;
108 
109     for (unsigned int i = 0; i < pKey.size(); ++i) {
110       hash_val = (hash_val << 4) + pKey[i];
111       if ((x = hash_val & 0xF0000000L) != 0)
112         hash_val ^= (x >> 24);
113       hash_val &= ~x;
114     }
115     return hash_val;
116   }
117 };
118 
119 /** \class StringHash<BKDR>
120  *  \brief BKDR hash function
121  */
122 template <>
123 struct StringHash<BKDR>
124     : public std::unary_function<const llvm::StringRef, uint32_t> {
125   uint32_t operator()(const llvm::StringRef pKey) const {
126     const uint32_t seed = 131;
127     uint32_t hash_val = 0;
128 
129     for (uint32_t i = 0; i < pKey.size(); ++i)
130       hash_val = (hash_val * seed) + pKey[i];
131     return hash_val;
132   }
133 };
134 
135 /** \class StringHash<SDBM>
136  *  \brief SDBM hash function
137  *  0.049s in 100000 test
138  */
139 template <>
140 struct StringHash<SDBM>
141     : public std::unary_function<const llvm::StringRef, uint32_t> {
142   uint32_t operator()(const llvm::StringRef pKey) const {
143     uint32_t hash_val = 0;
144 
145     for (uint32_t i = 0; i < pKey.size(); ++i)
146       hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
147     return hash_val;
148   }
149 };
150 
151 /** \class StringHash<DJB>
152  *  \brief DJB hash function
153  *  0.057s in 100000 test
154  */
155 template <>
156 struct StringHash<DJB>
157     : public std::unary_function<const llvm::StringRef, uint32_t> {
158   uint32_t operator()(const llvm::StringRef pKey) const {
159     uint32_t hash_val = 5381;
160 
161     for (uint32_t i = 0; i < pKey.size(); ++i)
162       hash_val = ((hash_val << 5) + hash_val) + pKey[i];
163 
164     return hash_val;
165   }
166 };
167 
168 /** \class StringHash<DEK>
169  *  \brief DEK hash function
170  *  0.60s
171  */
172 template <>
173 struct StringHash<DEK>
174     : public std::unary_function<const llvm::StringRef, uint32_t> {
175   uint32_t operator()(const llvm::StringRef pKey) const {
176     uint32_t hash_val = pKey.size();
177 
178     for (uint32_t i = 0; i < pKey.size(); ++i)
179       hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
180 
181     return hash_val;
182   }
183 };
184 
185 /** \class StringHash<BP>
186  *  \brief BP hash function
187  *  0.057s
188  */
189 template <>
190 struct StringHash<BP>
191     : public std::unary_function<const llvm::StringRef, uint32_t> {
192   uint32_t operator()(const llvm::StringRef pKey) const {
193     uint32_t hash_val = 0;
194     for (uint32_t i = 0; i < pKey.size(); ++i)
195       hash_val = hash_val << 7 ^ pKey[i];
196 
197     return hash_val;
198   }
199 };
200 
201 /** \class StringHash<FNV>
202  *  \brief FNV hash function
203  *  0.058s
204  */
205 template <>
206 struct StringHash<FNV>
207     : public std::unary_function<const llvm::StringRef, uint32_t> {
208   uint32_t operator()(const llvm::StringRef pKey) const {
209     const uint32_t fnv_prime = 0x811C9DC5;
210     uint32_t hash_val = 0;
211     for (uint32_t i = 0; i < pKey.size(); ++i) {
212       hash_val *= fnv_prime;
213       hash_val ^= pKey[i];
214     }
215 
216     return hash_val;
217   }
218 };
219 
220 /** \class StringHash<AP>
221  *  \brief AP hash function
222  *  0.060s
223  */
224 template <>
225 struct StringHash<AP>
226     : public std::unary_function<const llvm::StringRef, uint32_t> {
227   uint32_t operator()(const llvm::StringRef pKey) const {
228     unsigned int hash_val = 0xAAAAAAAA;
229 
230     for (uint32_t i = 0; i < pKey.size(); ++i) {
231       hash_val ^= ((i & 1) == 0)
232                       ? ((hash_val << 7) ^ pKey[i] * (hash_val >> 3))
233                       : (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
234     }
235 
236     return hash_val;
237   }
238 };
239 
240 /** \class StringHash<ES>
241  *  \brief This is a revision of Edward Sayers' string characteristic function.
242  *
243  *  31-28  27  26  25   -   0
244  *  +----+---+---+------------+
245  *  | .  | N | - | a/A  ~ z/Z |
246  *  +----+---+---+------------+
247  *
248  *  . (bit 31~28) - The number of '.' characters
249  *  N (bit 27)    - Are there any numbers in the string
250  *  - (bit 26)    - Does the string have '-' character
251  *  bit 25~0      - Bit 25 is set only if the string contains a 'a' or 'A', and
252  *                  Bit 24 is set only if ...                   'b' or 'B', ...
253  */
254 template <>
255 struct StringHash<ES>
256     : public std::unary_function<const llvm::StringRef, uint32_t> {
257   uint32_t operator()(const llvm::StringRef pString) const {
258     uint32_t result = 0x0;
259     unsigned int dot = 0;
260     std::string::size_type idx;
261     for (idx = 0; idx < pString.size(); ++idx) {
262       int cur_char = pString[idx];
263 
264       if ('.' == cur_char) {
265         ++dot;
266         continue;
267       }
268 
269       if (isdigit(cur_char)) {
270         result |= (1 << 27);
271         continue;
272       }
273 
274       if ('_' == cur_char) {
275         result |= (1 << 26);
276         continue;
277       }
278 
279       if (isupper(cur_char)) {
280         result |= (1 << (cur_char - 'A'));
281         continue;
282       }
283 
284       if (islower(cur_char)) {
285         result |= (1 << (cur_char - 'a'));
286         continue;
287       }
288     }
289     result |= (dot << 28);
290     return result;
291   }
292 
293   /** \func may_include
294    *  \brief is it possible that pRule is a sub-string of pInString
295    */
296   static bool may_include(uint32_t pRule, uint32_t pInString) {
297     uint32_t in_c = pInString << 4;
298     uint32_t r_c = pRule << 4;
299 
300     uint32_t res = (in_c ^ r_c) & r_c;
301     if (0 != res)
302       return false;
303 
304     uint32_t in_dot = pInString >> 28;
305     uint32_t r_dot = pRule >> 28;
306     if (r_dot > in_dot)
307       return false;
308 
309     return true;
310   }
311 };
312 
313 /** \class template<uint32_t TYPE> StringCompare
314  *  \brief the template StringCompare class, for specification
315  */
316 template <typename STRING_TYPE>
317 struct StringCompare : public std::binary_function<const STRING_TYPE&,
318                                                    const STRING_TYPE&,
319                                                    bool> {
320   bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const {
321     return X == Y;
322   }
323 };
324 
325 template <>
326 struct StringCompare<const char*>
327     : public std::binary_function<const char*, const char*, bool> {
328   bool operator()(const char* X, const char* Y) const {
329     return (std::strcmp(X, Y) == 0);
330   }
331 };
332 
333 template <>
334 struct StringCompare<char*>
335     : public std::binary_function<const char*, const char*, bool> {
336   bool operator()(const char* X, const char* Y) const {
337     return (std::strcmp(X, Y) == 0);
338   }
339 };
340 
341 }  // namespace hash
342 }  // namespace mcld
343 
344 #endif  // MCLD_ADT_STRINGHASH_H_
345