1 //===- StringHash.h -------------------------------------------------------===// 2 // 3 // The MCLinker Project 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 #ifndef MCLD_ADT_STRINGHASH_H_ 10 #define MCLD_ADT_STRINGHASH_H_ 11 12 #include <llvm/ADT/StringRef.h> 13 #include <llvm/Support/DataTypes.h> 14 15 #include <cassert> 16 #include <cctype> 17 #include <functional> 18 19 namespace mcld { 20 namespace hash { 21 22 enum Type { RS, JS, PJW, ELF, BKDR, SDBM, DJB, DEK, BP, FNV, AP, ES }; 23 24 /** \class template<uint32_t TYPE> StringHash 25 * \brief the template StringHash class, for specification 26 */ 27 template <uint32_t TYPE> 28 struct StringHash 29 : public std::unary_function<const llvm::StringRef, uint32_t> { operatorStringHash30 uint32_t operator()(const llvm::StringRef pKey) const { 31 assert(false && "Undefined StringHash function.\n"); 32 return 0; 33 } 34 }; 35 36 /** \class StringHash<RSHash> 37 * \brief RS StringHash funciton 38 */ 39 template <> 40 struct StringHash<RS> 41 : public std::unary_function<const llvm::StringRef, uint32_t> { 42 uint32_t operator()(const llvm::StringRef pKey) const { 43 const unsigned int b = 378551; 44 uint32_t a = 63689; 45 uint32_t hash_val = 0; 46 47 for (unsigned int i = 0; i < pKey.size(); ++i) { 48 hash_val = hash_val * a + pKey[i]; 49 a = a * b; 50 } 51 return hash_val; 52 } 53 }; 54 55 /** \class StringHash<JSHash> 56 * \brief JS hash funciton 57 */ 58 template <> 59 struct StringHash<JS> 60 : public std::unary_function<const llvm::StringRef, uint32_t> { 61 uint32_t operator()(const llvm::StringRef pKey) const { 62 uint32_t hash_val = 1315423911; 63 64 for (unsigned int i = 0; i < pKey.size(); ++i) { 65 hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2)); 66 } 67 return hash_val; 68 } 69 }; 70 71 /** \class StringHash<PJW> 72 * \brief P.J. Weinberger hash function 73 */ 74 template <> 75 struct StringHash<PJW> 76 : public std::unary_function<const llvm::StringRef, uint32_t> { 77 uint32_t operator()(const llvm::StringRef pKey) const { 78 const unsigned int BitsInUnsignedInt = 79 (unsigned int)(sizeof(unsigned int) * 8); 80 const unsigned int ThreeQuarters = 81 (unsigned int)((BitsInUnsignedInt * 3) / 4); 82 const unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8); 83 const unsigned int HighBits = (unsigned int)(0xFFFFFFFF) 84 << (BitsInUnsignedInt - OneEighth); 85 uint32_t hash_val = 0; 86 uint32_t test = 0; 87 88 for (unsigned int i = 0; i < pKey.size(); ++i) { 89 hash_val = (hash_val << OneEighth) + pKey[i]; 90 91 if ((test = hash_val & HighBits) != 0) { 92 hash_val = ((hash_val ^ (test >> ThreeQuarters)) & (~HighBits)); 93 } 94 } 95 return hash_val; 96 } 97 }; 98 99 /** \class StringHash<ELF> 100 * \brief ELF hash function. 101 */ 102 template <> 103 struct StringHash<ELF> 104 : public std::unary_function<const llvm::StringRef, uint32_t> { 105 uint32_t operator()(const llvm::StringRef pKey) const { 106 uint32_t hash_val = 0; 107 uint32_t x = 0; 108 109 for (unsigned int i = 0; i < pKey.size(); ++i) { 110 hash_val = (hash_val << 4) + pKey[i]; 111 if ((x = hash_val & 0xF0000000L) != 0) 112 hash_val ^= (x >> 24); 113 hash_val &= ~x; 114 } 115 return hash_val; 116 } 117 }; 118 119 /** \class StringHash<BKDR> 120 * \brief BKDR hash function 121 */ 122 template <> 123 struct StringHash<BKDR> 124 : public std::unary_function<const llvm::StringRef, uint32_t> { 125 uint32_t operator()(const llvm::StringRef pKey) const { 126 const uint32_t seed = 131; 127 uint32_t hash_val = 0; 128 129 for (uint32_t i = 0; i < pKey.size(); ++i) 130 hash_val = (hash_val * seed) + pKey[i]; 131 return hash_val; 132 } 133 }; 134 135 /** \class StringHash<SDBM> 136 * \brief SDBM hash function 137 * 0.049s in 100000 test 138 */ 139 template <> 140 struct StringHash<SDBM> 141 : public std::unary_function<const llvm::StringRef, uint32_t> { 142 uint32_t operator()(const llvm::StringRef pKey) const { 143 uint32_t hash_val = 0; 144 145 for (uint32_t i = 0; i < pKey.size(); ++i) 146 hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val; 147 return hash_val; 148 } 149 }; 150 151 /** \class StringHash<DJB> 152 * \brief DJB hash function 153 * 0.057s in 100000 test 154 */ 155 template <> 156 struct StringHash<DJB> 157 : public std::unary_function<const llvm::StringRef, uint32_t> { 158 uint32_t operator()(const llvm::StringRef pKey) const { 159 uint32_t hash_val = 5381; 160 161 for (uint32_t i = 0; i < pKey.size(); ++i) 162 hash_val = ((hash_val << 5) + hash_val) + pKey[i]; 163 164 return hash_val; 165 } 166 }; 167 168 /** \class StringHash<DEK> 169 * \brief DEK hash function 170 * 0.60s 171 */ 172 template <> 173 struct StringHash<DEK> 174 : public std::unary_function<const llvm::StringRef, uint32_t> { 175 uint32_t operator()(const llvm::StringRef pKey) const { 176 uint32_t hash_val = pKey.size(); 177 178 for (uint32_t i = 0; i < pKey.size(); ++i) 179 hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i]; 180 181 return hash_val; 182 } 183 }; 184 185 /** \class StringHash<BP> 186 * \brief BP hash function 187 * 0.057s 188 */ 189 template <> 190 struct StringHash<BP> 191 : public std::unary_function<const llvm::StringRef, uint32_t> { 192 uint32_t operator()(const llvm::StringRef pKey) const { 193 uint32_t hash_val = 0; 194 for (uint32_t i = 0; i < pKey.size(); ++i) 195 hash_val = hash_val << 7 ^ pKey[i]; 196 197 return hash_val; 198 } 199 }; 200 201 /** \class StringHash<FNV> 202 * \brief FNV hash function 203 * 0.058s 204 */ 205 template <> 206 struct StringHash<FNV> 207 : public std::unary_function<const llvm::StringRef, uint32_t> { 208 uint32_t operator()(const llvm::StringRef pKey) const { 209 const uint32_t fnv_prime = 0x811C9DC5; 210 uint32_t hash_val = 0; 211 for (uint32_t i = 0; i < pKey.size(); ++i) { 212 hash_val *= fnv_prime; 213 hash_val ^= pKey[i]; 214 } 215 216 return hash_val; 217 } 218 }; 219 220 /** \class StringHash<AP> 221 * \brief AP hash function 222 * 0.060s 223 */ 224 template <> 225 struct StringHash<AP> 226 : public std::unary_function<const llvm::StringRef, uint32_t> { 227 uint32_t operator()(const llvm::StringRef pKey) const { 228 unsigned int hash_val = 0xAAAAAAAA; 229 230 for (uint32_t i = 0; i < pKey.size(); ++i) { 231 hash_val ^= ((i & 1) == 0) 232 ? ((hash_val << 7) ^ pKey[i] * (hash_val >> 3)) 233 : (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5)))); 234 } 235 236 return hash_val; 237 } 238 }; 239 240 /** \class StringHash<ES> 241 * \brief This is a revision of Edward Sayers' string characteristic function. 242 * 243 * 31-28 27 26 25 - 0 244 * +----+---+---+------------+ 245 * | . | N | - | a/A ~ z/Z | 246 * +----+---+---+------------+ 247 * 248 * . (bit 31~28) - The number of '.' characters 249 * N (bit 27) - Are there any numbers in the string 250 * - (bit 26) - Does the string have '-' character 251 * bit 25~0 - Bit 25 is set only if the string contains a 'a' or 'A', and 252 * Bit 24 is set only if ... 'b' or 'B', ... 253 */ 254 template <> 255 struct StringHash<ES> 256 : public std::unary_function<const llvm::StringRef, uint32_t> { 257 uint32_t operator()(const llvm::StringRef pString) const { 258 uint32_t result = 0x0; 259 unsigned int dot = 0; 260 std::string::size_type idx; 261 for (idx = 0; idx < pString.size(); ++idx) { 262 int cur_char = pString[idx]; 263 264 if ('.' == cur_char) { 265 ++dot; 266 continue; 267 } 268 269 if (isdigit(cur_char)) { 270 result |= (1 << 27); 271 continue; 272 } 273 274 if ('_' == cur_char) { 275 result |= (1 << 26); 276 continue; 277 } 278 279 if (isupper(cur_char)) { 280 result |= (1 << (cur_char - 'A')); 281 continue; 282 } 283 284 if (islower(cur_char)) { 285 result |= (1 << (cur_char - 'a')); 286 continue; 287 } 288 } 289 result |= (dot << 28); 290 return result; 291 } 292 293 /** \func may_include 294 * \brief is it possible that pRule is a sub-string of pInString 295 */ 296 static bool may_include(uint32_t pRule, uint32_t pInString) { 297 uint32_t in_c = pInString << 4; 298 uint32_t r_c = pRule << 4; 299 300 uint32_t res = (in_c ^ r_c) & r_c; 301 if (0 != res) 302 return false; 303 304 uint32_t in_dot = pInString >> 28; 305 uint32_t r_dot = pRule >> 28; 306 if (r_dot > in_dot) 307 return false; 308 309 return true; 310 } 311 }; 312 313 /** \class template<uint32_t TYPE> StringCompare 314 * \brief the template StringCompare class, for specification 315 */ 316 template <typename STRING_TYPE> 317 struct StringCompare : public std::binary_function<const STRING_TYPE&, 318 const STRING_TYPE&, 319 bool> { 320 bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const { 321 return X == Y; 322 } 323 }; 324 325 template <> 326 struct StringCompare<const char*> 327 : public std::binary_function<const char*, const char*, bool> { 328 bool operator()(const char* X, const char* Y) const { 329 return (std::strcmp(X, Y) == 0); 330 } 331 }; 332 333 template <> 334 struct StringCompare<char*> 335 : public std::binary_function<const char*, const char*, bool> { 336 bool operator()(const char* X, const char* Y) const { 337 return (std::strcmp(X, Y) == 0); 338 } 339 }; 340 341 } // namespace hash 342 } // namespace mcld 343 344 #endif // MCLD_ADT_STRINGHASH_H_ 345