1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "util/hash/hash.h"
18 
19 #include "utils/base/macros.h"
20 
21 namespace libtextclassifier2 {
22 
23 namespace {
24 // Lower-level versions of Get... that read directly from a character buffer
25 // without any bounds checking.
DecodeFixed32(const char * ptr)26 inline uint32 DecodeFixed32(const char *ptr) {
27   return ((static_cast<uint32>(static_cast<unsigned char>(ptr[0]))) |
28           (static_cast<uint32>(static_cast<unsigned char>(ptr[1])) << 8) |
29           (static_cast<uint32>(static_cast<unsigned char>(ptr[2])) << 16) |
30           (static_cast<uint32>(static_cast<unsigned char>(ptr[3])) << 24));
31 }
32 
33 // 0xff is in case char is signed.
ByteAs32(char c)34 static inline uint32 ByteAs32(char c) { return static_cast<uint32>(c) & 0xff; }
35 }  // namespace
36 
Hash32(const char * data,size_t n,uint32 seed)37 uint32 Hash32(const char *data, size_t n, uint32 seed) {
38   // 'm' and 'r' are mixing constants generated offline.
39   // They're not really 'magic', they just happen to work well.
40   const uint32 m = 0x5bd1e995;
41   const int r = 24;
42 
43   // Initialize the hash to a 'random' value
44   uint32 h = static_cast<uint32>(seed ^ n);
45 
46   // Mix 4 bytes at a time into the hash
47   while (n >= 4) {
48     uint32 k = DecodeFixed32(data);
49     k *= m;
50     k ^= k >> r;
51     k *= m;
52     h *= m;
53     h ^= k;
54     data += 4;
55     n -= 4;
56   }
57 
58   // Handle the last few bytes of the input array
59   switch (n) {
60     case 3:
61       h ^= ByteAs32(data[2]) << 16;
62       TC3_FALLTHROUGH_INTENDED;
63     case 2:
64       h ^= ByteAs32(data[1]) << 8;
65       TC3_FALLTHROUGH_INTENDED;
66     case 1:
67       h ^= ByteAs32(data[0]);
68       h *= m;
69   }
70 
71   // Do a few final mixes of the hash to ensure the last few
72   // bytes are well-incorporated.
73   h ^= h >> 13;
74   h *= m;
75   h ^= h >> 15;
76   return h;
77 }
78 
79 }  // namespace libtextclassifier2
80