1 // Copyright 2018 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "absl/strings/internal/memutil.h"
16 
17 #include <algorithm>
18 #include <cstdlib>
19 
20 #include "benchmark/benchmark.h"
21 #include "absl/strings/ascii.h"
22 
23 // We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab.
24 // That gives us:
25 // - an easy search: 'b'
26 // - a medium search: 'ab'.  That means every letter is a possible match.
27 // - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haytack)
28 // We benchmark case-sensitive and case-insensitive versions of
29 // three memmem implementations:
30 // - memmem() from memutil.h
31 // - search() from STL
32 // - memmatch(), a custom implementation using memchr and memcmp.
33 // Here are sample results:
34 //
35 // Run on (12 X 3800 MHz CPU s)
36 // CPU Caches:
37 //   L1 Data 32K (x6)
38 //   L1 Instruction 32K (x6)
39 //   L2 Unified 256K (x6)
40 //   L3 Unified 15360K (x1)
41 // ----------------------------------------------------------------
42 // Benchmark                           Time          CPU Iterations
43 // ----------------------------------------------------------------
44 // BM_Memmem                        3583 ns      3582 ns     196469  2.59966GB/s
45 // BM_MemmemMedium                 13743 ns     13742 ns      50901  693.986MB/s
46 // BM_MemmemPathological        13695030 ns  13693977 ns         51  713.133kB/s
47 // BM_Memcasemem                    3299 ns      3299 ns     212942  2.82309GB/s
48 // BM_MemcasememMedium             16407 ns     16406 ns      42170  581.309MB/s
49 // BM_MemcasememPathological    17267745 ns  17266030 ns         41  565.598kB/s
50 // BM_Search                        1610 ns      1609 ns     431321  5.78672GB/s
51 // BM_SearchMedium                 11111 ns     11110 ns      63001  858.414MB/s
52 // BM_SearchPathological        12117390 ns  12116397 ns         58  805.984kB/s
53 // BM_Searchcase                    3081 ns      3081 ns     229949  3.02313GB/s
54 // BM_SearchcaseMedium             16003 ns     16001 ns      44170  595.998MB/s
55 // BM_SearchcasePathological    15823413 ns  15821909 ns         44  617.222kB/s
56 // BM_Memmatch                       197 ns       197 ns    3584225  47.2951GB/s
57 // BM_MemmatchMedium               52333 ns     52329 ns      13280  182.244MB/s
58 // BM_MemmatchPathological        659799 ns    659727 ns       1058  14.4556MB/s
59 // BM_Memcasematch                  5460 ns      5460 ns     127606  1.70586GB/s
60 // BM_MemcasematchMedium           32861 ns     32857 ns      21258  290.248MB/s
61 // BM_MemcasematchPathological  15154243 ns  15153089 ns         46  644.464kB/s
62 // BM_MemmemStartup                    5 ns         5 ns  150821500
63 // BM_SearchStartup                    5 ns         5 ns  150644203
64 // BM_MemmatchStartup                  7 ns         7 ns   97068802
65 //
66 // Conclusions:
67 //
68 // The following recommendations are based on the sample results above. However,
69 // we have found that the performance of STL search can vary significantly
70 // depending on compiler and standard library implementation. We recommend you
71 // run the benchmarks for yourself on relevant platforms.
72 //
73 // If you need case-insensitive, STL search is slightly better than memmem for
74 // all cases.
75 //
76 // Case-sensitive is more subtle:
77 // Custom memmatch is _very_ fast at scanning, so if you have very few possible
78 // matches in your haystack, that's the way to go. Performance drops
79 // significantly with more matches.
80 //
81 // STL search is slightly faster than memmem in the medium and pathological
82 // benchmarks. However, the performance of memmem is currently more dependable
83 // across platforms and build configurations.
84 
85 namespace {
86 
87 constexpr int kHaystackSize = 10000;
88 constexpr int64_t kHaystackSize64 = kHaystackSize;
MakeHaystack()89 const char* MakeHaystack() {
90   char* haystack = new char[kHaystackSize];
91   for (int i = 0; i < kHaystackSize - 1; ++i) haystack[i] = 'a';
92   haystack[kHaystackSize - 1] = 'b';
93   return haystack;
94 }
95 const char* const kHaystack = MakeHaystack();
96 
BM_Memmem(benchmark::State & state)97 void BM_Memmem(benchmark::State& state) {
98   for (auto _ : state) {
99     benchmark::DoNotOptimize(
100         absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1));
101   }
102   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
103 }
104 BENCHMARK(BM_Memmem);
105 
BM_MemmemMedium(benchmark::State & state)106 void BM_MemmemMedium(benchmark::State& state) {
107   for (auto _ : state) {
108     benchmark::DoNotOptimize(
109         absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2));
110   }
111   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
112 }
113 BENCHMARK(BM_MemmemMedium);
114 
BM_MemmemPathological(benchmark::State & state)115 void BM_MemmemPathological(benchmark::State& state) {
116   for (auto _ : state) {
117     benchmark::DoNotOptimize(absl::strings_internal::memmem(
118         kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
119         kHaystackSize - kHaystackSize / 2));
120   }
121   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
122 }
123 BENCHMARK(BM_MemmemPathological);
124 
BM_Memcasemem(benchmark::State & state)125 void BM_Memcasemem(benchmark::State& state) {
126   for (auto _ : state) {
127     benchmark::DoNotOptimize(
128         absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1));
129   }
130   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
131 }
132 BENCHMARK(BM_Memcasemem);
133 
BM_MemcasememMedium(benchmark::State & state)134 void BM_MemcasememMedium(benchmark::State& state) {
135   for (auto _ : state) {
136     benchmark::DoNotOptimize(
137         absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2));
138   }
139   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
140 }
141 BENCHMARK(BM_MemcasememMedium);
142 
BM_MemcasememPathological(benchmark::State & state)143 void BM_MemcasememPathological(benchmark::State& state) {
144   for (auto _ : state) {
145     benchmark::DoNotOptimize(absl::strings_internal::memcasemem(
146         kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
147         kHaystackSize - kHaystackSize / 2));
148   }
149   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
150 }
151 BENCHMARK(BM_MemcasememPathological);
152 
case_eq(const char a,const char b)153 bool case_eq(const char a, const char b) {
154   return absl::ascii_tolower(a) == absl::ascii_tolower(b);
155 }
156 
BM_Search(benchmark::State & state)157 void BM_Search(benchmark::State& state) {
158   for (auto _ : state) {
159     benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
160                                          kHaystack + kHaystackSize - 1,
161                                          kHaystack + kHaystackSize));
162   }
163   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
164 }
165 BENCHMARK(BM_Search);
166 
BM_SearchMedium(benchmark::State & state)167 void BM_SearchMedium(benchmark::State& state) {
168   for (auto _ : state) {
169     benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
170                                          kHaystack + kHaystackSize - 2,
171                                          kHaystack + kHaystackSize));
172   }
173   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
174 }
175 BENCHMARK(BM_SearchMedium);
176 
BM_SearchPathological(benchmark::State & state)177 void BM_SearchPathological(benchmark::State& state) {
178   for (auto _ : state) {
179     benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
180                                          kHaystack + kHaystackSize / 2,
181                                          kHaystack + kHaystackSize));
182   }
183   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
184 }
185 BENCHMARK(BM_SearchPathological);
186 
BM_Searchcase(benchmark::State & state)187 void BM_Searchcase(benchmark::State& state) {
188   for (auto _ : state) {
189     benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
190                                          kHaystack + kHaystackSize - 1,
191                                          kHaystack + kHaystackSize, case_eq));
192   }
193   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
194 }
195 BENCHMARK(BM_Searchcase);
196 
BM_SearchcaseMedium(benchmark::State & state)197 void BM_SearchcaseMedium(benchmark::State& state) {
198   for (auto _ : state) {
199     benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
200                                          kHaystack + kHaystackSize - 2,
201                                          kHaystack + kHaystackSize, case_eq));
202   }
203   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
204 }
205 BENCHMARK(BM_SearchcaseMedium);
206 
BM_SearchcasePathological(benchmark::State & state)207 void BM_SearchcasePathological(benchmark::State& state) {
208   for (auto _ : state) {
209     benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
210                                          kHaystack + kHaystackSize / 2,
211                                          kHaystack + kHaystackSize, case_eq));
212   }
213   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
214 }
215 BENCHMARK(BM_SearchcasePathological);
216 
memcasechr(const char * s,int c,size_t slen)217 char* memcasechr(const char* s, int c, size_t slen) {
218   c = absl::ascii_tolower(c);
219   for (; slen; ++s, --slen) {
220     if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s);
221   }
222   return nullptr;
223 }
224 
memcasematch(const char * phaystack,size_t haylen,const char * pneedle,size_t neelen)225 const char* memcasematch(const char* phaystack, size_t haylen,
226                          const char* pneedle, size_t neelen) {
227   if (0 == neelen) {
228     return phaystack;  // even if haylen is 0
229   }
230   if (haylen < neelen) return nullptr;
231 
232   const char* match;
233   const char* hayend = phaystack + haylen - neelen + 1;
234   while ((match = static_cast<char*>(
235               memcasechr(phaystack, pneedle[0], hayend - phaystack)))) {
236     if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0)
237       return match;
238     else
239       phaystack = match + 1;
240   }
241   return nullptr;
242 }
243 
BM_Memmatch(benchmark::State & state)244 void BM_Memmatch(benchmark::State& state) {
245   for (auto _ : state) {
246     benchmark::DoNotOptimize(
247         absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1));
248   }
249   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
250 }
251 BENCHMARK(BM_Memmatch);
252 
BM_MemmatchMedium(benchmark::State & state)253 void BM_MemmatchMedium(benchmark::State& state) {
254   for (auto _ : state) {
255     benchmark::DoNotOptimize(
256         absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2));
257   }
258   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
259 }
260 BENCHMARK(BM_MemmatchMedium);
261 
BM_MemmatchPathological(benchmark::State & state)262 void BM_MemmatchPathological(benchmark::State& state) {
263   for (auto _ : state) {
264     benchmark::DoNotOptimize(absl::strings_internal::memmatch(
265         kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
266         kHaystackSize - kHaystackSize / 2));
267   }
268   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
269 }
270 BENCHMARK(BM_MemmatchPathological);
271 
BM_Memcasematch(benchmark::State & state)272 void BM_Memcasematch(benchmark::State& state) {
273   for (auto _ : state) {
274     benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1));
275   }
276   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
277 }
278 BENCHMARK(BM_Memcasematch);
279 
BM_MemcasematchMedium(benchmark::State & state)280 void BM_MemcasematchMedium(benchmark::State& state) {
281   for (auto _ : state) {
282     benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "ab", 2));
283   }
284   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
285 }
286 BENCHMARK(BM_MemcasematchMedium);
287 
BM_MemcasematchPathological(benchmark::State & state)288 void BM_MemcasematchPathological(benchmark::State& state) {
289   for (auto _ : state) {
290     benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize,
291                                           kHaystack + kHaystackSize / 2,
292                                           kHaystackSize - kHaystackSize / 2));
293   }
294   state.SetBytesProcessed(kHaystackSize64 * state.iterations());
295 }
296 BENCHMARK(BM_MemcasematchPathological);
297 
BM_MemmemStartup(benchmark::State & state)298 void BM_MemmemStartup(benchmark::State& state) {
299   for (auto _ : state) {
300     benchmark::DoNotOptimize(absl::strings_internal::memmem(
301         kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
302   }
303 }
304 BENCHMARK(BM_MemmemStartup);
305 
BM_SearchStartup(benchmark::State & state)306 void BM_SearchStartup(benchmark::State& state) {
307   for (auto _ : state) {
308     benchmark::DoNotOptimize(
309         std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize,
310                     kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize));
311   }
312 }
313 BENCHMARK(BM_SearchStartup);
314 
BM_MemmatchStartup(benchmark::State & state)315 void BM_MemmatchStartup(benchmark::State& state) {
316   for (auto _ : state) {
317     benchmark::DoNotOptimize(absl::strings_internal::memmatch(
318         kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
319   }
320 }
321 BENCHMARK(BM_MemmatchStartup);
322 
323 }  // namespace
324