1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 // Configuration macros for the tokenizer module.
16 #pragma once
17 
18 #include <assert.h>
19 
// Size, in bytes, of the integer that records the argument types of a
// tokenized string that has arguments: either 4 (uint32_t) or 8 (uint64_t).
//
// Layout of the value:
//   - The least-significant bits store the number of arguments: 4 bits for the
//     4-byte encoding, 6 bits for the 8-byte encoding.
//   - The remaining bits store the argument types, two bits per argument, in
//     little-endian order.
//
// This supports up to 14 arguments in 4 bytes or 29 arguments in 8 bytes.
#if !defined(PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES)
#define PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES 4
#endif  // !defined(PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES)

// Only the two sizes described above are valid; reject anything else at
// compile time.
static_assert(PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES == 8 ||
                  PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES == 4,
              "PW_TOKENIZER_CFG_ARG_TYPES_SIZE_BYTES must be 4 or 8");
33 
// Maximum number of characters that are hashed when tokenizing in C.
//
// In C code, a string shorter than this length is hashed as if it were
// zero-padded up to this length. Strings that have the same length and share a
// common prefix longer than this value hash to the same value. Raising
// PW_TOKENIZER_CFG_C_HASH_LENGTH increases C compilation time because of the
// complexity of the hashing macros.
//
// C++ code is not affected by PW_TOKENIZER_CFG_C_HASH_LENGTH: there, hashing is
// done with a constexpr function rather than a macro, so there is no string
// length limitation and compilation time does not depend on this macro.
//
// Only hash lengths that have a corresponding macro header
// (pw_tokenizer/internal/hash_macro_#.h) are supported. Additional macros may
// be generated with the generate_hash_macro.py script; each new macro header
// must then be added to pw_tokenizer/internal/hash.h.
//
// This MUST match the value of DEFAULT_C_HASH_LENGTH in
// pw_tokenizer/py/pw_tokenizer/tokens.py.
#if !defined(PW_TOKENIZER_CFG_C_HASH_LENGTH)
#define PW_TOKENIZER_CFG_C_HASH_LENGTH 128
#endif  // !defined(PW_TOKENIZER_CFG_C_HASH_LENGTH)
54 
// Size, in bytes, of the stack-allocated argument encoding buffer. This only
// affects the tokenization macros that stack-allocate the encoding buffer
// (PW_TOKENIZE_TO_CALLBACK and PW_TOKENIZE_TO_GLOBAL_HANDLER). A buffer of this
// size is allocated and holds both the 4-byte token and all encoded arguments,
// so it must be at least large enough for the token (4 bytes).
//
// This buffer does not need to be large to accommodate a good number of
// tokenized string arguments. Integer arguments are usually encoded smaller
// than their native size (e.g. 1 or 2 bytes for smaller numbers). All floating
// point types are encoded as four bytes. Null-terminated strings are encoded
// 1:1 in size.
#ifndef PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES
#define PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES 52
#endif  // PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES

// Enforce the documented minimum at compile time so that a misconfigured
// buffer that cannot even hold the 4-byte token is rejected by the build.
static_assert(PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES >= 4,
              "PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES must be >= 4");
69