1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 #pragma once
15 
16 #ifdef __cplusplus
17 
18 #include <cstddef>
19 #include <cstdint>
20 
21 #else
22 
23 #include <assert.h>
24 #include <stddef.h>
25 #include <stdint.h>
26 
27 #endif  // __cplusplus
28 
29 #include "pw_preprocessor/arguments.h"
30 #include "pw_preprocessor/compiler.h"
31 #include "pw_preprocessor/concat.h"
32 #include "pw_preprocessor/util.h"
33 #include "pw_tokenizer/internal/argument_types.h"
34 #include "pw_tokenizer/internal/tokenize_string.h"
35 
// The type of the token used in place of a format string: an unsigned 32-bit
// integer. C++ code can also refer to this type as pw::tokenizer::Token.
typedef uint32_t pw_tokenizer_Token;
39 
// Strings may optionally be tokenized to a domain. Strings in different domains
// can be processed separately by the token database tools. Each domain in use
// must have a corresponding section declared in the linker script. See
// pw_tokenizer_linker_sections.ld for more details.
//
// The default domain is an empty string. It is the domain passed by the
// PW_TOKENIZE_* macros that do not take an explicit domain argument.
#define PW_TOKENIZER_DEFAULT_DOMAIN ""
47 
48 // Tokenizes a string and converts it to a pw_tokenizer_Token. In C++, the
49 // string may be a literal or a constexpr char array. In C, the argument must be
50 // a string literal. In either case, the string must be null terminated, but may
51 // contain any characters (including '\0').
52 //
53 // This expression can be assigned to a local or global variable, but cannot be
54 // used in another expression. For example:
55 //
56 //   constexpr uint32_t global = PW_TOKENIZE_STRING("Wow!");  // This works.
57 //
58 //   void SomeFunction() {
59 //     constexpr uint32_t token = PW_TOKENIZE_STRING("Cool!");  // This works.
60 //
61 //     DoSomethingElse(PW_TOKENIZE_STRING("Lame!"));  // This does NOT work.
62 //   }
63 //
// Implementation note: this forwards to PW_TOKENIZE_STRING_DOMAIN with the
// default domain.
#define PW_TOKENIZE_STRING(str) \
  PW_TOKENIZE_STRING_DOMAIN(PW_TOKENIZER_DEFAULT_DOMAIN, str)
66 
// Tokenizes a string like PW_TOKENIZE_STRING, but records it under the given
// domain instead of the default one. The mask passed on is UINT32_MAX, so no
// token bits are cleared.
#define PW_TOKENIZE_STRING_DOMAIN(domain, str) \
  PW_TOKENIZE_STRING_MASK(domain, UINT32_MAX, str)
70 
// Tokenizes a string like PW_TOKENIZE_STRING_DOMAIN, then ANDs the token with
// the mask. The mask must satisfy 0 < mask <= UINT32_MAX, which is enforced
// with a static_assert.
//
// The expansion starts with the masked token expression, which the caller
// assigns to a variable, and is followed by further declarations. The order of
// the statements below therefore must not change.
#define PW_TOKENIZE_STRING_MASK(domain, mask, str)                    \
  /* assign to a variable */ _PW_TOKENIZER_MASK_TOKEN(mask, str);     \
                                                                      \
  /* Masks must be in the range (0, UINT32_MAX]. */                   \
  static_assert(0 < (mask) && (mask) <= UINT32_MAX,                   \
                "Tokenizer masks must be non-zero uint32_t values."); \
                                                                      \
  /* Record the token, domain, and original string. */                \
  _PW_TOKENIZER_RECORD_ORIGINAL_STRING(                               \
      _PW_TOKENIZER_MASK_TOKEN(mask, str), domain, str)
80 
// Computes the token for a string and ANDs it with the mask, which is first
// cast to pw_tokenizer_Token.
#define _PW_TOKENIZER_MASK_TOKEN(mask, str) \
  ((pw_tokenizer_Token)(mask) & PW_TOKENIZER_STRING_TOKEN(str))
83 
84 // Encodes a tokenized string and arguments to the provided buffer. The size of
85 // the buffer is passed via a pointer to a size_t. After encoding is complete,
86 // the size_t is set to the number of bytes written to the buffer.
87 //
88 // The macro's arguments are equivalent to the following function signature:
89 //
90 //   TokenizeToBuffer(void* buffer,
91 //                    size_t* buffer_size_pointer,
92 //                    const char* format,
93 //                    ...);  /* printf-style arguments */
94 //
95 // For example, the following encodes a tokenized string with a temperature to a
96 // buffer. The buffer is passed to a function to send the message over a UART.
97 //
98 //   uint8_t buffer[32];
99 //   size_t size_bytes = sizeof(buffer);
100 //   PW_TOKENIZE_TO_BUFFER(
101 //       buffer, &size_bytes, "Temperature (C): %0.2f", temperature_c);
//   MyProject_EnqueueMessageForUart(buffer, size_bytes);
103 //
// Implementation note: this forwards to PW_TOKENIZE_TO_BUFFER_DOMAIN with the
// default domain.
#define PW_TOKENIZE_TO_BUFFER(buffer, buffer_size_pointer, format, ...) \
  PW_TOKENIZE_TO_BUFFER_DOMAIN(                                         \
      PW_TOKENIZER_DEFAULT_DOMAIN,                                      \
      buffer,                                                           \
      buffer_size_pointer,                                              \
      format,                                                           \
      __VA_ARGS__)
110 
// Encodes to a buffer like PW_TOKENIZE_TO_BUFFER, but records the format string
// under the given domain. The mask passed on is UINT32_MAX, so no token bits
// are cleared.
#define PW_TOKENIZE_TO_BUFFER_DOMAIN(                 \
    domain, buffer, buffer_size_pointer, format, ...) \
  PW_TOKENIZE_TO_BUFFER_MASK(domain,                  \
                             UINT32_MAX,              \
                             buffer,                  \
                             buffer_size_pointer,     \
                             format,                  \
                             __VA_ARGS__)
116 
// Encodes to a buffer like PW_TOKENIZE_TO_BUFFER_DOMAIN, but ANDs the token
// with the mask. The expansion is a single do/while(0) statement.
#define PW_TOKENIZE_TO_BUFFER_MASK(                                      \
    domain, mask, buffer, buffer_size_pointer, format, ...)              \
  do {                                                                   \
    /* Checks the arguments and defines _pw_tokenizer_token. */          \
    PW_TOKENIZE_FORMAT_STRING(domain, mask, format, __VA_ARGS__);        \
                                                                         \
    /* Passes the token, argument types, and arguments to the encoder. */ \
    _pw_tokenizer_ToBuffer(                                              \
        buffer,                                                          \
        buffer_size_pointer,                                             \
        _pw_tokenizer_token,                                             \
        PW_TOKENIZER_ARG_TYPES(__VA_ARGS__) PW_COMMA_ARGS(__VA_ARGS__)); \
  } while (0)
128 
129 // Encodes a tokenized string and arguments to a buffer on the stack. The
130 // provided callback is called with the encoded data. The size of the
131 // stack-allocated argument encoding buffer is set with the
132 // PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES option.
133 //
134 // The macro's arguments are equivalent to the following function signature:
135 //
136 //   TokenizeToCallback(void (*callback)(const uint8_t* data, size_t size),
137 //                      const char* format,
138 //                      ...);  /* printf-style arguments */
139 //
140 // For example, the following encodes a tokenized string with a sensor name and
141 // floating point data. The encoded message is passed directly to the
142 // MyProject_EnqueueMessageForUart function, which the caller provides as a
143 // callback.
144 //
145 //   void MyProject_EnqueueMessageForUart(const uint8_t* buffer,
146 //                                        size_t size_bytes) {
147 //     uart_queue_write(uart_instance, buffer, size_bytes);
148 //   }
149 //
150 //   void LogSensorValue(const char* sensor_name, float value) {
151 //     PW_TOKENIZE_TO_CALLBACK(MyProject_EnqueueMessageForUart,
152 //                             "%s: %f",
153 //                             sensor_name,
154 //                             value);
155 //   }
156 //
// Implementation note: this forwards to PW_TOKENIZE_TO_CALLBACK_DOMAIN with the
// default domain.
#define PW_TOKENIZE_TO_CALLBACK(callback, format, ...)         \
  PW_TOKENIZE_TO_CALLBACK_DOMAIN(PW_TOKENIZER_DEFAULT_DOMAIN,  \
                                 callback,                     \
                                 format,                       \
                                 __VA_ARGS__)
160 
// Encodes and invokes the callback like PW_TOKENIZE_TO_CALLBACK, but records
// the format string under the given domain. The mask passed on is UINT32_MAX,
// so no token bits are cleared.
#define PW_TOKENIZE_TO_CALLBACK_DOMAIN(domain, callback, format, ...) \
  PW_TOKENIZE_TO_CALLBACK_MASK(domain,                                \
                               UINT32_MAX,                            \
                               callback,                              \
                               format,                                \
                               __VA_ARGS__)
165 
// Encodes and invokes the callback like PW_TOKENIZE_TO_CALLBACK_DOMAIN, but
// ANDs the token with the mask. The expansion is a single do/while(0)
// statement.
#define PW_TOKENIZE_TO_CALLBACK_MASK(domain, mask, callback, format, ...) \
  do {                                                                    \
    /* Checks the arguments and defines _pw_tokenizer_token. */           \
    PW_TOKENIZE_FORMAT_STRING(domain, mask, format, __VA_ARGS__);         \
                                                                          \
    /* Passes the token, argument types, and arguments to the encoder. */ \
    _pw_tokenizer_ToCallback(                                             \
        callback,                                                         \
        _pw_tokenizer_token,                                              \
        PW_TOKENIZER_ARG_TYPES(__VA_ARGS__) PW_COMMA_ARGS(__VA_ARGS__));  \
  } while (0)
175 
176 PW_EXTERN_C_START
177 
// These functions encode the tokenized strings. These should not be called
// directly. Instead, use the corresponding PW_TOKENIZE_TO_* macros above.

// Called by PW_TOKENIZE_TO_BUFFER_MASK. Takes the destination buffer, a pointer
// to its size, the string's token, the argument type information, and the
// printf-style arguments themselves as variadic arguments. Per the macro
// documentation above, *buffer_size_bytes is set to the number of bytes written
// once encoding completes.
void _pw_tokenizer_ToBuffer(void* buffer,
                            size_t* buffer_size_bytes,  // input and output arg
                            pw_tokenizer_Token token,
                            pw_tokenizer_ArgTypes types,
                            ...);

// Called by PW_TOKENIZE_TO_CALLBACK_MASK. Takes the callback that receives the
// encoded message and its size in bytes, the string's token, the argument type
// information, and the printf-style arguments as variadic arguments.
void _pw_tokenizer_ToCallback(void (*callback)(const uint8_t* encoded_message,
                                               size_t size_bytes),
                              pw_tokenizer_Token token,
                              pw_tokenizer_ArgTypes types,
                              ...);
191 
192 // This empty function allows the compiler to check the format string.
193 static inline void pw_tokenizer_CheckFormatString(const char* format, ...)
194     PW_PRINTF_FORMAT(1, 2);
195 
pw_tokenizer_CheckFormatString(const char * format,...)196 static inline void pw_tokenizer_CheckFormatString(const char* format, ...) {
197   (void)format;
198 }
199 
200 PW_EXTERN_C_END
201 
202 // These macros implement string tokenization. They should not be used directly;
203 // use one of the PW_TOKENIZE_* macros above instead.
204 
205 // This macro takes a printf-style format string and corresponding arguments. It
206 // checks that the arguments are correct, stores the format string in a special
// section, and calculates the string's token at compile time.
208 // clang-format off
#define PW_TOKENIZE_FORMAT_STRING(domain, mask, fmt, ...)                      \
  /* Never executed, so the arguments are not evaluated a second time; the */  \
  /* call exists only so the compiler can check them against the format.   */  \
  if (0) {                                                                     \
    pw_tokenizer_CheckFormatString(fmt PW_COMMA_ARGS(__VA_ARGS__));            \
  }                                                                            \
                                                                               \
  /* Enforce the limit on the number of arguments. */                          \
  static_assert(                                                               \
      PW_FUNCTION_ARG_COUNT(__VA_ARGS__) <= PW_TOKENIZER_MAX_SUPPORTED_ARGS,   \
      "Tokenized strings cannot have more than "                               \
      PW_STRINGIFY(PW_TOKENIZER_MAX_SUPPORTED_ARGS) " arguments; "             \
      PW_STRINGIFY(PW_FUNCTION_ARG_COUNT(__VA_ARGS__))                         \
      " arguments were used for " #fmt " (" #__VA_ARGS__ ")");                 \
                                                                               \
  /* Compute the masked token; the macros above read _pw_tokenizer_token. */   \
  static _PW_TOKENIZER_CONST pw_tokenizer_Token _pw_tokenizer_token =          \
      _PW_TOKENIZER_MASK_TOKEN(mask, fmt);                                     \
                                                                               \
  /* Record the token, domain, and original format string. */                  \
  _PW_TOKENIZER_RECORD_ORIGINAL_STRING(_pw_tokenizer_token, domain, fmt)
227 
228 // clang-format on
229 
// Creates a unique name by joining the prefix with __LINE__, an underscore, and
// __COUNTER__. Used to name tokenized string entries and linker sections.
#define _PW_TOKENIZER_UNIQUE(name_prefix) \
  PW_CONCAT(name_prefix, __LINE__, _, __COUNTER__)
232 
233 #ifdef __cplusplus
234 
235 #define _PW_TOKENIZER_CONST constexpr
236 
237 #define _PW_TOKENIZER_RECORD_ORIGINAL_STRING(token, domain, string)            \
238   alignas(1) static constexpr ::pw::tokenizer::internal::Entry<sizeof(domain), \
239                                                                sizeof(string)> \
240       _PW_TOKENIZER_SECTION _PW_TOKENIZER_UNIQUE(                              \
241           _pw_tokenizer_string_entry_) {                                       \
242     token, domain, string                                                      \
243   }
244 
245 namespace pw {
246 namespace tokenizer {
247 
248 using Token = ::pw_tokenizer_Token;
249 
250 }  // namespace tokenizer
251 }  // namespace pw
252 
253 #else
254 
255 #define _PW_TOKENIZER_CONST const
256 
257 #define _PW_TOKENIZER_RECORD_ORIGINAL_STRING(token, domain, string) \
258   _Alignas(1) static const _PW_TOKENIZER_STRING_ENTRY(token, domain, string)
259 
260 #endif  // __cplusplus
261 
262 // _PW_TOKENIZER_SECTION places the tokenized strings in a special .pw_tokenizer
263 // linker section. Host-side decoding tools read the strings and tokens from
264 // this section to build a database of tokenized strings.
265 //
266 // This section should be declared as type INFO so that it is excluded from the
267 // final binary. To declare the section, as well as the .pw_tokenizer.info
268 // metadata section, add the following to the linker script's SECTIONS command:
269 //
270 //   .pw_tokenizer.info 0x0 (INFO) :
271 //   {
272 //     KEEP(*(.pw_tokenizer.info))
273 //   }
274 //
275 //   .pw_tokenizer.entries 0x0 (INFO) :
276 //   {
277 //     KEEP(*(.pw_tokenizer.entries.*))
278 //   }
279 //
280 // A linker script snippet that provides these sections is provided in the file
281 // pw_tokenizer_linker_sections.ld. This file may be directly included into
282 // existing linker scripts.
283 //
284 // The tokenized string sections can also be managed without linker script
285 // modifications, though this is not recommended. The section can be extracted
286 // and removed from the ELF with objcopy:
287 //
288 //   objcopy --only-section .pw_tokenizer.* <ORIGINAL_ELF> <OUTPUT_ELF>
289 //   objcopy --remove-section .pw_tokenizer.* <ORIGINAL_ELF>
290 //
291 // OUTPUT_ELF will be an ELF with only the tokenized strings, and the original
292 // ELF file will have the sections removed.
293 //
294 // Without the above linker script modifications, the section garbage collection
295 // option (--gc-sections) removes the tokenized string sections. To avoid
296 // editing the target linker script, a separate metadata ELF can be linked
297 // without --gc-sections to preserve the tokenized data.
298 //
299 // pw_tokenizer is intended for use with ELF files only. Mach-O files (macOS
300 // executables) do not support section names longer than 16 characters, so a
301 // short, dummy section name is used on macOS.
#if defined(__APPLE__)
// Mach-O: use a short section name prefix (.pw.) plus the unique suffix.
#define _PW_TOKENIZER_SECTION \
  PW_KEEP_IN_SECTION(PW_STRINGIFY(_PW_TOKENIZER_UNIQUE(.pw.)))
#else
// Otherwise: each entry goes in its own .pw_tokenizer.entries.<unique> section.
#define _PW_TOKENIZER_SECTION \
  PW_KEEP_IN_SECTION(         \
      PW_STRINGIFY(_PW_TOKENIZER_UNIQUE(.pw_tokenizer.entries.)))
#endif  // defined(__APPLE__)
309