1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_
17 #define TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_
18 
19 // This header defines the macro TF_PLATFORM_STRINGS() which should be used
20 // once in each dynamically loadable TensorFlow module.  It embeds static
21 // strings into the compilation unit that allow TensorFlow to determine what
22 // compilation options were in effect when the compilation unit was built.  All
23 // compilation units within the same dynamically loadable library should be
24 // built with the same options (or at least, the strings should be embedded in
25 // the compilation unit built with the most restrictive options).
26 
27 // The platform strings embedded into a binary may be retrieved with the
28 // GetPlatformStrings function.
29 
30 // Rationale:
31 // We wish to load only those libraries that this CPU can execute.  For
32 // example, we should not load a library compiled with 256-bit AVX
33 // instructions on a CPU that cannot execute them.
34 //
35 // One might think that one could dlopen() the library, and call a routine that
36 // would return which cpu type it was compiled for.  Alas, this does not work,
37 // because at dlopen() time, a library containing C++ will execute constructors
38 // of class variables with static storage class.  Even code that looks
39 // innocuous may use optional platform-specific instructions.  For example,
40 // the fastest way to zero a region of memory might use optional instructions.
41 //
42 // One might think one could run a tool such as "objdump" to read flags from
43 // the libraries' headers, or perhaps disassemble each library to look for
44 // particular instructions.  Unfortunately, the desired flags are not present
45 // in the headers, and disassembly can be prohibitively slow ("objdump -d" is
46 // very slow, for example).  Moreover, a tool to examine the library may not
47 // be present on the system unless the user has installed special packages (for
48 // example, on Windows).
49 //
50 // Instead, we adopt a crude but straightforward solution:  We require
51 // developers to use the macro TF_PLATFORM_STRINGS() in their library, to
52 // embed the compilation options as constant strings.  The compiler's
53 // predefined macros pick which strings are included.  We then search for the
54 // strings in the files, and then dlopen() only those libraries that have or
55 // lack strings as needed.
56 //
57 // We adopt the approach of placing in the binary a fairly raw copy of the
58 // predefined macros, rather than trying to interpret them in complex ways at
59 // compile time.  This allows the loading binary to alter its interpretation of
60 // the strings without library developers having to recompile.
61 
62 #include <stdio.h>
63 
64 #include <string>
65 #include <vector>
66 
67 // Aside from the header guard, the internal macros defined here have the form:
68 //   TF_PLAT_STR_*
69 
70 // If a macro is removed from the list of tested macros, the major version in
71 // the following version number should be incremented, and the minor version
72 // set to zero.  Otherwise, if a macro is added to the list of tested macros,
73 // the minor number should be incremented.
74 #define TF_PLAT_STR_VERSION_ "1.0"
75 
76 // Prefix of each option string indicator in the binary.
77 // After the prefix, such strings have the form:
78 //    [A-Za-z_0-9]=<value>
79 // followed by a terminating nul.  To simplify searching, this prefix is all
80 // ASCII, starts with a nul, and contains no character twice.
81 #define TF_PLAT_STR_MAGIC_PREFIX_ "\0S\\s\":^p*L}"
82 
83 // A helper macro for TF_PLAT_STR_AS_STR_().
84 #define TF_PLAT_STR_STR_1_(x) #x
85 
86 // Yield a constant string corresponding to x, after macro expansion.
87 #define TF_PLAT_STR_AS_STR_(x) TF_PLAT_STR_STR_1_(x)
88 
89 // An empty definition to make lists more uniform.
90 #define TF_PLAT_STR_TERMINATOR_
91 
92 // TF_PLAT_STR_(x) introduces a constant string indicating whether a
93 // particular compilation option has been turned on.
94 //
95 // In gcc and clang, we might imagine using something like
96 // #define TF_PLAT_STR_(x) \
97 //     (sizeof (#x) != sizeof (TF_PLAT_STR_AS_STR_ (x))? \
98 //      TF_PLAT_STR_MAGIC_PREFIX_ #x "=" TF_PLAT_STR_AS_STR_ (x) : \
99 //      TF_PLAT_STR_MAGIC_PREFIX_ #x "=0"),
100 // but some compilers (notably MSVC) place both "foo" and "bar" in the binary
101 // when presented with
102 //    (true?  "foo" : "bar")
103 // so we must use #if to select the strings we need, which is rather verbose.
104 #define TF_PLAT_STR_(x) TF_PLAT_STR_MAGIC_PREFIX_ #x "=" TF_PLAT_STR_AS_STR_(x)
105 
106 // Include the #if machinery that sets the macros used below.
107 // platform_strings_computed.h can be generated by filtering this header file
108 // through:
109 // awk '
110 // header == "" { print; }
111 // /\*\// && header == "" {
112 //     print "// Generated from platform_strings.h.";
113 //     print "";
114 //     print "#ifndef TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_";
115 //     print "#define TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_";
116 //     print "";
117 //     header = 1;
118 // }
119 // /^#define TF_PLAT_STR_LIST_[a-zA-Z0-9_]*\(\) *\\$/ { active = 1; }
120 // /TF_PLAT_STR_TERMINATOR_/ { active = 0; }
121 // /^ *TF_PLAT_STR_[A-Za-z0-9_]* *\\$/ && active {
122 //     x = $0;
123 //     sub(/^ *TF_PLAT_STR_/, "", x);
124 //     sub(/ *\\$/, "", x);
125 //     printf ("#if defined(%s)\n", x);
126 //     printf ("#define TF_PLAT_STR_%s TF_PLAT_STR_(%s)\n", x, x);
127 //     printf ("#else\n");
128 //     printf ("#define TF_PLAT_STR_%s\n", x);
129 //     printf ("#endif\n");
130 // }
131 // END {
132 //     print "";
133 //     print "#endif  // TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_COMPUTED_H_";
134 // }'
135 #include "tensorflow/core/platform/platform_strings_computed.h"
136 
137 // clang-format butchers the following lines.
138 // clang-format off
139 
140 // x86_64 and x86_32 optional features.
141 #define TF_PLAT_STR_LIST___x86_64__()                                      \
142         TF_PLAT_STR__M_IX86_FP                                             \
143         TF_PLAT_STR__NO_PREFETCHW                                          \
144         TF_PLAT_STR___3dNOW_A__                                            \
145         TF_PLAT_STR___3dNOW__                                              \
146         TF_PLAT_STR___ABM__                                                \
147         TF_PLAT_STR___ADX__                                                \
148         TF_PLAT_STR___AES__                                                \
149         TF_PLAT_STR___AVX2__                                               \
150         TF_PLAT_STR___AVX512BW__                                           \
151         TF_PLAT_STR___AVX512CD__                                           \
152         TF_PLAT_STR___AVX512DQ__                                           \
153         TF_PLAT_STR___AVX512ER__                                           \
154         TF_PLAT_STR___AVX512F__                                            \
155         TF_PLAT_STR___AVX512IFMA__                                         \
156         TF_PLAT_STR___AVX512PF__                                           \
157         TF_PLAT_STR___AVX512VBMI__                                         \
158         TF_PLAT_STR___AVX512VL__                                           \
159         TF_PLAT_STR___AVX__                                                \
160         TF_PLAT_STR___BMI2__                                               \
161         TF_PLAT_STR___BMI__                                                \
162         TF_PLAT_STR___CLFLUSHOPT__                                         \
163         TF_PLAT_STR___CLZERO__                                             \
164         TF_PLAT_STR___F16C__                                               \
165         TF_PLAT_STR___FMA4__                                               \
166         TF_PLAT_STR___FMA__                                                \
167         TF_PLAT_STR___FP_FAST_FMA                                          \
168         TF_PLAT_STR___FP_FAST_FMAF                                         \
169         TF_PLAT_STR___FSGSBASE__                                           \
170         TF_PLAT_STR___FXSR__                                               \
171         TF_PLAT_STR___LWP__                                                \
172         TF_PLAT_STR___LZCNT__                                              \
173         TF_PLAT_STR___MMX__                                                \
174         TF_PLAT_STR___MWAITX__                                             \
175         TF_PLAT_STR___PCLMUL__                                             \
176         TF_PLAT_STR___PKU__                                                \
177         TF_PLAT_STR___POPCNT__                                             \
178         TF_PLAT_STR___PRFCHW__                                             \
179         TF_PLAT_STR___RDRND__                                              \
180         TF_PLAT_STR___RDSEED__                                             \
181         TF_PLAT_STR___RTM__                                                \
182         TF_PLAT_STR___SHA__                                                \
183         TF_PLAT_STR___SSE2_MATH__                                          \
184         TF_PLAT_STR___SSE2__                                               \
185         TF_PLAT_STR___SSE_MATH__                                           \
186         TF_PLAT_STR___SSE__                                                \
187         TF_PLAT_STR___SSE3__                                               \
188         TF_PLAT_STR___SSE4A__                                              \
189         TF_PLAT_STR___SSE4_1__                                             \
190         TF_PLAT_STR___SSE4_2__                                             \
191         TF_PLAT_STR___SSSE3__                                              \
192         TF_PLAT_STR___TBM__                                                \
193         TF_PLAT_STR___XOP__                                                \
194         TF_PLAT_STR___XSAVEC__                                             \
195         TF_PLAT_STR___XSAVEOPT__                                           \
196         TF_PLAT_STR___XSAVES__                                             \
197         TF_PLAT_STR___XSAVE__                                              \
198         TF_PLAT_STR_TERMINATOR_
199 
200 // PowerPC (64- and 32-bit) optional features.
201 #define TF_PLAT_STR_LIST___powerpc64__()                                   \
202         TF_PLAT_STR__SOFT_DOUBLE                                           \
203         TF_PLAT_STR__SOFT_FLOAT                                            \
204         TF_PLAT_STR___ALTIVEC__                                            \
205         TF_PLAT_STR___APPLE_ALTIVEC__                                      \
206         TF_PLAT_STR___CRYPTO__                                             \
207         TF_PLAT_STR___FLOAT128_HARDWARE__                                  \
208         TF_PLAT_STR___FLOAT128_TYPE__                                      \
209         TF_PLAT_STR___FP_FAST_FMA                                          \
210         TF_PLAT_STR___FP_FAST_FMAF                                         \
211         TF_PLAT_STR___HTM__                                                \
212         TF_PLAT_STR___NO_FPRS__                                            \
213         TF_PLAT_STR___NO_LWSYNC__                                          \
214         TF_PLAT_STR___POWER8_VECTOR__                                      \
215         TF_PLAT_STR___POWER9_VECTOR__                                      \
216         TF_PLAT_STR___PPC405__                                             \
217         TF_PLAT_STR___QUAD_MEMORY_ATOMIC__                                 \
218         TF_PLAT_STR___RECIPF__                                             \
219         TF_PLAT_STR___RECIP_PRECISION__                                    \
220         TF_PLAT_STR___RECIP__                                              \
221         TF_PLAT_STR___RSQRTEF__                                            \
222         TF_PLAT_STR___RSQRTE__                                             \
223         TF_PLAT_STR___TM_FENCE__                                           \
224         TF_PLAT_STR___UPPER_REGS_DF__                                      \
225         TF_PLAT_STR___UPPER_REGS_SF__                                      \
226         TF_PLAT_STR___VEC__                                                \
227         TF_PLAT_STR___VSX__                                                \
228         TF_PLAT_STR_TERMINATOR_
229 
230 // aarch64 and 32-bit arm optional features
231 #define TF_PLAT_STR_LIST___aarch64__()                                     \
232         TF_PLAT_STR___ARM_ARCH                                             \
233         TF_PLAT_STR___ARM_FEATURE_CLZ                                      \
234         TF_PLAT_STR___ARM_FEATURE_CRC32                                    \
235         TF_PLAT_STR___ARM_FEATURE_CRC32                                    \
236         TF_PLAT_STR___ARM_FEATURE_CRYPTO                                   \
237         TF_PLAT_STR___ARM_FEATURE_DIRECTED_ROUNDING                        \
238         TF_PLAT_STR___ARM_FEATURE_DSP                                      \
239         TF_PLAT_STR___ARM_FEATURE_FMA                                      \
240         TF_PLAT_STR___ARM_FEATURE_IDIV                                     \
241         TF_PLAT_STR___ARM_FEATURE_LDREX                                    \
242         TF_PLAT_STR___ARM_FEATURE_NUMERIC_MAXMIN                           \
243         TF_PLAT_STR___ARM_FEATURE_QBIT                                     \
244         TF_PLAT_STR___ARM_FEATURE_QRDMX                                    \
245         TF_PLAT_STR___ARM_FEATURE_SAT                                      \
246         TF_PLAT_STR___ARM_FEATURE_SIMD32                                   \
247         TF_PLAT_STR___ARM_FEATURE_UNALIGNED                                \
248         TF_PLAT_STR___ARM_FP                                               \
249         TF_PLAT_STR___ARM_NEON_FP                                          \
250         TF_PLAT_STR___ARM_NEON__                                           \
251         TF_PLAT_STR___ARM_WMMX                                             \
252         TF_PLAT_STR___IWMMXT2__                                            \
253         TF_PLAT_STR___IWMMXT__                                             \
254         TF_PLAT_STR___VFP_FP__                                             \
255         TF_PLAT_STR_TERMINATOR_
256 
257 // Generic features, including indication of architecture and OS.
258 // The _M_* macros are defined by Visual Studio.
259 // It doesn't define __LITTLE_ENDIAN__ or __BYTE_ORDER__;
260 // Windows is assumed to be little endian.
261 #define TF_PLAT_STR_LIST___generic__()                                     \
262         TF_PLAT_STR_TARGET_IPHONE_SIMULATOR                                \
263         TF_PLAT_STR_TARGET_OS_IOS                                          \
264         TF_PLAT_STR_TARGET_OS_IPHONE                                       \
265         TF_PLAT_STR__MSC_VER                                               \
266         TF_PLAT_STR__M_ARM                                                 \
267         TF_PLAT_STR__M_ARM64                                               \
268         TF_PLAT_STR__M_ARM_ARMV7VE                                         \
269         TF_PLAT_STR__M_ARM_FP                                              \
270         TF_PLAT_STR__M_IX86                                                \
271         TF_PLAT_STR__M_X64                                                 \
272         TF_PLAT_STR__WIN32                                                 \
273         TF_PLAT_STR__WIN64                                                 \
274         TF_PLAT_STR___ANDROID__                                            \
275         TF_PLAT_STR___APPLE__                                              \
276         TF_PLAT_STR___BYTE_ORDER__                                         \
277         TF_PLAT_STR___CYGWIN__                                             \
278         TF_PLAT_STR___FreeBSD__                                            \
279         TF_PLAT_STR___LITTLE_ENDIAN__                                      \
280         TF_PLAT_STR___NetBSD__                                             \
281         TF_PLAT_STR___OpenBSD__                                            \
282         TF_PLAT_STR_____MSYS__                                             \
283         TF_PLAT_STR___aarch64__                                            \
284         TF_PLAT_STR___alpha__                                              \
285         TF_PLAT_STR___arm__                                                \
286         TF_PLAT_STR___i386__                                               \
287         TF_PLAT_STR___i686__                                               \
288         TF_PLAT_STR___ia64__                                               \
289         TF_PLAT_STR___linux__                                              \
290         TF_PLAT_STR___mips32__                                             \
291         TF_PLAT_STR___mips64__                                             \
292         TF_PLAT_STR___powerpc64__                                          \
293         TF_PLAT_STR___powerpc__                                            \
294         TF_PLAT_STR___riscv___                                             \
295         TF_PLAT_STR___s390x__                                              \
296         TF_PLAT_STR___sparc64__                                            \
297         TF_PLAT_STR___sparc__                                              \
298         TF_PLAT_STR___x86_64__                                             \
299         TF_PLAT_STR_TERMINATOR_
300 
301 #if !defined(__x86_64__) && !defined(_M_X64) && \
302     !defined(__i386__) && !defined(_M_IX86)
303 #undef TF_PLAT_STR_LIST___x86_64__
304 #define TF_PLAT_STR_LIST___x86_64__()
305 #endif
306 #if !defined(__powerpc64__) && !defined(__powerpc__)
307 #undef TF_PLAT_STR_LIST___powerpc64__
308 #define TF_PLAT_STR_LIST___powerpc64__()
309 #endif
310 #if !defined(__aarch64__) && !defined(_M_ARM64) && \
311     !defined(__arm__) && !defined(_M_ARM)
312 #undef TF_PLAT_STR_LIST___aarch64__
313 #define TF_PLAT_STR_LIST___aarch64__()
314 #endif
315 
316 // Macro to be used in each dynamically loadable library.
317 //
318 // The BSS global variable tf_cpu_option_global and the class
319 // instance tf_cpu_option_avoid_omit_class are needed to prevent
320 // compilers/linkers such as clang from omitting the static variable
321 // tf_cpu_option[], which would otherwise appear to be unused.  We cannot make
322 // tf_cpu_option[] global, because we then might get multiply-defined symbols
323 // if TF_PLAT_STR() is used twice in the same library.
324 // (tf_cpu_option_global doesn't see such errors because it is
325 // defined in BSS, so multiple definitions are combined by the linker.)  gcc's
326 // __attribute__((used)) is insufficient because it seems to be ignored by
327 // linkers.
328 #define TF_PLATFORM_STRINGS()                                                  \
329     static const char tf_cpu_option[] =                                        \
330         TF_PLAT_STR_MAGIC_PREFIX_ "TF_PLAT_STR_VERSION=" TF_PLAT_STR_VERSION_  \
331         TF_PLAT_STR_LIST___x86_64__()                                          \
332         TF_PLAT_STR_LIST___powerpc64__()                                       \
333         TF_PLAT_STR_LIST___aarch64__()                                         \
334         TF_PLAT_STR_LIST___generic__()                                         \
335     ;                                                                          \
336     const char *tf_cpu_option_global;                                          \
337     namespace {                                                                \
338     class TFCPUOptionHelper {                                                  \
339      public:                                                                   \
340       TFCPUOptionHelper() {                                                    \
341         /* Compilers/linkers remove unused variables aggressively.  The */     \
342         /* following gyrations subvert most such optimizations. */             \
343         tf_cpu_option_global = tf_cpu_option;                                  \
344         /* Nothing is printed because the string starts with a nul. */         \
345         printf("%s", tf_cpu_option);                                           \
346       }                                                                        \
347     } tf_cpu_option_avoid_omit_class;                                          \
348     }  /* anonymous namespace */
349 // clang-format on
350 
351 namespace tensorflow {
352 
353 class Status;
354 
355 // Retrieves the platform strings from the file at the given path and appends
356 // them to the given vector. If the returned int is non-zero, an error occurred
357 // reading the file and vector may or may not be modified. The returned error
358 // code is suitable for use with strerror().
359 int GetPlatformStrings(const std::string& path,
360                        std::vector<std::string>* found);
361 
362 }  // namespace tensorflow
363 
364 #endif  // TENSORFLOW_CORE_PLATFORM_PLATFORM_STRINGS_H_
365