1 //===-- xray_basic_logging.cpp ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of XRay, a dynamic runtime instrumentation system.
10 //
11 // Implementation of a simple in-memory log of XRay events. This defines a
12 // logging function that's compatible with the XRay handler interface, and
13 // routines for exporting data to files.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include <errno.h>
18 #include <fcntl.h>
19 #include <pthread.h>
20 #include <sys/stat.h>
21 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC
22 #include <sys/syscall.h>
23 #endif
24 #include <sys/types.h>
25 #include <time.h>
26 #include <unistd.h>
27 
28 #include "sanitizer_common/sanitizer_allocator_internal.h"
29 #include "sanitizer_common/sanitizer_libc.h"
30 #include "xray/xray_records.h"
31 #include "xray_recursion_guard.h"
32 #include "xray_basic_flags.h"
33 #include "xray_basic_logging.h"
34 #include "xray_defs.h"
35 #include "xray_flags.h"
36 #include "xray_interface_internal.h"
37 #include "xray_tsc.h"
38 #include "xray_utils.h"
39 
40 namespace __xray {
41 
42 static SpinMutex LogMutex;
43 
44 namespace {
45 // We use elements of this type to record the entry TSC of every function ID we
46 // see as we're tracing a particular thread's execution.
47 struct alignas(16) StackEntry {
48   int32_t FuncId;
49   uint16_t Type;
50   uint8_t CPU;
51   uint8_t Padding;
52   uint64_t TSC;
53 };
54 
55 static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry");
56 
57 struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
58   void *InMemoryBuffer = nullptr;
59   size_t BufferSize = 0;
60   size_t BufferOffset = 0;
61   void *ShadowStack = nullptr;
62   size_t StackSize = 0;
63   size_t StackEntries = 0;
64   __xray::LogWriter *LogWriter = nullptr;
65 };
66 
67 struct BasicLoggingOptions {
68   int DurationFilterMicros = 0;
69   size_t MaxStackDepth = 0;
70   size_t ThreadBufferSize = 0;
71 };
72 } // namespace
73 
74 static pthread_key_t PThreadKey;
75 
76 static atomic_uint8_t BasicInitialized{0};
77 
78 struct BasicLoggingOptions GlobalOptions;
79 
80 thread_local atomic_uint8_t Guard{0};
81 
82 static atomic_uint8_t UseRealTSC{0};
83 static atomic_uint64_t ThresholdTicks{0};
84 static atomic_uint64_t TicksPerSec{0};
85 static atomic_uint64_t CycleFrequency{NanosecondsPerSecond};
86 
getLog()87 static LogWriter *getLog() XRAY_NEVER_INSTRUMENT {
88   LogWriter* LW = LogWriter::Open();
89   if (LW == nullptr)
90     return LW;
91 
92   static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT;
93   pthread_once(&DetectOnce, +[] {
94     if (atomic_load(&UseRealTSC, memory_order_acquire))
95       atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release);
96   });
97 
98   // Since we're here, we get to write the header. We set it up so that the
99   // header will only be written once, at the start, and let the threads
100   // logging do writes which just append.
101   XRayFileHeader Header;
102   // Version 2 includes tail exit records.
103   // Version 3 includes pid inside records.
104   Header.Version = 3;
105   Header.Type = FileTypes::NAIVE_LOG;
106   Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire);
107 
108   // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
109   // before setting the values in the header.
110   Header.ConstantTSC = 1;
111   Header.NonstopTSC = 1;
112   LW->WriteAll(reinterpret_cast<char *>(&Header),
113                reinterpret_cast<char *>(&Header) + sizeof(Header));
114   return LW;
115 }
116 
getGlobalLog()117 static LogWriter *getGlobalLog() XRAY_NEVER_INSTRUMENT {
118   static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
119   static LogWriter *LW = nullptr;
120   pthread_once(&OnceInit, +[] { LW = getLog(); });
121   return LW;
122 }
123 
getThreadLocalData()124 static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
125   thread_local ThreadLocalData TLD;
126   thread_local bool UNUSED TOnce = [] {
127     if (GlobalOptions.ThreadBufferSize == 0) {
128       if (Verbosity())
129         Report("Not initializing TLD since ThreadBufferSize == 0.\n");
130       return false;
131     }
132     pthread_setspecific(PThreadKey, &TLD);
133     TLD.LogWriter = getGlobalLog();
134     TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>(
135         InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize,
136                       nullptr, alignof(XRayRecord)));
137     TLD.BufferSize = GlobalOptions.ThreadBufferSize;
138     TLD.BufferOffset = 0;
139     if (GlobalOptions.MaxStackDepth == 0) {
140       if (Verbosity())
141         Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n");
142       TLD.StackSize = 0;
143       TLD.StackEntries = 0;
144       TLD.ShadowStack = nullptr;
145       return false;
146     }
147     TLD.ShadowStack = reinterpret_cast<StackEntry *>(
148         InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr,
149                       alignof(StackEntry)));
150     TLD.StackSize = GlobalOptions.MaxStackDepth;
151     TLD.StackEntries = 0;
152     return false;
153   }();
154   return TLD;
155 }
156 
157 template <class RDTSC>
InMemoryRawLog(int32_t FuncId,XRayEntryType Type,RDTSC ReadTSC)158 void InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
159                     RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
160   auto &TLD = getThreadLocalData();
161   LogWriter *LW = getGlobalLog();
162   if (LW == nullptr)
163     return;
164 
165   // Use a simple recursion guard, to handle cases where we're already logging
166   // and for one reason or another, this function gets called again in the same
167   // thread.
168   RecursionGuard G(Guard);
169   if (!G)
170     return;
171 
172   uint8_t CPU = 0;
173   uint64_t TSC = ReadTSC(CPU);
174 
175   switch (Type) {
176   case XRayEntryType::ENTRY:
177   case XRayEntryType::LOG_ARGS_ENTRY: {
178     // Short circuit if we've reached the maximum depth of the stack.
179     if (TLD.StackEntries++ >= TLD.StackSize)
180       return;
181 
182     // When we encounter an entry event, we keep track of the TSC and the CPU,
183     // and put it in the stack.
184     StackEntry E;
185     E.FuncId = FuncId;
186     E.CPU = CPU;
187     E.Type = Type;
188     E.TSC = TSC;
189     auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
190                          (sizeof(StackEntry) * (TLD.StackEntries - 1));
191     internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry));
192     break;
193   }
194   case XRayEntryType::EXIT:
195   case XRayEntryType::TAIL: {
196     if (TLD.StackEntries == 0)
197       break;
198 
199     if (--TLD.StackEntries >= TLD.StackSize)
200       return;
201 
202     // When we encounter an exit event, we check whether all the following are
203     // true:
204     //
205     // - The Function ID is the same as the most recent entry in the stack.
206     // - The CPU is the same as the most recent entry in the stack.
207     // - The Delta of the TSCs is less than the threshold amount of time we're
208     //   looking to record.
209     //
210     // If all of these conditions are true, we pop the stack and don't write a
211     // record and move the record offset back.
212     StackEntry StackTop;
213     auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
214                          (sizeof(StackEntry) * TLD.StackEntries);
215     internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry));
216     if (StackTop.FuncId == FuncId && StackTop.CPU == CPU &&
217         StackTop.TSC < TSC) {
218       auto Delta = TSC - StackTop.TSC;
219       if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) {
220         DCHECK(TLD.BufferOffset > 0);
221         TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2;
222         return;
223       }
224     }
225     break;
226   }
227   default:
228     // Should be unreachable.
229     DCHECK(false && "Unsupported XRayEntryType encountered.");
230     break;
231   }
232 
233   // First determine whether the delta between the function's enter record and
234   // the exit record is higher than the threshold.
235   XRayRecord R;
236   R.RecordType = RecordTypes::NORMAL;
237   R.CPU = CPU;
238   R.TSC = TSC;
239   R.TId = GetTid();
240   R.PId = internal_getpid();
241   R.Type = Type;
242   R.FuncId = FuncId;
243   auto FirstEntry = reinterpret_cast<XRayRecord *>(TLD.InMemoryBuffer);
244   internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R));
245   if (++TLD.BufferOffset == TLD.BufferSize) {
246     SpinMutexLock Lock(&LogMutex);
247     LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
248                  reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
249     TLD.BufferOffset = 0;
250     TLD.StackEntries = 0;
251   }
252 }
253 
254 template <class RDTSC>
InMemoryRawLogWithArg(int32_t FuncId,XRayEntryType Type,uint64_t Arg1,RDTSC ReadTSC)255 void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1,
256                            RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
257   auto &TLD = getThreadLocalData();
258   auto FirstEntry =
259       reinterpret_cast<XRayArgPayload *>(TLD.InMemoryBuffer);
260   const auto &BuffLen = TLD.BufferSize;
261   LogWriter *LW = getGlobalLog();
262   if (LW == nullptr)
263     return;
264 
265   // First we check whether there's enough space to write the data consecutively
266   // in the thread-local buffer. If not, we first flush the buffer before
267   // attempting to write the two records that must be consecutive.
268   if (TLD.BufferOffset + 2 > BuffLen) {
269     SpinMutexLock Lock(&LogMutex);
270     LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
271                  reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
272     TLD.BufferOffset = 0;
273     TLD.StackEntries = 0;
274   }
275 
276   // Then we write the "we have an argument" record.
277   InMemoryRawLog(FuncId, Type, ReadTSC);
278 
279   RecursionGuard G(Guard);
280   if (!G)
281     return;
282 
283   // And, from here on write the arg payload.
284   XRayArgPayload R;
285   R.RecordType = RecordTypes::ARG_PAYLOAD;
286   R.FuncId = FuncId;
287   R.TId = GetTid();
288   R.PId = internal_getpid();
289   R.Arg = Arg1;
290   internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R));
291   if (++TLD.BufferOffset == BuffLen) {
292     SpinMutexLock Lock(&LogMutex);
293     LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
294                  reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
295     TLD.BufferOffset = 0;
296     TLD.StackEntries = 0;
297   }
298 }
299 
basicLoggingHandleArg0RealTSC(int32_t FuncId,XRayEntryType Type)300 void basicLoggingHandleArg0RealTSC(int32_t FuncId,
301                                    XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
302   InMemoryRawLog(FuncId, Type, readTSC);
303 }
304 
basicLoggingHandleArg0EmulateTSC(int32_t FuncId,XRayEntryType Type)305 void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type)
306     XRAY_NEVER_INSTRUMENT {
307   InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
308     timespec TS;
309     int result = clock_gettime(CLOCK_REALTIME, &TS);
310     if (result != 0) {
311       Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno));
312       TS = {0, 0};
313     }
314     CPU = 0;
315     return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
316   });
317 }
318 
basicLoggingHandleArg1RealTSC(int32_t FuncId,XRayEntryType Type,uint64_t Arg1)319 void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type,
320                                    uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
321   InMemoryRawLogWithArg(FuncId, Type, Arg1, readTSC);
322 }
323 
basicLoggingHandleArg1EmulateTSC(int32_t FuncId,XRayEntryType Type,uint64_t Arg1)324 void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type,
325                                       uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
326   InMemoryRawLogWithArg(
327       FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
328         timespec TS;
329         int result = clock_gettime(CLOCK_REALTIME, &TS);
330         if (result != 0) {
331           Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno));
332           TS = {0, 0};
333         }
334         CPU = 0;
335         return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
336       });
337 }
338 
TLDDestructor(void * P)339 static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT {
340   ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P);
341   auto ExitGuard = at_scope_exit([&TLD] {
342     // Clean up dynamic resources.
343     if (TLD.InMemoryBuffer)
344       InternalFree(TLD.InMemoryBuffer);
345     if (TLD.ShadowStack)
346       InternalFree(TLD.ShadowStack);
347     if (Verbosity())
348       Report("Cleaned up log for TID: %d\n", GetTid());
349   });
350 
351   if (TLD.LogWriter == nullptr || TLD.BufferOffset == 0) {
352     if (Verbosity())
353       Report("Skipping buffer for TID: %d; Offset = %llu\n", GetTid(),
354              TLD.BufferOffset);
355     return;
356   }
357 
358   {
359     SpinMutexLock L(&LogMutex);
360     TLD.LogWriter->WriteAll(reinterpret_cast<char *>(TLD.InMemoryBuffer),
361                             reinterpret_cast<char *>(TLD.InMemoryBuffer) +
362                             (sizeof(XRayRecord) * TLD.BufferOffset));
363   }
364 
365   // Because this thread's exit could be the last one trying to write to
366   // the file and that we're not able to close out the file properly, we
367   // sync instead and hope that the pending writes are flushed as the
368   // thread exits.
369   TLD.LogWriter->Flush();
370 }
371 
basicLoggingInit(UNUSED size_t BufferSize,UNUSED size_t BufferMax,void * Options,size_t OptionsSize)372 XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize,
373                                    UNUSED size_t BufferMax, void *Options,
374                                    size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
375   uint8_t Expected = 0;
376   if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1,
377                                       memory_order_acq_rel)) {
378     if (Verbosity())
379       Report("Basic logging already initialized.\n");
380     return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
381   }
382 
383   static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
384   pthread_once(&OnceInit, +[] {
385     pthread_key_create(&PThreadKey, TLDDestructor);
386     atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release);
387     // Initialize the global TicksPerSec value.
388     atomic_store(&TicksPerSec,
389                  probeRequiredCPUFeatures() ? getTSCFrequency()
390                                             : NanosecondsPerSecond,
391                  memory_order_release);
392     if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity())
393       Report("WARNING: Required CPU features missing for XRay instrumentation, "
394              "using emulation instead.\n");
395   });
396 
397   FlagParser P;
398   BasicFlags F;
399   F.setDefaults();
400   registerXRayBasicFlags(&P, &F);
401   P.ParseString(useCompilerDefinedBasicFlags());
402   auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS");
403   if (EnvOpts == nullptr)
404     EnvOpts = "";
405 
406   P.ParseString(EnvOpts);
407 
408   // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options
409   // set through XRAY_OPTIONS instead.
410   if (internal_strlen(EnvOpts) == 0) {
411     F.func_duration_threshold_us =
412         flags()->xray_naive_log_func_duration_threshold_us;
413     F.max_stack_depth = flags()->xray_naive_log_max_stack_depth;
414     F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size;
415   }
416 
417   P.ParseString(static_cast<const char *>(Options));
418   GlobalOptions.ThreadBufferSize = F.thread_buffer_size;
419   GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us;
420   GlobalOptions.MaxStackDepth = F.max_stack_depth;
421   *basicFlags() = F;
422 
423   atomic_store(&ThresholdTicks,
424                atomic_load(&TicksPerSec, memory_order_acquire) *
425                    GlobalOptions.DurationFilterMicros / 1000000,
426                memory_order_release);
427   __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire)
428                               ? basicLoggingHandleArg1RealTSC
429                               : basicLoggingHandleArg1EmulateTSC);
430   __xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire)
431                          ? basicLoggingHandleArg0RealTSC
432                          : basicLoggingHandleArg0EmulateTSC);
433 
434   // TODO: Implement custom event and typed event handling support in Basic
435   // Mode.
436   __xray_remove_customevent_handler();
437   __xray_remove_typedevent_handler();
438 
439   return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
440 }
441 
basicLoggingFinalize()442 XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT {
443   uint8_t Expected = 0;
444   if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0,
445                                       memory_order_acq_rel) &&
446       Verbosity())
447     Report("Basic logging already finalized.\n");
448 
449   // Nothing really to do aside from marking state of the global to be
450   // uninitialized.
451 
452   return XRayLogInitStatus::XRAY_LOG_FINALIZED;
453 }
454 
basicLoggingFlush()455 XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT {
456   // This really does nothing, since flushing the logs happen at the end of a
457   // thread's lifetime, or when the buffers are full.
458   return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
459 }
460 
461 // This is a handler that, effectively, does nothing.
basicLoggingHandleArg0Empty(int32_t,XRayEntryType)462 void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT {
463 }
464 
basicLogDynamicInitializer()465 bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
466   XRayLogImpl Impl{
467       basicLoggingInit,
468       basicLoggingFinalize,
469       basicLoggingHandleArg0Empty,
470       basicLoggingFlush,
471   };
472   auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl);
473   if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
474       Verbosity())
475     Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n",
476            RegistrationResult);
477   if (flags()->xray_naive_log ||
478       !internal_strcmp(flags()->xray_mode, "xray-basic")) {
479     auto SelectResult = __xray_log_select_mode("xray-basic");
480     if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
481       if (Verbosity())
482         Report("Failed selecting XRay Basic Mode; error = %d\n", SelectResult);
483       return false;
484     }
485 
486     // We initialize the implementation using the data we get from the
487     // XRAY_BASIC_OPTIONS environment variable, at this point of the
488     // implementation.
489     auto *Env = GetEnv("XRAY_BASIC_OPTIONS");
490     auto InitResult =
491         __xray_log_init_mode("xray-basic", Env == nullptr ? "" : Env);
492     if (InitResult != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
493       if (Verbosity())
494         Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult);
495       return false;
496     }
497 
498     // At this point we know that we've successfully initialized Basic mode
499     // tracing, and the only chance we're going to get for the current thread to
500     // clean-up may be at thread/program exit. To ensure that we're going to get
501     // the cleanup even without calling the finalization routines, we're
502     // registering a program exit function that will do the cleanup.
503     static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT;
504     pthread_once(&DynamicOnce, +[] {
505       static void *FakeTLD = nullptr;
506       FakeTLD = &getThreadLocalData();
507       Atexit(+[] { TLDDestructor(FakeTLD); });
508     });
509   }
510   return true;
511 }
512 
513 } // namespace __xray
514 
515 static auto UNUSED Unused = __xray::basicLogDynamicInitializer();
516