1 #include "cpuid.h"
2 #include "sanitizer_common/sanitizer_common.h"
3 #if !SANITIZER_FUCHSIA
4 #include "sanitizer_common/sanitizer_posix.h"
5 #endif
6 #include "xray_defs.h"
7 #include "xray_interface_internal.h"
8
9 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC
10 #include <sys/types.h>
11 #include <sys/sysctl.h>
12 #elif SANITIZER_FUCHSIA
13 #include <zircon/syscalls.h>
14 #endif
15
16 #include <atomic>
17 #include <cstdint>
18 #include <errno.h>
19 #include <fcntl.h>
20 #include <iterator>
21 #include <limits>
22 #include <tuple>
23 #include <unistd.h>
24
25 namespace __xray {
26
27 #if SANITIZER_LINUX
28 static std::pair<ssize_t, bool>
retryingReadSome(int Fd,char * Begin,char * End)29 retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
30 auto BytesToRead = std::distance(Begin, End);
31 ssize_t BytesRead;
32 ssize_t TotalBytesRead = 0;
33 while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
34 if (BytesRead == -1) {
35 if (errno == EINTR)
36 continue;
37 Report("Read error; errno = %d\n", errno);
38 return std::make_pair(TotalBytesRead, false);
39 }
40
41 TotalBytesRead += BytesRead;
42 BytesToRead -= BytesRead;
43 Begin += BytesRead;
44 }
45 return std::make_pair(TotalBytesRead, true);
46 }
47
readValueFromFile(const char * Filename,long long * Value)48 static bool readValueFromFile(const char *Filename,
49 long long *Value) XRAY_NEVER_INSTRUMENT {
50 int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
51 if (Fd == -1)
52 return false;
53 static constexpr size_t BufSize = 256;
54 char Line[BufSize] = {};
55 ssize_t BytesRead;
56 bool Success;
57 std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
58 close(Fd);
59 if (!Success)
60 return false;
61 const char *End = nullptr;
62 long long Tmp = internal_simple_strtoll(Line, &End, 10);
63 bool Result = false;
64 if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
65 *Value = Tmp;
66 Result = true;
67 }
68 return Result;
69 }
70
getTSCFrequency()71 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
72 long long TSCFrequency = -1;
73 if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
74 &TSCFrequency)) {
75 TSCFrequency *= 1000;
76 } else if (readValueFromFile(
77 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
78 &TSCFrequency)) {
79 TSCFrequency *= 1000;
80 } else {
81 Report("Unable to determine CPU frequency for TSC accounting.\n");
82 }
83 return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
84 }
85 #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC
86 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
87 long long TSCFrequency = -1;
88 size_t tscfreqsz = sizeof(TSCFrequency);
89 #if SANITIZER_MAC
90 if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
91 &tscfreqsz, NULL, 0) != -1) {
92
93 #else
94 if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
95 NULL, 0) != -1) {
96 #endif
97 return static_cast<uint64_t>(TSCFrequency);
98 } else {
99 Report("Unable to determine CPU frequency for TSC accounting.\n");
100 }
101
102 return 0;
103 }
104 #elif !SANITIZER_FUCHSIA
105 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
106 /* Not supported */
107 return 0;
108 }
109 #endif
110
// Single-byte opcodes written into patched sleds.
static constexpr uint8_t CallOpCode{0xe8}; // call <rel32>
static constexpr uint8_t JmpOpCode{0xe9};  // jmp <rel32>
static constexpr uint8_t RetOpCode{0xc3};  // ret

// Two-byte sequences stored over the first two bytes of a sled. Each value is
// written as a 16-bit integer, so on x86-64 the low byte lands first in
// memory (e.g. 0x09eb is the byte pair eb 09).
static constexpr uint16_t MovR10Seq{0xba41}; // 41 ba: mov r10d, <imm32>
static constexpr uint16_t NopwSeq{0x9066};   // 66 90: nopw
static constexpr uint16_t Jmp9Seq{0x09eb};   // eb 09: jmp +9
static constexpr uint16_t Jmp15Seq{0x0feb};  // eb 0f: jmp +15
static constexpr uint16_t Jmp20Seq{0x14eb};  // eb 14: jmp +20

// Range of displacements that fit the 32-bit relative operand of call/jmp.
static constexpr int64_t MinOffset = std::numeric_limits<int32_t>::min();
static constexpr int64_t MaxOffset = std::numeric_limits<int32_t>::max();
122
patchFunctionEntry(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled,void (* Trampoline)())123 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
124 const XRaySledEntry &Sled,
125 void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
126 // Here we do the dance of replacing the following sled:
127 //
128 // xray_sled_n:
129 // jmp +9
130 // <9 byte nop>
131 //
132 // With the following:
133 //
134 // mov r10d, <function id>
135 // call <relative 32bit offset to entry trampoline>
136 //
137 // We need to do this in the following order:
138 //
139 // 1. Put the function id first, 2 bytes from the start of the sled (just
140 // after the 2-byte jmp instruction).
141 // 2. Put the call opcode 6 bytes from the start of the sled.
142 // 3. Put the relative offset 7 bytes from the start of the sled.
143 // 4. Do an atomic write over the jmp instruction for the "mov r10d"
144 // opcode and first operand.
145 //
146 // Prerequisite is to compute the relative offset to the trampoline's address.
147 const uint64_t Address = Sled.address();
148 int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
149 (static_cast<int64_t>(Address) + 11);
150 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
151 Report("XRay Entry trampoline (%p) too far from sled (%p)\n", Trampoline,
152 reinterpret_cast<void *>(Address));
153 return false;
154 }
155 if (Enable) {
156 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
157 *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
158 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
159 std::atomic_store_explicit(
160 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
161 std::memory_order_release);
162 } else {
163 std::atomic_store_explicit(
164 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
165 std::memory_order_release);
166 // FIXME: Write out the nops still?
167 }
168 return true;
169 }
170
patchFunctionExit(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled)171 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
172 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
173 // Here we do the dance of replacing the following sled:
174 //
175 // xray_sled_n:
176 // ret
177 // <10 byte nop>
178 //
179 // With the following:
180 //
181 // mov r10d, <function id>
182 // jmp <relative 32bit offset to exit trampoline>
183 //
184 // 1. Put the function id first, 2 bytes from the start of the sled (just
185 // after the 1-byte ret instruction).
186 // 2. Put the jmp opcode 6 bytes from the start of the sled.
187 // 3. Put the relative offset 7 bytes from the start of the sled.
188 // 4. Do an atomic write over the jmp instruction for the "mov r10d"
189 // opcode and first operand.
190 //
191 // Prerequisite is to compute the relative offset fo the
192 // __xray_FunctionExit function's address.
193 const uint64_t Address = Sled.address();
194 int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
195 (static_cast<int64_t>(Address) + 11);
196 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
197 Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
198 __xray_FunctionExit, reinterpret_cast<void *>(Address));
199 return false;
200 }
201 if (Enable) {
202 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
203 *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode;
204 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
205 std::atomic_store_explicit(
206 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
207 std::memory_order_release);
208 } else {
209 std::atomic_store_explicit(
210 reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode,
211 std::memory_order_release);
212 // FIXME: Write out the nops still?
213 }
214 return true;
215 }
216
patchFunctionTailExit(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled)217 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
218 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
219 // Here we do the dance of replacing the tail call sled with a similar
220 // sequence as the entry sled, but calls the tail exit sled instead.
221 const uint64_t Address = Sled.address();
222 int64_t TrampolineOffset =
223 reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
224 (static_cast<int64_t>(Address) + 11);
225 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
226 Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
227 __xray_FunctionTailExit, reinterpret_cast<void *>(Address));
228 return false;
229 }
230 if (Enable) {
231 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
232 *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
233 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
234 std::atomic_store_explicit(
235 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
236 std::memory_order_release);
237 } else {
238 std::atomic_store_explicit(
239 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
240 std::memory_order_release);
241 // FIXME: Write out the nops still?
242 }
243 return true;
244 }
245
patchCustomEvent(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled)246 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
247 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
248 // Here we do the dance of replacing the following sled:
249 //
250 // In Version 0:
251 //
252 // xray_sled_n:
253 // jmp +20 // 2 bytes
254 // ...
255 //
256 // With the following:
257 //
258 // nopw // 2 bytes*
259 // ...
260 //
261 //
262 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
263 //
264 // ---
265 //
266 // In Version 1 or 2:
267 //
268 // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
269 // to a jmp, use 15 bytes instead.
270 //
271 const uint64_t Address = Sled.address();
272 if (Enable) {
273 std::atomic_store_explicit(
274 reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
275 std::memory_order_release);
276 } else {
277 switch (Sled.Version) {
278 case 1:
279 case 2:
280 std::atomic_store_explicit(
281 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq,
282 std::memory_order_release);
283 break;
284 case 0:
285 default:
286 std::atomic_store_explicit(
287 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
288 std::memory_order_release);
289 break;
290 }
291 }
292 return false;
293 }
294
patchTypedEvent(const bool Enable,const uint32_t FuncId,const XRaySledEntry & Sled)295 bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
296 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
297 // Here we do the dance of replacing the following sled:
298 //
299 // xray_sled_n:
300 // jmp +20 // 2 byte instruction
301 // ...
302 //
303 // With the following:
304 //
305 // nopw // 2 bytes
306 // ...
307 //
308 //
309 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
310 // The 20 byte sled stashes three argument registers, calls the trampoline,
311 // unstashes the registers and returns. If the arguments are already in
312 // the correct registers, the stashing and unstashing become equivalently
313 // sized nops.
314 const uint64_t Address = Sled.address();
315 if (Enable) {
316 std::atomic_store_explicit(
317 reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
318 std::memory_order_release);
319 } else {
320 std::atomic_store_explicit(
321 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
322 std::memory_order_release);
323 }
324 return false;
325 }
326
327 #if !SANITIZER_FUCHSIA
328 // We determine whether the CPU we're running on has the correct features we
329 // need. In x86_64 this will be rdtscp support.
probeRequiredCPUFeatures()330 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
331 unsigned int EAX, EBX, ECX, EDX;
332
333 // We check whether rdtscp support is enabled. According to the x86_64 manual,
334 // level should be set at 0x80000001, and we should have a look at bit 27 in
335 // EDX. That's 0x8000000 (or 1u << 27).
336 __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
337 : "0"(0x80000001));
338 if (!(EDX & (1u << 27))) {
339 Report("Missing rdtscp support.\n");
340 return false;
341 }
342 // Also check whether we can determine the CPU frequency, since if we cannot,
343 // we should use the emulated TSC instead.
344 if (!getTSCFrequency()) {
345 Report("Unable to determine CPU frequency.\n");
346 return false;
347 }
348 return true;
349 }
350 #endif
351
352 } // namespace __xray
353