1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
18 #define SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
19 
20 #include <stdint.h>
21 #include <string.h>
22 
23 #include <array>
24 #include <atomic>
25 #include <memory>
26 #include <set>
27 #include <thread>
28 
29 #include "perfetto/ext/base/optional.h"
30 #include "perfetto/ext/base/paged_memory.h"
31 #include "perfetto/ext/base/pipe.h"
32 #include "perfetto/ext/base/scoped_file.h"
33 #include "perfetto/ext/base/thread_checker.h"
34 #include "perfetto/ext/traced/data_source_types.h"
35 #include "perfetto/ext/tracing/core/trace_writer.h"
36 #include "perfetto/protozero/message.h"
37 #include "perfetto/protozero/message_handle.h"
38 #include "src/traced/probes/ftrace/compact_sched.h"
39 #include "src/traced/probes/ftrace/ftrace_metadata.h"
40 #include "src/traced/probes/ftrace/proto_translation_table.h"
41 
42 namespace perfetto {
43 
// Forward declarations. This header only refers to the types below through
// pointers (or as a type alias), so their full definitions are not required
// here.
struct FtraceDataSourceConfig;
class FtraceDataSource;
class LazyKernelSymbolizer;
class ProtoTranslationTable;

namespace protos {
namespace pbzero {
class FtraceEventBundle;
}  // namespace pbzero
}  // namespace protos
54 
55 // Reads raw ftrace data for a cpu, parses it, and writes it into the perfetto
56 // tracing buffers.
57 class CpuReader {
58  public:
59   using FtraceEventBundle = protos::pbzero::FtraceEventBundle;
60 
61   struct PageHeader {
62     uint64_t timestamp;
63     uint64_t size;
64     bool lost_events;
65   };
66 
67   CpuReader(size_t cpu,
68             const ProtoTranslationTable* table,
69             LazyKernelSymbolizer* symbolizer,
70             base::ScopedFile trace_fd);
71   ~CpuReader();
72 
73   // Reads and parses all ftrace data for this cpu (in batches), until we catch
74   // up to the writer, or hit |max_pages|. Returns number of pages read.
75   size_t ReadCycle(uint8_t* parsing_buf,
76                    size_t parsing_buf_size_pages,
77                    size_t max_pages,
78                    const std::set<FtraceDataSource*>& started_data_sources);
79 
80   template <typename T>
ReadAndAdvance(const uint8_t ** ptr,const uint8_t * end,T * out)81   static bool ReadAndAdvance(const uint8_t** ptr, const uint8_t* end, T* out) {
82     if (*ptr > end - sizeof(T))
83       return false;
84     memcpy(reinterpret_cast<void*>(out), reinterpret_cast<const void*>(*ptr),
85            sizeof(T));
86     *ptr += sizeof(T);
87     return true;
88   }
89 
90   // Caller must do the bounds check:
91   // [start + offset, start + offset + sizeof(T))
92   // Returns the raw value not the varint.
93   template <typename T>
ReadIntoVarInt(const uint8_t * start,uint32_t field_id,protozero::Message * out)94   static T ReadIntoVarInt(const uint8_t* start,
95                           uint32_t field_id,
96                           protozero::Message* out) {
97     T t;
98     memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T));
99     out->AppendVarInt<T>(field_id, t);
100     return t;
101   }
102 
103   template <typename T>
ReadInode(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)104   static void ReadInode(const uint8_t* start,
105                         uint32_t field_id,
106                         protozero::Message* out,
107                         FtraceMetadata* metadata) {
108     T t = ReadIntoVarInt<T>(start, field_id, out);
109     metadata->AddInode(static_cast<Inode>(t));
110   }
111 
112   template <typename T>
ReadDevId(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)113   static void ReadDevId(const uint8_t* start,
114                         uint32_t field_id,
115                         protozero::Message* out,
116                         FtraceMetadata* metadata) {
117     T t;
118     memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T));
119     BlockDeviceID dev_id = TranslateBlockDeviceIDToUserspace<T>(t);
120     out->AppendVarInt<BlockDeviceID>(field_id, dev_id);
121     metadata->AddDevice(dev_id);
122   }
123 
124   template <typename T>
ReadSymbolAddr(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)125   static void ReadSymbolAddr(const uint8_t* start,
126                              uint32_t field_id,
127                              protozero::Message* out,
128                              FtraceMetadata* metadata) {
129     // ReadSymbolAddr is a bit special. In order to not disclose KASLR layout
130     // via traces, we put in the trace only a mangled address (which really is
131     // the insertion order into metadata.kernel_addrs). We don't care about the
132     // actual symbol addesses. We just need to match that against the symbol
133     // name in the names in the FtraceEventBundle.KernelSymbols.
134     T full_addr;
135     memcpy(&full_addr, reinterpret_cast<const void*>(start), sizeof(T));
136     uint32_t interned_index = metadata->AddSymbolAddr(full_addr);
137     out->AppendVarInt(field_id, interned_index);
138   }
139 
ReadPid(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)140   static void ReadPid(const uint8_t* start,
141                       uint32_t field_id,
142                       protozero::Message* out,
143                       FtraceMetadata* metadata) {
144     int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out);
145     metadata->AddPid(pid);
146   }
147 
ReadCommonPid(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)148   static void ReadCommonPid(const uint8_t* start,
149                             uint32_t field_id,
150                             protozero::Message* out,
151                             FtraceMetadata* metadata) {
152     int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out);
153     metadata->AddCommonPid(pid);
154   }
155 
156   // Internally the kernel stores device ids in a different layout to that
157   // exposed to userspace via stat etc. There's no userspace function to convert
158   // between the formats so we have to do it ourselves.
159   template <typename T>
TranslateBlockDeviceIDToUserspace(T kernel_dev)160   static BlockDeviceID TranslateBlockDeviceIDToUserspace(T kernel_dev) {
161     // Provided search index s_dev from
162     // https://github.com/torvalds/linux/blob/v4.12/include/linux/fs.h#L404
163     // Convert to user space id using
164     // https://github.com/torvalds/linux/blob/v4.12/include/linux/kdev_t.h#L10
165     // TODO(azappone): see if this is the same on all platforms
166     uint64_t maj = static_cast<uint64_t>(kernel_dev) >> 20;
167     uint64_t min = static_cast<uint64_t>(kernel_dev) & ((1U << 20) - 1);
168     return static_cast<BlockDeviceID>(  // From makedev()
169         ((maj & 0xfffff000ULL) << 32) | ((maj & 0xfffULL) << 8) |
170         ((min & 0xffffff00ULL) << 12) | ((min & 0xffULL)));
171   }
172 
173   // Returns a parsed representation of the given raw ftrace page's header.
174   static base::Optional<CpuReader::PageHeader> ParsePageHeader(
175       const uint8_t** ptr,
176       uint16_t page_header_size_len);
177 
178   // Parse the payload of a raw ftrace page, and write the events as protos
179   // into the provided bundle (and/or compact buffer).
180   // |table| contains the mix of compile time (e.g. proto field ids) and
181   // run time (e.g. field offset and size) information necessary to do this.
182   // The table is initialized once at start time by the ftrace controller
183   // which passes it to the CpuReader which passes it here.
184   // The caller is responsible for validating that the page_header->size stays
185   // within the current page.
186   static size_t ParsePagePayload(const uint8_t* start_of_payload,
187                                  const PageHeader* page_header,
188                                  const ProtoTranslationTable* table,
189                                  const FtraceDataSourceConfig* ds_config,
190                                  CompactSchedBuffer* compact_sched_buffer,
191                                  FtraceEventBundle* bundle,
192                                  FtraceMetadata* metadata);
193 
194   // Parse a single raw ftrace event beginning at |start| and ending at |end|
195   // and write it into the provided bundle as a proto.
196   // |table| contains the mix of compile time (e.g. proto field ids) and
197   // run time (e.g. field offset and size) information necessary to do this.
198   // The table is initialized once at start time by the ftrace controller
199   // which passes it to the CpuReader which passes it to ParsePage which
200   // passes it here.
201   static bool ParseEvent(uint16_t ftrace_event_id,
202                          const uint8_t* start,
203                          const uint8_t* end,
204                          const ProtoTranslationTable* table,
205                          protozero::Message* message,
206                          FtraceMetadata* metadata);
207 
208   static bool ParseField(const Field& field,
209                          const uint8_t* start,
210                          const uint8_t* end,
211                          const ProtoTranslationTable* table,
212                          protozero::Message* message,
213                          FtraceMetadata* metadata);
214 
215   // Parse a sched_switch event according to pre-validated format, and buffer
216   // the individual fields in the given compact encoding batch.
217   static void ParseSchedSwitchCompact(const uint8_t* start,
218                                       uint64_t timestamp,
219                                       const CompactSchedSwitchFormat* format,
220                                       CompactSchedBuffer* compact_buf,
221                                       FtraceMetadata* metadata);
222 
223   // Parse a sched_waking event according to pre-validated format, and buffer
224   // the individual fields in the given compact encoding batch.
225   static void ParseSchedWakingCompact(const uint8_t* start,
226                                       uint64_t timestamp,
227                                       const CompactSchedWakingFormat* format,
228                                       CompactSchedBuffer* compact_buf,
229                                       FtraceMetadata* metadata);
230 
231   // Parses & encodes the given range of contiguous tracing pages. Called by
232   // |ReadAndProcessBatch| for each active data source.
233   //
234   // public and static for testing
235   static bool ProcessPagesForDataSource(TraceWriter* trace_writer,
236                                         FtraceMetadata* metadata,
237                                         size_t cpu,
238                                         const FtraceDataSourceConfig* ds_config,
239                                         const uint8_t* parsing_buf,
240                                         const size_t pages_read,
241                                         const ProtoTranslationTable* table,
242                                         LazyKernelSymbolizer* symbolizer);
243 
244  private:
245   CpuReader(const CpuReader&) = delete;
246   CpuReader& operator=(const CpuReader&) = delete;
247 
248   // Reads at most |max_pages| of ftrace data, parses it, and writes it
249   // into |started_data_sources|. Returns number of pages read.
250   // See comment on ftrace_controller.cc:kMaxParsingWorkingSetPages for
251   // rationale behind the batching.
252   size_t ReadAndProcessBatch(
253       uint8_t* parsing_buf,
254       size_t max_pages,
255       bool first_batch_in_cycle,
256       const std::set<FtraceDataSource*>& started_data_sources);
257 
258   const size_t cpu_;
259   const ProtoTranslationTable* const table_;
260   LazyKernelSymbolizer* const symbolizer_;
261   base::ScopedFile trace_fd_;
262 };
263 
264 }  // namespace perfetto
265 
266 #endif  // SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
267