1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACED_PROBES_FTRACE_PAGE_POOL_H_
18 #define SRC_TRACED_PROBES_FTRACE_PAGE_POOL_H_
19 
20 #include <stdint.h>
21 
22 #include <mutex>
23 #include <vector>
24 
25 #include "perfetto/base/logging.h"
26 #include "perfetto/base/optional.h"
27 #include "perfetto/base/paged_memory.h"
28 #include "perfetto/base/thread_checker.h"
29 #include "perfetto/base/utils.h"
30 
31 namespace perfetto {
32 
33 // This class is a page pool tailored around the needs of the ftrace CpuReader.
34 // It has two responsibilities:
35 // 1) A cheap bump-pointer page allocator for the writing side of CpuReader.
36 // 2) A thread-safe producer/consumer queue to synchronize the read/write
37 //    threads of CpuReader.
// For context, CpuReader (and hence this class) is used on two threads:
// (1) A worker thread that writes into the buffer and (2) the main thread which
// reads all the content in big batches and turns it into protos.
// There is at most one thread writing and at most one thread reading. In rare
// circumstances they can be active at the same time.
43 // This class is optimized for the following use case:
// - Most of the time CpuReader wants to write 4096 bytes. In some rare cases
//   (read() during flush) it wants to write < 4096 bytes.
46 // - Even when it writes < 4096 bytes, CpuReader can figure out the size of the
47 //   payload from the ftrace header. We don't need extra tracking to tell how
48 //   much of each page is used.
49 // - Doing a syscall for each page write is overkill. In most occasions
50 //   CpuReader writes bursts of several pages in one go.
51 // - We can't really predict upfront how big the write bursts will be, hence we
52 //   cannot predict the size of the pool, unless we accept a very high bound.
53 //   In extreme, yet rare, conditions, CpuReader will read the whole per-cpu
54 //   ftrace buffer, while the reader is still reading the previous batch.
// - Write bursts should not be too frequent, so once they are over it's worth
//   spending some extra cycles to release the memory.
// - The reader side always wants to read *all* the written pages in one batch.
//   While this happens though, the writer might want to write more.
59 //
60 // The architecture of this class is as follows. Pages are organized in
61 // PageBlock(s). A PageBlock is simply an array of pages and is the elementary
62 // unit of memory allocation and frees. Pages within one block are cheaply
63 // allocated with a simple bump-pointer allocator.
64 //
65 //      [      Writer (thread worker)    ] | [    Reader (main thread)   ]
66 //                                  ~~~~~~~~~~~~~~~~~~~~~
67 //      +---> write queue ------------> ready queue --+
68 //      |                                             |
69 //      +------------------------------- freelist <---+
70 //                                  ~~~~~~~~~~~~~~~~~~~~~
71 //                                  ~  mutex protected  ~
72 //                                  ~~~~~~~~~~~~~~~~~~~~~
73 class PagePool {
74  public:
75   class PageBlock {
76    public:
77     static constexpr size_t kPagesPerBlock = 32;  // 32 * 4KB = 128 KB.
78     static constexpr size_t kBlockSize = kPagesPerBlock * base::kPageSize;
79 
80     // This factory method is just that we accidentally create extra blocks
81     // without realizing by triggering the default constructor in containers.
Create()82     static PageBlock Create() { return PageBlock(); }
83 
84     PageBlock(PageBlock&&) noexcept = default;
85     PageBlock& operator=(PageBlock&&) = default;
86 
size()87     size_t size() const { return size_; }
IsFull()88     bool IsFull() const { return size_ >= kPagesPerBlock; }
89 
90     // Returns the pointer to the contents of the i-th page in the block.
At(size_t i)91     uint8_t* At(size_t i) const {
92       PERFETTO_DCHECK(i < kPagesPerBlock);
93       return reinterpret_cast<uint8_t*>(mem_.Get()) + i * base::kPageSize;
94     }
95 
CurPage()96     uint8_t* CurPage() const { return At(size_); }
97 
NextPage()98     void NextPage() {
99       PERFETTO_DCHECK(!IsFull());
100       size_++;
101     }
102 
103     // Releases memory of the block and marks it available for reuse.
Clear()104     void Clear() {
105       size_ = 0;
106       mem_.AdviseDontNeed(mem_.Get(), kBlockSize);
107     }
108 
109    private:
110     PageBlock(const PageBlock&) = delete;
111     PageBlock& operator=(const PageBlock&) = delete;
PageBlock()112     PageBlock() { mem_ = base::PagedMemory::Allocate(kBlockSize); }
113 
114     base::PagedMemory mem_;
115     size_t size_ = 0;
116   };
117 
PagePool()118   PagePool() {
119     PERFETTO_DETACH_FROM_THREAD(writer_thread_);
120     PERFETTO_DETACH_FROM_THREAD(reader_thread_);
121   }
122 
123   // Grabs a new page, eventually allocating a whole new PageBlock.
124   // If contents are written to the page, the caller must call EndWrite().
125   // If no data is written, it is okay to leave the BeginWrite() unpaired
126   // (e.g., in case of a non-blocking read returning no data) and call again
127   // BeginWrite() in the future.
BeginWrite()128   uint8_t* BeginWrite() {
129     PERFETTO_DCHECK_THREAD(writer_thread_);
130     if (write_queue_.empty() || write_queue_.back().IsFull())
131       NewPageBlock();  // Slowpath. Tries the freelist first, then allocates.
132     return write_queue_.back().CurPage();
133   }
134 
135   // Marks the last page as written and bumps the write pointer.
EndWrite()136   void EndWrite() {
137     PERFETTO_DCHECK_THREAD(writer_thread_);
138     PERFETTO_DCHECK(!write_queue_.empty() && !write_queue_.back().IsFull());
139     write_queue_.back().NextPage();
140   }
141 
142   // Makes all written pages available to the reader.
CommitWrittenPages()143   void CommitWrittenPages() {
144     PERFETTO_DCHECK_THREAD(writer_thread_);
145     std::lock_guard<std::mutex> lock(mutex_);
146     read_queue_.insert(read_queue_.end(),
147                        std::make_move_iterator(write_queue_.begin()),
148                        std::make_move_iterator(write_queue_.end()));
149     write_queue_.clear();
150   }
151 
152   // Moves ownership of all the page blocks in the read queue to the caller.
153   // The caller is expected to move them back after reading through EndRead().
154   // PageBlocks will be freed if the caller doesn't call EndRead().
BeginRead()155   std::vector<PageBlock> BeginRead() {
156     PERFETTO_DCHECK_THREAD(reader_thread_);
157     std::lock_guard<std::mutex> lock(mutex_);
158     auto res = std::move(read_queue_);
159     read_queue_.clear();
160     return res;
161   }
162 
163   // Returns the page blocks borrowed for read and makes them available for
164   // reuse. This allows the writer to avoid doing syscalls after the initial
165   // writes.
166   void EndRead(std::vector<PageBlock> page_blocks);
167 
freelist_size_for_testing()168   size_t freelist_size_for_testing() const { return freelist_.size(); }
169 
170  private:
171   PagePool(const PagePool&) = delete;
172   PagePool& operator=(const PagePool&) = delete;
173   void NewPageBlock();
174 
175   PERFETTO_THREAD_CHECKER(writer_thread_)
176   std::vector<PageBlock> write_queue_;  // Accessed exclusively by the writer.
177 
178   std::mutex mutex_;  // Protects both the read queue and the freelist.
179 
180   PERFETTO_THREAD_CHECKER(reader_thread_)
181   std::vector<PageBlock> read_queue_;  // Accessed by both threads.
182   std::vector<PageBlock> freelist_;    // Accessed by both threads.
183 };
184 
185 }  // namespace perfetto
186 
187 #endif  // SRC_TRACED_PROBES_FTRACE_PAGE_POOL_H_
188