1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CallChainJoiner.h"
18 
19 #include <android-base/logging.h>
20 
21 #include "environment.h"
22 #include "utils.h"
23 
24 namespace simpleperf {
25 namespace call_chain_joiner_impl {
26 
LRUCache(size_t cache_size,size_t matched_node_count_to_extend_callchain)27 LRUCache::LRUCache(size_t cache_size, size_t matched_node_count_to_extend_callchain)
28     : node_set_(1024 * 16, CacheNodeHash, CacheNodeEqual) {
29   cache_stat_.cache_size = cache_size;
30   cache_stat_.max_node_count = cache_size / sizeof(CacheNode);
31   CHECK_GE(cache_stat_.max_node_count, 2u);
32   CHECK_GE(matched_node_count_to_extend_callchain, 1u);
33   cache_stat_.matched_node_count_to_extend_callchain = matched_node_count_to_extend_callchain;
34   nodes_ = new CacheNode[cache_stat_.max_node_count + 1];  // with 1 sentinel node
35   // nodes_[0] is the sentinel node of the LRU linked list.
36   nodes_[0].is_leaf = 1;
37   nodes_[0].parent_index = 0;
38   nodes_[0].leaf_link_prev = nodes_[0].leaf_link_next = 0;
39 }
40 
~LRUCache()41 LRUCache::~LRUCache() {
42   delete[] nodes_;
43 }
44 
AddCallChain(pid_t tid,std::vector<uint64_t> & ips,std::vector<uint64_t> & sps)45 void LRUCache::AddCallChain(pid_t tid, std::vector<uint64_t>& ips, std::vector<uint64_t>& sps) {
46   // 1. Build current call chain.
47   std::vector<CacheNode*> chain;
48   for (size_t i = 0; i < ips.size(); ++i) {
49     CacheNode* node = GetNode(tid, ips[i], sps[i]);
50     chain.push_back(node);
51   }
52 
53   // 2. Check if the (tid, ip, sp) tuples of the top [matched_node_count_to_extend_callchain]
54   // nodes of current call chain exist in the cache and have the same relation as in current
55   // call chain. If true, we can extend current call chain.
56   bool can_extend = true;
57   if (chain.size() < cache_stat_.matched_node_count_to_extend_callchain) {
58     can_extend = false;
59   } else {
60     size_t chain_pos = chain.size() - 2;
61     for (size_t i = 1; i < cache_stat_.matched_node_count_to_extend_callchain; ++i) {
62       if (GetParent(chain[chain_pos]) != chain[chain_pos + 1]) {
63         can_extend = false;
64         break;
65       }
66     }
67   }
68   std::vector<uint64_t> origin_ip = ips;
69 
70   // 3. Add current call chain to the cache.
71   for (size_t i = 0; i + 1 < chain.size(); ++i) {
72     LinkParent(chain[i], chain[i + 1]);
73   }
74   // 4. Optionally extend current call chain.
75   if (can_extend) {
76     CacheNode* top = chain.back();
77     while ((top = GetParent(top)) != nullptr) {
78       if (top->sp == chain.back()->sp) {
79         // This is to prevent a loop case as below, which is unlikely to happen.
80         // The cache has node A.parent = B, while current call chain has node B.parent = A.
81         // This makes a loop of A -> B -> A -> B...
82         bool has_loop = false;
83         for (auto it = chain.rbegin(); it != chain.rend() && (*it)->sp == top->sp; ++it) {
84           if ((*it)->ip == top->ip) {
85             has_loop = true;
86             break;
87           }
88         }
89         if (has_loop) {
90           UnlinkParent(chain.back());
91           ips.resize(chain.size());
92           sps.resize(chain.size());
93           break;
94         }
95       }
96       ips.push_back(top->ip);
97       sps.push_back(top->sp);
98     }
99   } else {
100     UnlinkParent(chain.back());
101   }
102 }
103 
CacheNodeEqual(const CacheNode * n1,const CacheNode * n2)104 bool LRUCache::CacheNodeEqual(const CacheNode* n1, const CacheNode* n2) {
105   return n1->tid == n2->tid && n1->ip == n2->ip && n1->sp == n2->sp;
106 }
107 
CacheNodeHash(const CacheNode * n)108 size_t LRUCache::CacheNodeHash(const CacheNode* n) {
109   return static_cast<size_t>(n->tid ^ n->ip ^ n->sp);
110 }
111 
GetNode(uint32_t tid,uint64_t ip,uint64_t sp)112 CacheNode* LRUCache::GetNode(uint32_t tid, uint64_t ip, uint64_t sp) {
113   CacheNode* node = FindNode(tid, ip, sp);
114   if (node != nullptr) {
115     if (node->is_leaf) {
116       // Update the node's position in the LRU linked list.
117       RemoveNodeFromLRUList(node);
118       AppendNodeToLRUList(node);
119     }
120     return node;
121   }
122   node = AllocNode();
123   node->tid = tid;
124   node->ip = ip;
125   node->sp = sp;
126   node->is_leaf = 1;
127   node->parent_index = 0;
128   node->leaf_link_prev = node->leaf_link_next = GetNodeIndex(node);
129   node_set_.insert(node);
130   AppendNodeToLRUList(node);
131   return node;
132 }
133 
AllocNode()134 CacheNode* LRUCache::AllocNode() {
135   if (cache_stat_.used_node_count < cache_stat_.max_node_count) {
136     return &nodes_[++cache_stat_.used_node_count];
137   }
138   // Recycle the node at the front of the LRU linked list.
139   CacheNode* node = &nodes_[nodes_->leaf_link_next];
140   RemoveNodeFromLRUList(node);
141   node_set_.erase(node);
142   CacheNode* parent = GetParent(node);
143   if (parent != nullptr) {
144     DecreaseChildCountOfNode(parent);
145   }
146   cache_stat_.recycled_node_count++;
147   return node;
148 }
149 
LinkParent(CacheNode * child,CacheNode * new_parent)150 void LRUCache::LinkParent(CacheNode* child, CacheNode* new_parent) {
151   CacheNode* old_parent = GetParent(child);
152   if (old_parent != nullptr) {
153     DecreaseChildCountOfNode(old_parent);
154   }
155   child->parent_index = GetNodeIndex(new_parent);
156   if (new_parent->is_leaf) {
157     RemoveNodeFromLRUList(new_parent);
158     new_parent->is_leaf = 0;
159     new_parent->children_count = 1;
160   } else {
161     new_parent->children_count++;
162   }
163 }
164 
UnlinkParent(CacheNode * child)165 void LRUCache::UnlinkParent(CacheNode* child) {
166   CacheNode* old_parent = GetParent(child);
167   if (old_parent != nullptr) {
168     DecreaseChildCountOfNode(old_parent);
169   }
170   child->parent_index = 0;
171 }
172 
173 }  // namespace call_chain_joiner_impl
174 
175 using namespace call_chain_joiner_impl;
176 
WriteCallChain(FILE * fp,pid_t pid,pid_t tid,CallChainJoiner::ChainType type,const std::vector<uint64_t> & ips,const std::vector<uint64_t> & sps,size_t ip_count)177 static bool WriteCallChain(FILE* fp, pid_t pid, pid_t tid, CallChainJoiner::ChainType type,
178                            const std::vector<uint64_t>& ips, const std::vector<uint64_t>& sps,
179                            size_t ip_count) {
180   // Below is the content of a call chain stored in file.
181   //   uint32_t pid;
182   //   uint32_t tid;
183   //   uint32_t chain_type;
184   //   uint32_t ip_count;
185   //   uint64_t ips[];
186   //   uint64_t sps[];
187   //   uint32_t size;
188   uint32_t size = 4 * sizeof(uint32_t) + sizeof(uint64_t) * ip_count * 2 + sizeof(uint32_t);
189   std::vector<char> data(size);
190   char* p = data.data();
191   MoveToBinaryFormat(pid, p);
192   MoveToBinaryFormat(tid, p);
193   MoveToBinaryFormat(type, p);
194   MoveToBinaryFormat(static_cast<uint32_t>(ip_count), p);
195   MoveToBinaryFormat(ips.data(), ip_count, p);
196   MoveToBinaryFormat(sps.data(), ip_count, p);
197   MoveToBinaryFormat(size, p);
198   if (fwrite(data.data(), size, 1, fp) != 1) {
199     PLOG(ERROR) << "fwrite";
200     return false;
201   }
202   return true;
203 }
204 
ReadCallChain(FILE * fp,pid_t & pid,pid_t & tid,CallChainJoiner::ChainType & type,std::vector<uint64_t> & ips,std::vector<uint64_t> & sps)205 static bool ReadCallChain(FILE* fp, pid_t& pid, pid_t& tid, CallChainJoiner::ChainType& type,
206                           std::vector<uint64_t>& ips, std::vector<uint64_t>& sps) {
207   std::vector<char> data(4 * sizeof(uint32_t));
208   if (fread(data.data(), data.size(), 1, fp) != 1) {
209     PLOG(ERROR) << "fread";
210     return false;
211   }
212   const char* p = data.data();
213   MoveFromBinaryFormat(pid, p);
214   MoveFromBinaryFormat(tid, p);
215   MoveFromBinaryFormat(type, p);
216   uint32_t ip_count;
217   MoveFromBinaryFormat(ip_count, p);
218   data.resize(sizeof(uint64_t) * ip_count * 2 + sizeof(uint32_t));
219   if (fread(data.data(), data.size(), 1, fp) != 1) {
220     PLOG(ERROR) << "fread";
221     return false;
222   }
223   p = data.data();
224   ips.resize(ip_count);
225   MoveFromBinaryFormat(ips.data(), ip_count, p);
226   sps.resize(ip_count);
227   MoveFromBinaryFormat(sps.data(), ip_count, p);
228   return true;
229 }
230 
ReadCallChainInReverseOrder(FILE * fp,pid_t & pid,pid_t & tid,CallChainJoiner::ChainType & type,std::vector<uint64_t> & ips,std::vector<uint64_t> & sps)231 static bool ReadCallChainInReverseOrder(FILE* fp, pid_t& pid, pid_t& tid,
232                                         CallChainJoiner::ChainType& type,
233                                         std::vector<uint64_t>& ips, std::vector<uint64_t>& sps) {
234   uint32_t size;
235   if (fseek(fp, -4, SEEK_CUR) != 0 || fread(&size, sizeof(size), 1, fp) != 1) {
236     PLOG(ERROR) << "fread";
237     return false;
238   }
239   std::vector<char> data(size - 4);
240   if (fseek(fp, -static_cast<int>(size), SEEK_CUR) != 0 ||
241       fread(data.data(), data.size(), 1, fp) != 1 ||
242       fseek(fp, -static_cast<int>(data.size()), SEEK_CUR) != 0) {
243     PLOG(ERROR) << "fread";
244     return false;
245   }
246   const char* p = data.data();
247   MoveFromBinaryFormat(pid, p);
248   MoveFromBinaryFormat(tid, p);
249   MoveFromBinaryFormat(type, p);
250   uint32_t ip_count;
251   MoveFromBinaryFormat(ip_count, p);
252   ips.resize(ip_count);
253   MoveFromBinaryFormat(ips.data(), ip_count, p);
254   sps.resize(ip_count);
255   MoveFromBinaryFormat(sps.data(), ip_count, p);
256   return true;
257 }
258 
CreateTempFp()259 static FILE* CreateTempFp() {
260   std::unique_ptr<TemporaryFile> tmpfile = ScopedTempFiles::CreateTempFile();
261   FILE* fp = fdopen(tmpfile->release(), "web+");
262   if (fp == nullptr) {
263     PLOG(ERROR) << "fdopen";
264     return nullptr;
265   }
266   return fp;
267 }
268 
CallChainJoiner(size_t cache_size,size_t matched_node_count_to_extend_callchain,bool keep_original_callchains)269 CallChainJoiner::CallChainJoiner(size_t cache_size, size_t matched_node_count_to_extend_callchain,
270                                  bool keep_original_callchains)
271     : keep_original_callchains_(keep_original_callchains),
272       original_chains_fp_(nullptr),
273       joined_chains_fp_(nullptr),
274       next_chain_index_(0u) {
275   cache_stat_.cache_size = cache_size;
276   cache_stat_.matched_node_count_to_extend_callchain = matched_node_count_to_extend_callchain;
277 }
278 
~CallChainJoiner()279 CallChainJoiner::~CallChainJoiner() {
280   if (original_chains_fp_ != nullptr) {
281     fclose(original_chains_fp_);
282   }
283   if (joined_chains_fp_ != nullptr) {
284     fclose(joined_chains_fp_);
285   }
286 }
287 
AddCallChain(pid_t pid,pid_t tid,ChainType type,const std::vector<uint64_t> & ips,const std::vector<uint64_t> & sps)288 bool CallChainJoiner::AddCallChain(pid_t pid, pid_t tid, ChainType type,
289                                    const std::vector<uint64_t>& ips,
290                                    const std::vector<uint64_t>& sps) {
291   CHECK_EQ(ips.size(), sps.size());
292   CHECK_GT(ips.size(), 0u);
293   CHECK(type == ORIGINAL_OFFLINE || type == ORIGINAL_REMOTE);
294   // Make sure sps are in non-decreasing order, and there is no duplicated items.
295   size_t ip_count = ips.size();
296   for (size_t i = 1; i < ips.size(); ++i) {
297     if (sps[i] < sps[i - 1]) {
298       ip_count = i;
299       break;
300     } else if (sps[i] == sps[i - 1]) {
301       bool has_duplicated_items = false;
302       for (size_t j = i; j > 0 && sps[j - 1] == sps[i]; --j) {
303         if (ips[j - 1] == ips[i]) {
304           has_duplicated_items = true;
305           break;
306         }
307       }
308       if (has_duplicated_items) {
309         ip_count = i;
310         break;
311       }
312     }
313   }
314 
315   if (original_chains_fp_ == nullptr) {
316     original_chains_fp_ = CreateTempFp();
317     if (original_chains_fp_ == nullptr) {
318       return false;
319     }
320   }
321   stat_.chain_count++;
322   return WriteCallChain(original_chains_fp_, pid, tid, type, ips, sps, ip_count);
323 }
324 
JoinCallChains()325 bool CallChainJoiner::JoinCallChains() {
326   if (stat_.chain_count == 0u) {
327     return true;
328   }
329   LRUCache cache(cache_stat_.cache_size, cache_stat_.matched_node_count_to_extend_callchain);
330   std::unique_ptr<FILE, decltype(&fclose)> tmp_fp(CreateTempFp(), fclose);
331   if (!tmp_fp) {
332     return false;
333   }
334   joined_chains_fp_ = CreateTempFp();
335   if (joined_chains_fp_ == nullptr) {
336     return false;
337   }
338   pid_t pid;
339   pid_t tid;
340   ChainType type;
341   std::vector<uint64_t> ips;
342   std::vector<uint64_t> sps;
343   if (fseek(original_chains_fp_, 0, SEEK_END) != 0) {
344     PLOG(ERROR) << "fseek";
345     return false;
346   }
347   std::vector<std::pair<FILE*, FILE*>> file_pairs = {
348       std::make_pair(original_chains_fp_, tmp_fp.get()),
349       std::make_pair(tmp_fp.get(), joined_chains_fp_)};
350   for (size_t pass = 0; pass < 2u; ++pass) {
351     auto& pair = file_pairs[pass];
352     for (size_t i = 0; i < stat_.chain_count; ++i) {
353       if (!ReadCallChainInReverseOrder(pair.first, pid, tid, type, ips, sps)) {
354         return false;
355       }
356       if (pass == 0u) {
357         if (type == ORIGINAL_OFFLINE) {
358           type = JOINED_OFFLINE;
359         } else if (type == ORIGINAL_REMOTE) {
360           type = JOINED_REMOTE;
361         }
362         stat_.before_join_node_count += ips.size();
363       }
364 
365       cache.AddCallChain(tid, ips, sps);
366 
367       if (pass == 1u) {
368         stat_.after_join_node_count += ips.size();
369         stat_.after_join_max_chain_length = std::max(stat_.after_join_max_chain_length, ips.size());
370       }
371 
372       if (!WriteCallChain(pair.second, pid, tid, type, ips, sps, ips.size())) {
373         return false;
374       }
375     }
376   }
377   cache_stat_ = cache.Stat();
378   return true;
379 }
380 
GetNextCallChain(pid_t & pid,pid_t & tid,ChainType & type,std::vector<uint64_t> & ips,std::vector<uint64_t> & sps)381 bool CallChainJoiner::GetNextCallChain(pid_t& pid, pid_t& tid, ChainType& type,
382                                        std::vector<uint64_t>& ips, std::vector<uint64_t>& sps) {
383   if (next_chain_index_ == stat_.chain_count * 2) {
384     // No more chains.
385     return false;
386   }
387   if (next_chain_index_ == 0u) {
388     if (fseek(original_chains_fp_, 0, SEEK_SET) != 0 ||
389         fseek(joined_chains_fp_, 0, SEEK_SET) != 0) {
390       PLOG(ERROR) << "fseek";
391       return false;
392     }
393   }
394   FILE* fp;
395   if (keep_original_callchains_) {
396     fp = (next_chain_index_ & 1) ? joined_chains_fp_ : original_chains_fp_;
397     next_chain_index_++;
398   } else {
399     fp = joined_chains_fp_;
400     next_chain_index_ += 2;
401   }
402   return ReadCallChain(fp, pid, tid, type, ips, sps);
403 }
404 
DumpStat()405 void CallChainJoiner::DumpStat() {
406   LOG(DEBUG) << "call chain joiner stat:";
407   LOG(DEBUG) << "  cache_size: " << cache_stat_.cache_size;
408   LOG(DEBUG) << "  matched_node_count_to_extend_callchain: "
409              << cache_stat_.matched_node_count_to_extend_callchain;
410   LOG(DEBUG) << "  max_node_count in cache: " << cache_stat_.max_node_count;
411   LOG(DEBUG) << "  used_node_count in cache: " << cache_stat_.used_node_count;
412   LOG(DEBUG) << "  recycled_node_count in cache: " << cache_stat_.recycled_node_count;
413   LOG(DEBUG) << "  call_chain_count: " << stat_.chain_count;
414   LOG(DEBUG) << "  before_join_node_count: " << stat_.before_join_node_count;
415   if (stat_.chain_count > 0u) {
416     LOG(DEBUG) << "  before_join_average_chain_length: "
417                << (stat_.before_join_node_count * 1.0 / stat_.chain_count);
418   }
419   LOG(DEBUG) << "  after_join_node_count: " << stat_.after_join_node_count;
420   if (stat_.chain_count > 0u) {
421     LOG(DEBUG) << "  after_join_average_chain_length: "
422                << (stat_.after_join_node_count * 1.0 / stat_.chain_count);
423   }
424   LOG(DEBUG) << "  after_join_max_chain_length: " << stat_.after_join_max_chain_length;
425 }
426 
427 }  // namespace simpleperf
428