1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "tools/trace_to_text/trace_to_hprof.h"
18 
19 #include <algorithm>
20 #include <limits>
21 #include <string>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <vector>
25 
26 #include "perfetto/base/logging.h"
27 #include "perfetto/ext/base/endian.h"
28 #include "perfetto/ext/base/optional.h"
29 #include "perfetto/ext/base/string_utils.h"
30 #include "tools/trace_to_text/utils.h"
31 
32 // Spec
33 // http://hg.openjdk.java.net/jdk6/jdk6/jdk/raw-file/tip/src/share/demo/jvmti/hprof/manual.html#Basic_Type
34 // Parser
35 // https://cs.android.com/android/platform/superproject/+/master:art/tools/ahat/src/main/com/android/ahat/heapdump/Parser.java
36 
37 namespace perfetto {
38 namespace trace_to_text {
39 
40 namespace {
// Format tag written at the very start of the output. It is emitted with
// sizeof(), i.e. including the trailing NUL byte.
constexpr char kHeader[] = "PERFETTO_JAVA_HEAP";
// Size in bytes of every ID in the output; IDs are serialized as uint64_t.
constexpr uint32_t kIdSz = static_cast<uint32_t>(sizeof(uint64_t));
// Serial number of the single placeholder stack trace that every LOAD_CLASS
// record refers to.
constexpr uint32_t kStackTraceSerialNumber = 1;
44 
45 class BigEndianBuffer {
46  public:
WriteId(uint64_t val)47   void WriteId(uint64_t val) { WriteU8(val); }
48 
WriteU8(uint64_t val)49   void WriteU8(uint64_t val) {
50     val = base::HostToBE64(val);
51     Write(reinterpret_cast<char*>(&val), sizeof(uint64_t));
52   }
53 
WriteU4(uint32_t val)54   void WriteU4(uint32_t val) {
55     val = base::HostToBE32(val);
56     Write(reinterpret_cast<char*>(&val), sizeof(uint32_t));
57   }
58 
SetU4(uint32_t val,size_t pos)59   void SetU4(uint32_t val, size_t pos) {
60     val = base::HostToBE32(val);
61     PERFETTO_CHECK(pos + 4 <= buf_.size());
62     memcpy(buf_.data() + pos, &val, sizeof(uint32_t));
63   }
64 
65   // Uncomment when needed
66   // void WriteU2(uint16_t val) {
67   //   val = base::HostToBE16(val);
68   //   Write(reinterpret_cast<char*>(&val), sizeof(uint16_t));
69   // }
70 
WriteByte(uint8_t val)71   void WriteByte(uint8_t val) { buf_.emplace_back(val); }
72 
Write(const char * val,uint32_t sz)73   void Write(const char* val, uint32_t sz) {
74     const char* end = val + sz;
75     while (val < end) {
76       WriteByte(static_cast<uint8_t>(*val));
77       val++;
78     }
79   }
80 
written() const81   size_t written() const { return buf_.size(); }
82 
Flush(std::ostream * out) const83   void Flush(std::ostream* out) const {
84     out->write(buf_.data(), static_cast<std::streamsize>(buf_.size()));
85   }
86 
87  private:
88   std::vector<char> buf_;
89 };
90 
91 class HprofWriter {
92  public:
HprofWriter(std::ostream * output)93   HprofWriter(std::ostream* output) : output_(output) {}
94 
WriteBuffer(const BigEndianBuffer & buf)95   void WriteBuffer(const BigEndianBuffer& buf) { buf.Flush(output_); }
96 
WriteRecord(const uint8_t type,const std::function<void (BigEndianBuffer *)> && writer)97   void WriteRecord(const uint8_t type,
98                    const std::function<void(BigEndianBuffer*)>&& writer) {
99     BigEndianBuffer buf;
100     buf.WriteByte(type);
101     // ts offset
102     buf.WriteU4(0);
103     // size placeholder
104     buf.WriteU4(0);
105     writer(&buf);
106     uint32_t record_sz = static_cast<uint32_t>(buf.written() - 9);
107     buf.SetU4(record_sz, 5);
108     WriteBuffer(buf);
109   }
110 
111  private:
112   std::ostream* output_;
113 };
114 
115 // A Class from the heap dump.
116 class ClassData {
117  public:
ClassData(uint64_t class_name_string_id)118   explicit ClassData(uint64_t class_name_string_id)
119       : class_name_string_id_(class_name_string_id) {}
120 
121   // Writes a HPROF LOAD_CLASS record for this Class
WriteHprofLoadClass(HprofWriter * writer,uint64_t class_object_id,uint32_t class_serial_number) const122   void WriteHprofLoadClass(HprofWriter* writer,
123                            uint64_t class_object_id,
124                            uint32_t class_serial_number) const {
125     writer->WriteRecord(0x02, [class_object_id, class_serial_number,
126                                this](BigEndianBuffer* buf) {
127       buf->WriteU4(class_serial_number);
128       buf->WriteId(class_object_id);
129       buf->WriteU4(kStackTraceSerialNumber);
130       buf->WriteId(class_name_string_id_);
131     });
132   }
133 
134  private:
135   uint64_t class_name_string_id_;
136 };
137 
138 // Ingested data from a Java Heap Profile for a name, location pair.
139 // We need to support multiple class datas per pair as name, location is
140 // not unique. Classloader should guarantee uniqueness but is not available
141 // until S.
142 class RawClassData {
143  public:
AddClass(uint64_t id,base::Optional<uint64_t> superclass_id)144   void AddClass(uint64_t id, base::Optional<uint64_t> superclass_id) {
145     ids_.push_back(std::make_pair(id, superclass_id));
146   }
147 
AddTemplate(uint64_t template_id)148   void AddTemplate(uint64_t template_id) {
149     template_ids_.push_back(template_id);
150   }
151 
152   // Transforms the raw data into one or more ClassData and adds them to the
153   // parameter map.
ToClassData(std::unordered_map<uint64_t,ClassData> * id_to_class,uint64_t class_name_string_id) const154   void ToClassData(std::unordered_map<uint64_t, ClassData>* id_to_class,
155                    uint64_t class_name_string_id) const {
156     // TODO(dinoderek) assert the two vectors have same length, iterate on both
157     for (auto it_ids = ids_.begin(); it_ids != ids_.end(); ++it_ids) {
158       // TODO(dinoderek) more data will be needed to write CLASS_DUMP
159       id_to_class->emplace(it_ids->first, ClassData(class_name_string_id));
160     }
161   }
162 
163  private:
164   // Pair contains class ID and super class ID.
165   std::vector<std::pair<uint64_t, base::Optional<uint64_t>>> ids_;
166   // Class id of the template
167   std::vector<uint64_t> template_ids_;
168 };
169 
170 // The Heap Dump data
171 class HeapDump {
172  public:
HeapDump(trace_processor::TraceProcessor * tp)173   explicit HeapDump(trace_processor::TraceProcessor* tp) : tp_(tp) {}
174 
Ingest()175   void Ingest() { IngestClasses(); }
176 
Write(HprofWriter * writer)177   void Write(HprofWriter* writer) {
178     WriteStrings(writer);
179     WriteLoadClass(writer);
180   }
181 
182  private:
183   trace_processor::TraceProcessor* tp_;
184 
185   // String IDs start from 1 as 0 appears to be reserved.
186   uint64_t next_string_id_ = 1;
187   // Strings to corresponding String ID
188   std::unordered_map<std::string, uint64_t> string_to_id_;
189   // Type ID to corresponding Class
190   std::unordered_map<uint64_t, ClassData> id_to_class_;
191 
192   // Ingests and processes the class data from the heap dump.
IngestClasses()193   void IngestClasses() {
194     // TODO(dinoderek): heap_graph_class does not support pid or ts filtering
195 
196     std::map<std::pair<uint64_t, std::string>, RawClassData> raw_classes;
197 
198     auto it = tp_->ExecuteQuery(R"(SELECT
199           id,
200           IFNULL(deobfuscated_name, name),
201           superclass_id,
202           location
203         FROM heap_graph_class )");
204 
205     while (it.Next()) {
206       uint64_t id = static_cast<uint64_t>(it.Get(0).AsLong());
207 
208       std::string raw_dname(it.Get(1).AsString());
209       std::string dname;
210       bool is_template_class =
211           base::StartsWith(raw_dname, std::string("java.lang.Class<"));
212       if (is_template_class) {
213         dname = raw_dname.substr(17, raw_dname.size() - 18);
214       } else {
215         dname = raw_dname;
216       }
217       uint64_t name_id = IngestString(dname);
218 
219       auto raw_super_id = it.Get(2);
220       base::Optional<uint64_t> maybe_super_id =
221           raw_super_id.is_null()
222               ? base::nullopt
223               : base::Optional<uint64_t>(
224                     static_cast<uint64_t>(raw_super_id.AsLong()));
225 
226       std::string location(it.Get(3).AsString());
227 
228       auto raw_classes_it =
229           raw_classes.emplace(std::make_pair(name_id, location), RawClassData())
230               .first;
231       if (is_template_class) {
232         raw_classes_it->second.AddTemplate(id);
233       } else {
234         raw_classes_it->second.AddClass(id, maybe_super_id);
235       }
236     }
237 
238     for (const auto& raw : raw_classes) {
239       auto class_name_string_id = raw.first.first;
240       raw.second.ToClassData(&id_to_class_, class_name_string_id);
241     }
242   }
243 
244   // Ingests the parameter string and returns the HPROF ID for the string.
IngestString(const std::string & s)245   uint64_t IngestString(const std::string& s) {
246     auto maybe_id = string_to_id_.find(s);
247     if (maybe_id != string_to_id_.end()) {
248       return maybe_id->second;
249     } else {
250       auto id = next_string_id_;
251       next_string_id_ += 1;
252       string_to_id_[s] = id;
253       return id;
254     }
255   }
256 
257   // Writes STRING sections to the output
WriteStrings(HprofWriter * writer)258   void WriteStrings(HprofWriter* writer) {
259     for (const auto& it : string_to_id_) {
260       writer->WriteRecord(0x01, [it](BigEndianBuffer* buf) {
261         buf->WriteId(it.second);
262         // TODO(dinoderek): UTF-8 encoding
263         buf->Write(it.first.c_str(), static_cast<uint32_t>(it.first.length()));
264       });
265     }
266   }
267 
268   // Writes LOAD CLASS sections to the output
WriteLoadClass(HprofWriter * writer)269   void WriteLoadClass(HprofWriter* writer) {
270     uint32_t class_serial_number = 1;
271     for (const auto& it : id_to_class_) {
272       it.second.WriteHprofLoadClass(writer, it.first, class_serial_number);
273       class_serial_number += 1;
274     }
275   }
276 };
277 
WriteHeaderAndStack(HprofWriter * writer)278 void WriteHeaderAndStack(HprofWriter* writer) {
279   BigEndianBuffer header;
280   header.Write(kHeader, sizeof(kHeader));
281   // Identifier size
282   header.WriteU4(kIdSz);
283   // walltime high (unused)
284   header.WriteU4(0);
285   // walltime low (unused)
286   header.WriteU4(0);
287   writer->WriteBuffer(header);
288 
289   // Add placeholder stack trace (required by the format).
290   writer->WriteRecord(0x05, [](BigEndianBuffer* buf) {
291     buf->WriteU4(kStackTraceSerialNumber);
292     buf->WriteU4(0);
293     buf->WriteU4(0);
294   });
295 }
296 }  // namespace
297 
TraceToHprof(trace_processor::TraceProcessor * tp,std::ostream * output,uint64_t pid,uint64_t ts)298 int TraceToHprof(trace_processor::TraceProcessor* tp,
299                  std::ostream* output,
300                  uint64_t pid,
301                  uint64_t ts) {
302   PERFETTO_DCHECK(tp != nullptr && pid != 0 && ts != 0);
303 
304   HprofWriter writer(output);
305   HeapDump dump(tp);
306 
307   dump.Ingest();
308   WriteHeaderAndStack(&writer);
309   dump.Write(&writer);
310 
311   return 0;
312 }
313 
TraceToHprof(std::istream * input,std::ostream * output,uint64_t pid,std::vector<uint64_t> timestamps)314 int TraceToHprof(std::istream* input,
315                  std::ostream* output,
316                  uint64_t pid,
317                  std::vector<uint64_t> timestamps) {
318   // TODO: Simplify this for cmdline users. For example, if there is a single
319   // heap graph, use this, and only fail when there is ambiguity.
320   if (pid == 0) {
321     PERFETTO_ELOG("Must specify pid");
322     return -1;
323   }
324   if (timestamps.size() != 1) {
325     PERFETTO_ELOG("Must specify single timestamp");
326     return -1;
327   }
328   trace_processor::Config config;
329   std::unique_ptr<trace_processor::TraceProcessor> tp =
330       trace_processor::TraceProcessor::CreateInstance(config);
331   if (!ReadTrace(tp.get(), input))
332     return false;
333   tp->NotifyEndOfFile();
334   return TraceToHprof(tp.get(), output, pid, timestamps[0]);
335 }
336 
337 }  // namespace trace_to_text
338 }  // namespace perfetto
339