1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/importers/systrace/systrace_trace_parser.h"
18 
19 #include "perfetto/base/logging.h"
20 #include "perfetto/ext/base/string_splitter.h"
21 #include "perfetto/ext/base/string_utils.h"
22 #include "src/trace_processor/importers/common/process_tracker.h"
23 #include "src/trace_processor/trace_sorter.h"
24 
25 #include <inttypes.h>
26 #include <cctype>
27 #include <string>
28 #include <unordered_map>
29 
30 namespace perfetto {
31 namespace trace_processor {
32 namespace {
33 
SplitOnSpaces(base::StringView str)34 std::vector<base::StringView> SplitOnSpaces(base::StringView str) {
35   std::vector<base::StringView> result;
36   for (size_t i = 0; i < str.size(); ++i) {
37     // Consume all spaces.
38     for (; i < str.size() && str.data()[i] == ' '; ++i)
39       ;
40     // If we haven't reached the end consume all non-spaces and add result.
41     if (i != str.size()) {
42       size_t start = i;
43       for (; i < str.size() && str.data()[i] != ' '; ++i)
44         ;
45       result.push_back(base::StringView(str.data() + start, i - start));
46     }
47   }
48   return result;
49 }
50 
IsProcessDumpShortHeader(const std::vector<base::StringView> & tokens)51 bool IsProcessDumpShortHeader(const std::vector<base::StringView>& tokens) {
52   return tokens.size() == 4 && tokens[0] == "USER" && tokens[1] == "PID" &&
53          tokens[2] == "TID" && tokens[3] == "CMD";
54 }
55 
IsProcessDumpLongHeader(const std::vector<base::StringView> & tokens)56 bool IsProcessDumpLongHeader(const std::vector<base::StringView>& tokens) {
57   return tokens.size() > 4 && tokens[0] == "USER" && tokens[1] == "PID" &&
58          tokens[2] == "PPID" && tokens[3] == "VSZ";
59 }
60 
61 }  // namespace
62 
SystraceTraceParser(TraceProcessorContext * ctx)63 SystraceTraceParser::SystraceTraceParser(TraceProcessorContext* ctx)
64     : line_parser_(ctx), ctx_(ctx) {}
65 SystraceTraceParser::~SystraceTraceParser() = default;
66 
Parse(std::unique_ptr<uint8_t[]> owned_buf,size_t size)67 util::Status SystraceTraceParser::Parse(std::unique_ptr<uint8_t[]> owned_buf,
68                                         size_t size) {
69   if (state_ == ParseState::kEndOfSystrace)
70     return util::OkStatus();
71   partial_buf_.insert(partial_buf_.end(), &owned_buf[0], &owned_buf[size]);
72 
73   if (state_ == ParseState::kBeforeParse) {
74     state_ = partial_buf_[0] == '<' ? ParseState::kHtmlBeforeSystrace
75                                     : ParseState::kSystrace;
76   }
77 
78   // There can be multiple trace data sections in an HTML trace, we want to
79   // ignore any that don't contain systrace data. In the future it would be
80   // good to also parse the process dump section.
81   const char kTraceDataSection[] =
82       R"(<script class="trace-data" type="application/text">)";
83   auto start_it = partial_buf_.begin();
84   for (;;) {
85     auto line_it = std::find(start_it, partial_buf_.end(), '\n');
86     if (line_it == partial_buf_.end())
87       break;
88 
89     std::string buffer(start_it, line_it);
90 
91     if (state_ == ParseState::kHtmlBeforeSystrace) {
92       if (base::Contains(buffer, kTraceDataSection)) {
93         state_ = ParseState::kTraceDataSection;
94       }
95     } else if (state_ == ParseState::kTraceDataSection) {
96       if (base::StartsWith(buffer, "#") && base::Contains(buffer, "TASK-PID")) {
97         state_ = ParseState::kSystrace;
98       } else if (base::StartsWith(buffer, "PROCESS DUMP")) {
99         state_ = ParseState::kProcessDumpLong;
100       } else if (base::StartsWith(buffer, "CGROUP DUMP")) {
101         state_ = ParseState::kCgroupDump;
102       } else if (base::Contains(buffer, R"(</script>)")) {
103         state_ = ParseState::kHtmlBeforeSystrace;
104       }
105     } else if (state_ == ParseState::kSystrace) {
106       if (base::Contains(buffer, R"(</script>)")) {
107         state_ = ParseState::kEndOfSystrace;
108         break;
109       } else if (!base::StartsWith(buffer, "#") && !buffer.empty()) {
110         SystraceLine line;
111         util::Status status = line_tokenizer_.Tokenize(buffer, &line);
112         if (status.ok()) {
113           line_parser_.ParseLine(std::move(line));
114         } else {
115           ctx_->storage->IncrementStats(stats::systrace_parse_failure);
116         }
117       }
118     } else if (state_ == ParseState::kProcessDumpLong ||
119                state_ == ParseState::kProcessDumpShort) {
120       if (base::Contains(buffer, R"(</script>)")) {
121         state_ = ParseState::kHtmlBeforeSystrace;
122       } else {
123         std::vector<base::StringView> tokens =
124             SplitOnSpaces(base::StringView(buffer));
125         if (IsProcessDumpShortHeader(tokens)) {
126           state_ = ParseState::kProcessDumpShort;
127         } else if (IsProcessDumpLongHeader(tokens)) {
128           state_ = ParseState::kProcessDumpLong;
129         } else if (state_ == ParseState::kProcessDumpLong &&
130                    tokens.size() >= 10) {
131           // Format is:
132           // user pid ppid vsz rss wchan pc s name my cmd line
133           const base::Optional<uint32_t> pid =
134               base::StringToUInt32(tokens[1].ToStdString());
135           const base::Optional<uint32_t> ppid =
136               base::StringToUInt32(tokens[2].ToStdString());
137           base::StringView name = tokens[8];
138           // Command line may contain spaces, merge all remaining tokens:
139           const char* cmd_start = tokens[9].data();
140           base::StringView cmd(
141               cmd_start,
142               static_cast<size_t>((buffer.data() + buffer.size()) - cmd_start));
143           if (!pid || !ppid) {
144             PERFETTO_ELOG("Could not parse line '%s'", buffer.c_str());
145             return util::ErrStatus("Could not parse PROCESS DUMP line");
146           }
147           ctx_->process_tracker->SetProcessMetadata(pid.value(), ppid, name,
148                                                     base::StringView());
149         } else if (state_ == ParseState::kProcessDumpShort &&
150                    tokens.size() >= 4) {
151           // Format is:
152           // username pid tid my cmd line
153           const base::Optional<uint32_t> tgid =
154               base::StringToUInt32(tokens[1].ToStdString());
155           const base::Optional<uint32_t> tid =
156               base::StringToUInt32(tokens[2].ToStdString());
157           // Command line may contain spaces, merge all remaining tokens:
158           const char* cmd_start = tokens[3].data();
159           base::StringView cmd(
160               cmd_start,
161               static_cast<size_t>((buffer.data() + buffer.size()) - cmd_start));
162           StringId cmd_id =
163               ctx_->storage->mutable_string_pool()->InternString(cmd);
164           if (!tid || !tgid) {
165             PERFETTO_ELOG("Could not parse line '%s'", buffer.c_str());
166             return util::ErrStatus("Could not parse PROCESS DUMP line");
167           }
168           UniqueTid utid =
169               ctx_->process_tracker->UpdateThread(tid.value(), tgid.value());
170           ctx_->process_tracker->UpdateThreadNameByUtid(
171               utid, cmd_id, ThreadNamePriority::kOther);
172         }
173       }
174     } else if (state_ == ParseState::kCgroupDump) {
175       if (base::Contains(buffer, R"(</script>)")) {
176         state_ = ParseState::kHtmlBeforeSystrace;
177       }
178       // TODO(lalitm): see if it is important to parse this.
179     }
180     start_it = line_it + 1;
181   }
182   if (state_ == ParseState::kEndOfSystrace) {
183     partial_buf_.clear();
184   } else {
185     partial_buf_.erase(partial_buf_.begin(), start_it);
186   }
187   return util::OkStatus();
188 }
189 
NotifyEndOfFile()190 void SystraceTraceParser::NotifyEndOfFile() {}
191 
192 }  // namespace trace_processor
193 }  // namespace perfetto
194