1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/importers/systrace/systrace_line_tokenizer.h"
18 
19 #include "perfetto/ext/base/string_utils.h"
20 
// On Windows std::isspace is overloaded in <locale>. MSBUILD via bazel
// attempts to use that version instead of the intended one defined in
// <cctype>.
24 #include <cctype>
25 
26 namespace perfetto {
27 namespace trace_processor {
28 
29 namespace {
// Returns a copy of |input| with leading and trailing whitespace removed,
// where whitespace is whatever std::isspace reports. The result is empty when
// |input| is empty or consists only of whitespace.
std::string SubstrTrim(const std::string& input) {
  // std::isspace has undefined behavior for negative arguments other than EOF.
  // Plain char may be signed, so every byte is converted to unsigned char
  // before classification.
  const auto is_space = [](char ch) {
    return std::isspace(static_cast<unsigned char>(ch)) != 0;
  };

  size_t first = 0;
  size_t last = input.size();  // One past the final character to keep.
  while (first < last && is_space(input[first])) {
    ++first;
  }
  while (last > first && is_space(input[last - 1])) {
    --last;
  }
  return input.substr(first, last - first);
}
40 }  // namespace
41 
SystraceLineTokenizer()42 SystraceLineTokenizer::SystraceLineTokenizer()
43     : line_matcher_(std::regex(R"(-(\d+)\s+\(?\s*(\d+|-+)?\)?\s?\[(\d+)\]\s*)"
44                                R"([a-zA-Z0-9.]{0,5}\s+(\d+\.\d+):\s+(\S+):)")) {
45 }
46 
47 // TODO(hjd): This should be more robust to being passed random input.
48 // This can happen if we mess up detecting a gzip trace for example.
Tokenize(const std::string & buffer,SystraceLine * line)49 util::Status SystraceLineTokenizer::Tokenize(const std::string& buffer,
50                                              SystraceLine* line) {
51   // An example line from buffer looks something like the following:
52   // kworker/u16:1-77    (   77) [004] ....   316.196720: 0:
53   // B|77|__scm_call_armv8_64|0
54   //
55   // However, sometimes the tgid can be missing and buffer looks like this:
56   // <idle>-0     [000] ...2     0.002188: task_newtask: pid=1 ...
57   //
58   // Also the irq fields can be missing (we don't parse these anyway)
59   // <idle>-0     [000]  0.002188: task_newtask: pid=1 ...
60   //
61   // The task name can contain any characters e.g -:[(/ and for this reason
62   // it is much easier to use a regex (even though it is slower than parsing
63   // manually)
64 
65   std::smatch matches;
66   bool matched = std::regex_search(buffer, matches, line_matcher_);
67   if (!matched) {
68     return util::ErrStatus("Not a known systrace event format (line: %s)",
69                            buffer.c_str());
70   }
71 
72   std::string pid_str = matches[1].str();
73   std::string cpu_str = matches[3].str();
74   std::string ts_str = matches[4].str();
75 
76   line->task = SubstrTrim(matches.prefix());
77   line->tgid_str = matches[2].str();
78   line->event_name = matches[5].str();
79   line->args_str = SubstrTrim(matches.suffix());
80 
81   base::Optional<uint32_t> maybe_pid = base::StringToUInt32(pid_str);
82   if (!maybe_pid.has_value()) {
83     return util::Status("Could not convert pid " + pid_str);
84   }
85   line->pid = maybe_pid.value();
86 
87   base::Optional<uint32_t> maybe_cpu = base::StringToUInt32(cpu_str);
88   if (!maybe_cpu.has_value()) {
89     return util::Status("Could not convert cpu " + cpu_str);
90   }
91   line->cpu = maybe_cpu.value();
92 
93   base::Optional<double> maybe_ts = base::StringToDouble(ts_str);
94   if (!maybe_ts.has_value()) {
95     return util::Status("Could not convert ts");
96   }
97   line->ts = static_cast<int64_t>(maybe_ts.value() * 1e9);
98 
99   return util::OkStatus();
100 }
101 
102 }  // namespace trace_processor
103 }  // namespace perfetto
104