1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/importers/json/json_trace_tokenizer.h"
18 
19 #include <memory>
20 
21 #include "perfetto/base/build_config.h"
22 #include "perfetto/ext/base/string_utils.h"
23 
24 #include "src/trace_processor/importers/common/trace_blob_view.h"
25 #include "src/trace_processor/importers/json/json_tracker.h"
26 #include "src/trace_processor/importers/json/json_utils.h"
27 #include "src/trace_processor/storage/stats.h"
28 #include "src/trace_processor/trace_sorter.h"
29 #include "src/trace_processor/util/status_macros.h"
30 
31 namespace perfetto {
32 namespace trace_processor {
33 
34 namespace {
35 
36 #if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
37 
AppendUnescapedCharacter(char c,bool is_escaping,std::string * key)38 util::Status AppendUnescapedCharacter(char c,
39                                       bool is_escaping,
40                                       std::string* key) {
41   if (is_escaping) {
42     switch (c) {
43       case '"':
44       case '\\':
45       case '/':
46         key->push_back(c);
47         break;
48       case 'b':
49         key->push_back('\b');
50         break;
51       case 'f':
52         key->push_back('\f');
53         break;
54       case 'n':
55         key->push_back('\n');
56         break;
57       case 'r':
58         key->push_back('\r');
59         break;
60       case 't':
61         key->push_back('\t');
62         break;
63       default:
64         // We don't support any other escape sequences (concretely \uxxxx
65         // which JSON supports but is too much effort for us to parse).
66         return util::ErrStatus("Illegal character in JSON");
67     }
68   } else if (c != '\\') {
69     key->push_back(c);
70   }
71   return util::OkStatus();
72 }
73 
// Outcome of ReadOneJsonString().
enum class ReadStringRes {
  // The unescaped closing quote of the string was found.
  kEndOfString,
  // The input ended before the closing quote was seen.
  kNeedsMoreData,
  // A control character or an unsupported escape sequence was encountered.
  kFatalError,
};
ReadOneJsonString(const char * start,const char * end,std::string * key,const char ** next)79 ReadStringRes ReadOneJsonString(const char* start,
80                                 const char* end,
81                                 std::string* key,
82                                 const char** next) {
83   bool is_escaping = false;
84   for (const char* s = start; s < end; s++) {
85     // Control characters are not allowed in JSON strings.
86     if (iscntrl(*s))
87       return ReadStringRes::kFatalError;
88 
89     // If we get a quote character end of the string.
90     if (*s == '"' && !is_escaping) {
91       *next = s + 1;
92       return ReadStringRes::kEndOfString;
93     }
94 
95     util::Status status = AppendUnescapedCharacter(*s, is_escaping, key);
96     if (!status.ok())
97       return ReadStringRes::kFatalError;
98 
99     // If we're in a string and we see a backslash and the last character was
100     // not a backslash the next character is escaped:
101     is_escaping = *s == '\\' && !is_escaping;
102   }
103   return ReadStringRes::kNeedsMoreData;
104 }
105 
106 #endif  // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
107 
108 }  // namespace
109 
110 #if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
ReadOneJsonDict(const char * start,const char * end,base::StringView * value,const char ** next)111 ReadDictRes ReadOneJsonDict(const char* start,
112                             const char* end,
113                             base::StringView* value,
114                             const char** next) {
115   int braces = 0;
116   int square_brackets = 0;
117   const char* dict_begin = nullptr;
118   bool in_string = false;
119   bool is_escaping = false;
120   for (const char* s = start; s < end; s++) {
121     if (isspace(*s) || *s == ',')
122       continue;
123     if (*s == '"' && !is_escaping) {
124       in_string = !in_string;
125       continue;
126     }
127     if (in_string) {
128       // If we're in a string and we see a backslash and the last character was
129       // not a backslash the next character is escaped:
130       is_escaping = *s == '\\' && !is_escaping;
131       // If we're currently parsing a string we should ignore otherwise special
132       // characters:
133       continue;
134     }
135     if (*s == '{') {
136       if (braces == 0)
137         dict_begin = s;
138       braces++;
139       continue;
140     }
141     if (*s == '}') {
142       if (braces <= 0)
143         return ReadDictRes::kEndOfTrace;
144       if (--braces > 0)
145         continue;
146       size_t len = static_cast<size_t>((s + 1) - dict_begin);
147       *value = base::StringView(dict_begin, len);
148       *next = s + 1;
149       return ReadDictRes::kFoundDict;
150     }
151     if (*s == '[') {
152       square_brackets++;
153       continue;
154     }
155     if (*s == ']') {
156       if (square_brackets == 0) {
157         // We've reached the end of [traceEvents] array.
158         // There might be other top level keys in the json (e.g. metadata)
159         // after.
160         *next = s + 1;
161         return ReadDictRes::kEndOfArray;
162       }
163       square_brackets--;
164     }
165   }
166   return ReadDictRes::kNeedsMoreData;
167 }
168 
ReadOneJsonKey(const char * start,const char * end,std::string * key,const char ** next)169 ReadKeyRes ReadOneJsonKey(const char* start,
170                           const char* end,
171                           std::string* key,
172                           const char** next) {
173   enum class NextToken {
174     kStringOrEndOfDict,
175     kColon,
176     kValue,
177   };
178 
179   NextToken next_token = NextToken::kStringOrEndOfDict;
180   for (const char* s = start; s < end; s++) {
181     // Whitespace characters anywhere can be skipped.
182     if (isspace(*s))
183       continue;
184 
185     switch (next_token) {
186       case NextToken::kStringOrEndOfDict: {
187         // If we see a closing brace, that means we've reached the end of the
188         // wrapping dictionary.
189         if (*s == '}') {
190           *next = s + 1;
191           return ReadKeyRes::kEndOfDictionary;
192         }
193 
194         // If we see a comma separator, just ignore it.
195         if (*s == ',')
196           continue;
197 
198         // If we see anything else but a quote character here, this cannot be a
199         // valid key.
200         if (*s != '"')
201           return ReadKeyRes::kFatalError;
202 
203         auto res = ReadOneJsonString(s + 1, end, key, &s);
204         if (res == ReadStringRes::kFatalError)
205           return ReadKeyRes::kFatalError;
206         if (res == ReadStringRes::kNeedsMoreData)
207           return ReadKeyRes::kNeedsMoreData;
208 
209         // We need to decrement from the pointer as the loop will increment
210         // it back up.
211         s--;
212         next_token = NextToken::kColon;
213         break;
214       }
215       case NextToken::kColon:
216         if (*s != ':')
217           return ReadKeyRes::kFatalError;
218         next_token = NextToken::kValue;
219         break;
220       case NextToken::kValue:
221         // Allowed value starting chars: [ { digit - "
222         // Also allowed: true, false, null. For simplicities sake, we only check
223         // against the first character as we're not trying to be super accurate.
224         if (*s == '[' || *s == '{' || isdigit(*s) || *s == '-' || *s == '"' ||
225             *s == 't' || *s == 'f' || *s == 'n') {
226           *next = s;
227           return ReadKeyRes::kFoundKey;
228         }
229         return ReadKeyRes::kFatalError;
230     }
231   }
232   return ReadKeyRes::kNeedsMoreData;
233 }
234 
ExtractValueForJsonKey(base::StringView dict,const std::string & key,base::Optional<std::string> * value)235 util::Status ExtractValueForJsonKey(base::StringView dict,
236                                     const std::string& key,
237                                     base::Optional<std::string>* value) {
238   PERFETTO_DCHECK(dict.size() >= 2);
239 
240   const char* start = dict.data();
241   const char* end = dict.data() + dict.size();
242 
243   enum ExtractValueState {
244     kBeforeDict,
245     kInsideDict,
246     kAfterDict,
247   };
248 
249   ExtractValueState state = kBeforeDict;
250   for (const char* s = start; s < end;) {
251     if (isspace(*s)) {
252       ++s;
253       continue;
254     }
255 
256     if (state == kBeforeDict) {
257       if (*s == '{') {
258         ++s;
259         state = kInsideDict;
260         continue;
261       }
262       return util::ErrStatus("Unexpected character before JSON dict");
263     }
264 
265     if (state == kAfterDict)
266       return util::ErrStatus("Unexpected character after JSON dict");
267 
268     PERFETTO_DCHECK(state == kInsideDict);
269     PERFETTO_DCHECK(s < end);
270 
271     if (*s == '}') {
272       ++s;
273       state = kAfterDict;
274       continue;
275     }
276 
277     std::string current_key;
278     auto res = ReadOneJsonKey(s, end, &current_key, &s);
279     if (res == ReadKeyRes::kEndOfDictionary)
280       break;
281 
282     if (res == ReadKeyRes::kFatalError)
283       return util::ErrStatus("Failure parsing JSON: encountered fatal error");
284 
285     if (res == ReadKeyRes::kNeedsMoreData) {
286       return util::ErrStatus("Failure parsing JSON: partial JSON dictionary");
287     }
288 
289     PERFETTO_DCHECK(res == ReadKeyRes::kFoundKey);
290 
291     if (*s == '[') {
292       return util::ErrStatus(
293           "Failure parsing JSON: unsupported JSON dictionary with array");
294     }
295 
296     std::string value_str;
297     if (*s == '{') {
298       base::StringView dict_str;
299       ReadDictRes dict_res = ReadOneJsonDict(s, end, &dict_str, &s);
300       if (dict_res == ReadDictRes::kNeedsMoreData ||
301           dict_res == ReadDictRes::kEndOfArray ||
302           dict_res == ReadDictRes::kEndOfTrace) {
303         return util::ErrStatus(
304             "Failure parsing JSON: unable to parse dictionary");
305       }
306       value_str = dict_str.ToStdString();
307     } else if (*s == '"') {
308       auto str_res = ReadOneJsonString(s + 1, end, &value_str, &s);
309       if (str_res == ReadStringRes::kNeedsMoreData ||
310           str_res == ReadStringRes::kFatalError) {
311         return util::ErrStatus("Failure parsing JSON: unable to parse string");
312       }
313     } else {
314       const char* value_start = s;
315       const char* value_end = end;
316       for (; s < end; ++s) {
317         if (*s == ',' || isspace(*s) || *s == '}') {
318           value_end = s;
319           break;
320         }
321       }
322       value_str = std::string(value_start, value_end);
323     }
324 
325     if (key == current_key) {
326       *value = value_str;
327       return util::OkStatus();
328     }
329   }
330 
331   if (state != kAfterDict)
332     return util::ErrStatus("Failure parsing JSON: malformed dictionary");
333 
334   *value = base::nullopt;
335   return util::OkStatus();
336 }
337 
ReadOneSystemTraceLine(const char * start,const char * end,std::string * line,const char ** next)338 ReadSystemLineRes ReadOneSystemTraceLine(const char* start,
339                                          const char* end,
340                                          std::string* line,
341                                          const char** next) {
342   bool is_escaping = false;
343   for (const char* s = start; s < end; s++) {
344     // If we get a quote character and we're not escaping, we are done with the
345     // system trace string.
346     if (*s == '"' && !is_escaping) {
347       *next = s + 1;
348       return ReadSystemLineRes::kEndOfSystemTrace;
349     }
350 
351     // If we are escaping n, that means this is a new line which is a delimiter
352     // for a system trace line.
353     if (*s == 'n' && is_escaping) {
354       *next = s + 1;
355       return ReadSystemLineRes::kFoundLine;
356     }
357 
358     util::Status status = AppendUnescapedCharacter(*s, is_escaping, line);
359     if (!status.ok())
360       return ReadSystemLineRes::kFatalError;
361 
362     // If we're in a string and we see a backslash and the last character was
363     // not a backslash the next character is escaped:
364     is_escaping = *s == '\\' && !is_escaping;
365   }
366   return ReadSystemLineRes::kNeedsMoreData;
367 }
368 #endif  // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
369 
// Stores |ctx| in |context_|; that is the only member set in this initializer
// list.
JsonTraceTokenizer::JsonTraceTokenizer(TraceProcessorContext* ctx)
    : context_(ctx) {}
// Compiler-generated destructor, defined out of line.
JsonTraceTokenizer::~JsonTraceTokenizer() = default;
373 
Parse(std::unique_ptr<uint8_t[]> data,size_t size)374 util::Status JsonTraceTokenizer::Parse(std::unique_ptr<uint8_t[]> data,
375                                        size_t size) {
376   PERFETTO_DCHECK(json::IsJsonSupported());
377 
378 #if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
379   buffer_.insert(buffer_.end(), data.get(), data.get() + size);
380   const char* buf = buffer_.data();
381   const char* next = buf;
382   const char* end = buf + buffer_.size();
383 
384   JsonTracker* json_tracker = JsonTracker::GetOrCreate(context_);
385 
386   // It's possible the displayTimeUnit key is at the end of the json
387   // file so to be correct we ought to parse the whole file looking
388   // for this key before parsing any events however this would require
389   // two passes on the file so for now we only handle displayTimeUnit
390   // correctly if it is at the beginning of the file.
391   const base::StringView view(buf, size);
392   if (view.find("\"displayTimeUnit\":\"ns\"") != base::StringView::npos) {
393     json_tracker->SetTimeUnit(json::TimeUnit::kNs);
394   } else if (view.find("\"displayTimeUnit\":\"ms\"") !=
395              base::StringView::npos) {
396     json_tracker->SetTimeUnit(json::TimeUnit::kMs);
397   }
398 
399   if (offset_ == 0) {
400     // Strip leading whitespace.
401     while (next != end && isspace(*next)) {
402       next++;
403     }
404     if (next == end) {
405       return util::ErrStatus(
406           "Failure parsing JSON: first chunk has only whitespace");
407     }
408 
409     // Trace could begin in any of these ways:
410     // {"traceEvents":[{
411     // { "traceEvents": [{
412     // [{
413     if (*next != '{' && *next != '[') {
414       return util::ErrStatus(
415           "Failure parsing JSON: first non-whitespace character is not [ or {");
416     }
417 
418     // Figure out the format of the JSON file based on the first non-whitespace
419     // character.
420     format_ = *next == '{' ? TraceFormat::kOuterDictionary
421                            : TraceFormat::kOnlyTraceEvents;
422 
423     // Skip the '[' or '{' character.
424     next++;
425 
426     // Set our current position based on the format of the trace.
427     position_ = format_ == TraceFormat::kOuterDictionary
428                     ? TracePosition::kDictionaryKey
429                     : TracePosition::kTraceEventsArray;
430   }
431 
432   auto status = ParseInternal(next, end, &next);
433   if (!status.ok())
434     return status;
435 
436   offset_ += static_cast<uint64_t>(next - buf);
437   buffer_.erase(buffer_.begin(), buffer_.begin() + (next - buf));
438   return util::OkStatus();
439 #else
440   perfetto::base::ignore_result(data);
441   perfetto::base::ignore_result(size);
442   perfetto::base::ignore_result(context_);
443   perfetto::base::ignore_result(format_);
444   perfetto::base::ignore_result(position_);
445   perfetto::base::ignore_result(offset_);
446   return util::ErrStatus("Cannot parse JSON trace due to missing JSON support");
447 #endif  // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
448 }
449 
450 #if PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
ParseInternal(const char * start,const char * end,const char ** out)451 util::Status JsonTraceTokenizer::ParseInternal(const char* start,
452                                                const char* end,
453                                                const char** out) {
454   PERFETTO_DCHECK(json::IsJsonSupported());
455   JsonTracker* json_tracker = JsonTracker::GetOrCreate(context_);
456   auto* trace_sorter = context_->sorter.get();
457 
458   const char* next = start;
459   switch (position_) {
460     case TracePosition::kDictionaryKey: {
461       if (format_ != TraceFormat::kOuterDictionary) {
462         return util::ErrStatus(
463             "Failure parsing JSON: illegal format when parsing dictionary key");
464       }
465 
466       std::string key;
467       auto res = ReadOneJsonKey(start, end, &key, &next);
468       if (res == ReadKeyRes::kFatalError)
469         return util::ErrStatus("Failure parsing JSON: encountered fatal error");
470 
471       if (res == ReadKeyRes::kEndOfDictionary ||
472           res == ReadKeyRes::kNeedsMoreData) {
473         break;
474       }
475 
476       if (key == "traceEvents") {
477         position_ = TracePosition::kTraceEventsArray;
478         return ParseInternal(next + 1, end, out);
479       } else if (key == "systemTraceEvents") {
480         position_ = TracePosition::kSystemTraceEventsString;
481         return ParseInternal(next + 1, end, out);
482       } else if (key == "metadata") {
483         position_ = TracePosition::kWaitingForMetadataDictionary;
484         return ParseInternal(next + 1, end, out);
485       } else if (key == "displayTimeUnit") {
486         std::string time_unit;
487         auto string_res = ReadOneJsonString(next + 1, end, &time_unit, &next);
488         if (string_res == ReadStringRes::kFatalError)
489           return util::ErrStatus("Could not parse displayTimeUnit");
490         if (string_res == ReadStringRes::kNeedsMoreData)
491           return util::ErrStatus("displayTimeUnit too large");
492         if (time_unit != "ms" && time_unit != "ns")
493           return util::ErrStatus("displayTimeUnit unknown");
494         return ParseInternal(next, end, out);
495       } else {
496         // If we don't recognize the key, just ignore the rest of the trace and
497         // go to EOF.
498         // TODO(lalitm): do something better here.
499         position_ = TracePosition::kEof;
500         break;
501       }
502     }
503     case TracePosition::kSystemTraceEventsString: {
504       if (format_ != TraceFormat::kOuterDictionary) {
505         return util::ErrStatus(
506             "Failure parsing JSON: illegal format when parsing system events");
507       }
508 
509       while (next < end) {
510         std::string raw_line;
511         auto res = ReadOneSystemTraceLine(next, end, &raw_line, &next);
512         if (res == ReadSystemLineRes::kFatalError)
513           return util::ErrStatus(
514               "Failure parsing JSON: encountered fatal error");
515 
516         if (res == ReadSystemLineRes::kNeedsMoreData)
517           break;
518 
519         if (res == ReadSystemLineRes::kEndOfSystemTrace) {
520           position_ = TracePosition::kDictionaryKey;
521           return ParseInternal(next, end, out);
522         }
523 
524         if (base::StartsWith(raw_line, "#") || raw_line.empty())
525           continue;
526 
527         std::unique_ptr<SystraceLine> line(new SystraceLine());
528         util::Status status =
529             systrace_line_tokenizer_.Tokenize(raw_line, line.get());
530         if (!status.ok())
531           return status;
532         trace_sorter->PushSystraceLine(std::move(line));
533       }
534       break;
535     }
536     case TracePosition::kWaitingForMetadataDictionary: {
537       if (format_ != TraceFormat::kOuterDictionary) {
538         return util::ErrStatus(
539             "Failure parsing JSON: illegal format when parsing metadata");
540       }
541 
542       base::StringView unparsed;
543       const auto res = ReadOneJsonDict(next, end, &unparsed, &next);
544       if (res == ReadDictRes::kEndOfArray)
545         return util::ErrStatus("Failure parsing JSON: encountered fatal error");
546       if (res == ReadDictRes::kEndOfTrace ||
547           res == ReadDictRes::kNeedsMoreData) {
548         break;
549       }
550 
551       // TODO(lalitm): read and ingest the relevant data inside |value|.
552       position_ = TracePosition::kDictionaryKey;
553       break;
554     }
555     case TracePosition::kTraceEventsArray: {
556       while (next < end) {
557         base::StringView unparsed;
558         const auto res = ReadOneJsonDict(next, end, &unparsed, &next);
559         if (res == ReadDictRes::kEndOfTrace ||
560             res == ReadDictRes::kNeedsMoreData) {
561           break;
562         }
563 
564         if (res == ReadDictRes::kEndOfArray) {
565           position_ = format_ == TraceFormat::kOuterDictionary
566                           ? TracePosition::kDictionaryKey
567                           : TracePosition::kEof;
568           break;
569         }
570 
571         base::Optional<std::string> opt_raw_ts;
572         RETURN_IF_ERROR(ExtractValueForJsonKey(unparsed, "ts", &opt_raw_ts));
573         base::Optional<int64_t> opt_ts =
574             opt_raw_ts ? json_tracker->CoerceToTs(*opt_raw_ts) : base::nullopt;
575         int64_t ts = 0;
576         if (opt_ts.has_value()) {
577           ts = opt_ts.value();
578         } else {
579           // Metadata events may omit ts. In all other cases error:
580           base::Optional<std::string> opt_raw_ph;
581           RETURN_IF_ERROR(ExtractValueForJsonKey(unparsed, "ph", &opt_raw_ph));
582           if (!opt_raw_ph || *opt_raw_ph != "M") {
583             context_->storage->IncrementStats(stats::json_tokenizer_failure);
584             continue;
585           }
586         }
587         trace_sorter->PushJsonValue(ts, unparsed.ToStdString());
588       }
589       break;
590     }
591     case TracePosition::kEof: {
592       break;
593     }
594   }
595   *out = next;
596   return util::OkStatus();
597 }
598 #endif  // PERFETTO_BUILDFLAG(PERFETTO_TP_JSON)
599 
// Intentionally empty: nothing is done when the end of the input is signalled,
// so any bytes still held in |buffer_| are not tokenized here.
void JsonTraceTokenizer::NotifyEndOfFile() {}
601 
602 }  // namespace trace_processor
603 }  // namespace perfetto
604