1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "SkJSON.h"
9 
10 #include "SkMalloc.h"
11 #include "SkParse.h"
12 #include "SkStream.h"
13 #include "SkString.h"
14 #include "SkUTF.h"
15 
16 #include <cmath>
17 #include <tuple>
18 #include <vector>
19 
20 namespace skjson {
21 
22 // #define SK_JSON_REPORT_ERRORS
23 
24 static_assert( sizeof(Value) == 8, "");
25 static_assert(alignof(Value) == 8, "");
26 
27 static constexpr size_t kRecAlign = alignof(Value);
28 
init_tagged(Tag t)29 void Value::init_tagged(Tag t) {
30     memset(fData8, 0, sizeof(fData8));
31     fData8[Value::kTagOffset] = SkTo<uint8_t>(t);
32     SkASSERT(this->getTag() == t);
33 }
34 
35 // Pointer values store a type (in the upper kTagBits bits) and a pointer.
init_tagged_pointer(Tag t,void * p)36 void Value::init_tagged_pointer(Tag t, void* p) {
37     *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
38 
39     if (sizeof(Value) == sizeof(uintptr_t)) {
40         // For 64-bit, we rely on the pointer upper bits being unused/zero.
41         SkASSERT(!(fData8[kTagOffset] & kTagMask));
42         fData8[kTagOffset] |= SkTo<uint8_t>(t);
43     } else {
44         // For 32-bit, we need to zero-initialize the upper 32 bits
45         SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2);
46         this->cast<uintptr_t>()[kTagOffset >> 2] = 0;
47         fData8[kTagOffset] = SkTo<uint8_t>(t);
48     }
49 
50     SkASSERT(this->getTag()    == t);
51     SkASSERT(this->ptr<void>() == p);
52 }
53 
NullValue()54 NullValue::NullValue() {
55     this->init_tagged(Tag::kNull);
56     SkASSERT(this->getTag() == Tag::kNull);
57 }
58 
BoolValue(bool b)59 BoolValue::BoolValue(bool b) {
60     this->init_tagged(Tag::kBool);
61     *this->cast<bool>() = b;
62     SkASSERT(this->getTag() == Tag::kBool);
63 }
64 
NumberValue(int32_t i)65 NumberValue::NumberValue(int32_t i) {
66     this->init_tagged(Tag::kInt);
67     *this->cast<int32_t>() = i;
68     SkASSERT(this->getTag() == Tag::kInt);
69 }
70 
NumberValue(float f)71 NumberValue::NumberValue(float f) {
72     this->init_tagged(Tag::kFloat);
73     *this->cast<float>() = f;
74     SkASSERT(this->getTag() == Tag::kFloat);
75 }
76 
77 // Vector recs point to externally allocated slabs with the following layout:
78 //
79 //   [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage]
80 //
81 // Long strings use extra_alloc_size == 1 to store the \0 terminator.
82 //
83 template <typename T, size_t extra_alloc_size = 0>
MakeVector(const void * src,size_t size,SkArenaAlloc & alloc)84 static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) {
85     // The Ts are already in memory, so their size should be safe.
86     const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size;
87     auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign));
88 
89     *size_ptr = size;
90     sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T));
91 
92     return size_ptr;
93 }
94 
ArrayValue(const Value * src,size_t size,SkArenaAlloc & alloc)95 ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) {
96     this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc));
97     SkASSERT(this->getTag() == Tag::kArray);
98 }
99 
// Strings have two flavors:
//
// -- short strings (len <= 7) -> these are stored inline, in the record
//    (the last byte is reserved for the type tag/null terminator):
//
//        [str] [\0 ... \0] [tag]
//
//    All bytes following the string data are zero, so the tag byte doubles up as
//    the null terminator when len == 7 (this works 'cause kShortString == 0).
//
// -- long strings (len > 7) -> these are externally allocated vectors (VectorValue<char>).
//
// In both cases, the string data plus a null-char terminator are copied over.
//
113 //
114 namespace {
115 
116 // An internal string builder with a fast 8 byte short string load path
117 // (for the common case where the string is not at the end of the stream).
118 class FastString final : public Value {
119 public:
FastString(const char * src,size_t size,const char * eos,SkArenaAlloc & alloc)120     FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) {
121         SkASSERT(src <= eos);
122 
123         if (size > kMaxInlineStringSize) {
124             this->initLongString(src, size, alloc);
125             SkASSERT(this->getTag() == Tag::kString);
126             return;
127         }
128 
129         static_assert(static_cast<uint8_t>(Tag::kShortString) == 0, "please don't break this");
130         static_assert(sizeof(Value) == 8, "");
131 
132         // TODO: LIKELY
133         if (src + 7 <= eos) {
134             this->initFastShortString(src, size);
135         } else {
136             this->initShortString(src, size);
137         }
138 
139         SkASSERT(this->getTag() == Tag::kShortString);
140     }
141 
142 private:
143     static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 1;
144 
initLongString(const char * src,size_t size,SkArenaAlloc & alloc)145     void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) {
146         SkASSERT(size > kMaxInlineStringSize);
147 
148         this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc));
149 
150         auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin();
151         const_cast<char*>(data)[size] = '\0';
152     }
153 
initShortString(const char * src,size_t size)154     void initShortString(const char* src, size_t size) {
155         SkASSERT(size <= kMaxInlineStringSize);
156 
157         this->init_tagged(Tag::kShortString);
158         sk_careful_memcpy(this->cast<char>(), src, size);
159         // Null terminator provided by init_tagged() above (fData8 is zero-initialized).
160     }
161 
initFastShortString(const char * src,size_t size)162     void initFastShortString(const char* src, size_t size) {
163         SkASSERT(size <= kMaxInlineStringSize);
164 
165         // Load 8 chars and mask out the tag and \0 terminator.
166         uint64_t* s64 = this->cast<uint64_t>();
167         memcpy(s64, src, 8);
168 
169 #if defined(SK_CPU_LENDIAN)
170         *s64 &= 0x00ffffffffffffffULL >> ((kMaxInlineStringSize - size) * 8);
171 #else
172         static_assert(false, "Big-endian builds are not supported at this time.");
173 #endif
174     }
175 };
176 
177 } // namespace
178 
StringValue(const char * src,size_t size,SkArenaAlloc & alloc)179 StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) {
180     new (this) FastString(src, size, src, alloc);
181 }
182 
ObjectValue(const Member * src,size_t size,SkArenaAlloc & alloc)183 ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) {
184     this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc));
185     SkASSERT(this->getTag() == Tag::kObject);
186 }
187 
188 
189 // Boring public Value glue.
190 
// Equality-only string comparison: returns 0 when |a| and |b| hold the same
// null-terminated string, and 1 otherwise (no ordering information).
static int inline_strcmp(const char a[], const char b[]) {
    // Walk both strings in lockstep for as long as |a| has chars left.
    while (*a != 0) {
        if (*a != *b) {
            return 1;
        }
        ++a;
        ++b;
    }

    // |a| is exhausted: the strings match only if |b| ends here too.
    return (*b == 0) ? 0 : 1;
}
203 
operator [](const char * key) const204 const Value& ObjectValue::operator[](const char* key) const {
205     // Reverse search for duplicates resolution (policy: return last).
206     const auto* begin  = this->begin();
207     const auto* member = this->end();
208 
209     while (member > begin) {
210         --member;
211         if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) {
212             return member->fValue;
213         }
214     }
215 
216     static const Value g_null = NullValue();
217     return g_null;
218 }
219 
220 namespace {
221 
222 // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3].
223 //
224 // [1] https://github.com/Tencent/rapidjson/
225 // [2] https://github.com/chadaustin/sajson
226 // [3] https://pastebin.com/hnhSTL3h
227 
228 
// Character classification table, indexed by the (unsigned) character value.
// Each entry is a bitwise OR of the following class bits:
//
// bit 0 (0x01) - plain ASCII string character
// bit 1 (0x02) - whitespace
// bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
// bit 3 (0x08) - 0-9
// bit 4 (0x10) - 0-9 e E .
// bit 5 (0x20) - scope terminator (} ])
//
// Combined values used below:
//
//   3    - space               (0x01 | 0x02)
//   6    - tab, LF, CR         (0x02 | 0x04)
//   0x11 - . e E               (0x01 | 0x10)
//   0x19 - 0-9                 (0x01 | 0x08 | 0x10)
//   0x25 - ] }                 (0x01 | 0x04 | 0x20)
//
// Bytes >= 128 carry no flags at all.
static constexpr uint8_t g_token_flags[256] = {
 // 0    1    2    3    4    5    6    7      8    9    A    B    C    D    E    F
    4,   4,   4,   4,   4,   4,   4,   4,     4,   6,   6,   4,   4,   6,   4,   4, // 0
    4,   4,   4,   4,   4,   4,   4,   4,     4,   4,   4,   4,   4,   4,   4,   4, // 1
    3,   1,   4,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,   1,   0x11,1, // 2
 0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,  0x19,0x19,   1,   1,   1,   1,   1,   1, // 3
    1,   1,   1,   1,   1,   0x11,1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 4
    1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   4,0x25,   1,   1, // 5
    1,   1,   1,   1,   1,   0x11,1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 6
    1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,0x25,   1,   1, // 7

 // 128-255
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0
};
252 
is_ws(char c)253 static inline bool is_ws(char c)       { return g_token_flags[static_cast<uint8_t>(c)] & 0x02; }
is_eostring(char c)254 static inline bool is_eostring(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x04; }
is_digit(char c)255 static inline bool is_digit(char c)    { return g_token_flags[static_cast<uint8_t>(c)] & 0x08; }
is_numeric(char c)256 static inline bool is_numeric(char c)  { return g_token_flags[static_cast<uint8_t>(c)] & 0x10; }
is_eoscope(char c)257 static inline bool is_eoscope(char c)  { return g_token_flags[static_cast<uint8_t>(c)] & 0x20; }
258 
skip_ws(const char * p)259 static inline const char* skip_ws(const char* p) {
260     while (is_ws(*p)) ++p;
261     return p;
262 }
263 
pow10(int32_t exp)264 static inline float pow10(int32_t exp) {
265     static constexpr float g_pow10_table[63] =
266     {
267        1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f,
268        1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f,
269        1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f,
270        1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f,
271        1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f,
272        1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f,
273        1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f,
274        1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f
275     };
276 
277     static constexpr int32_t k_exp_offset = SK_ARRAY_COUNT(g_pow10_table) / 2;
278 
279     // We only support negative exponents for now.
280     SkASSERT(exp <= 0);
281 
282     return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset]
283                                   : std::pow(10.0f, static_cast<float>(exp));
284 }
285 
286 class DOMParser {
287 public:
DOMParser(SkArenaAlloc & alloc)288     explicit DOMParser(SkArenaAlloc& alloc)
289         : fAlloc(alloc) {
290         fValueStack.reserve(kValueStackReserve);
291         fUnescapeBuffer.reserve(kUnescapeBufferReserve);
292     }
293 
    // Parses the |size| bytes starting at |p| and returns the root value.
    //
    // The top-level element must be an object or an array.  On any failure a NullValue
    // is returned (and error details are recorded when SK_JSON_REPORT_ERRORS is defined).
    //
    // The parser is a goto-driven state machine.  Nesting is tracked with
    // fValueStack/fScopeIndex rather than with recursion.
    const Value parse(const char* p, size_t size) {
        if (!size) {
            return this->error(NullValue(), p, "invalid empty input");
        }

        // Last char of the input; after trimming below, the last non-whitespace char.
        const char* p_stop = p + size - 1;

        // We're only checking for end-of-stream on object/array close('}',']'),
        // so we must trim any whitespace from the buffer tail.
        while (p_stop > p && is_ws(*p_stop)) --p_stop;

        SkASSERT(p_stop >= p && p_stop < p + size);
        if (!is_eoscope(*p_stop)) {
            return this->error(NullValue(), p_stop, "invalid top-level value");
        }

        // From here on, *p_stop is a '}' or ']' sentinel: the unbounded scanners
        // (skip_ws, number/keyword matchers) all stop at or before it.
        p = skip_ws(p);

        switch (*p) {
        case '{':
            goto match_object;
        case '[':
            goto match_array;
        default:
            return this->error(NullValue(), p, "invalid top-level value");
        }

    // State: at an object opening brace.
    match_object:
        SkASSERT(*p == '{');
        p = skip_ws(p + 1);

        this->pushObjectScope();

        // Empty object.
        if (*p == '}') goto pop_object;

        // goto match_object_key;
    // State: expecting a quoted member key, followed by ':'.
    match_object_key:
        p = skip_ws(p);
        if (*p != '"') return this->error(NullValue(), p, "expected object key");

        p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) {
            this->pushObjectKey(key, size, eos);
        });
        // matchString() has already recorded the error.
        if (!p) return NullValue();

        p = skip_ws(p);
        if (*p != ':') return this->error(NullValue(), p, "expected ':' separator");

        ++p;

        // goto match_value;
    // State: expecting a value (object member value or array element).
    match_value:
        p = skip_ws(p);

        switch (*p) {
        case '\0':
            return this->error(NullValue(), p, "unexpected input end");
        case '"':
            p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) {
                this->pushString(str, size, eos);
            });
            break;
        case '[':
            goto match_array;
        case 'f':
            p = this->matchFalse(p);
            break;
        case 'n':
            p = this->matchNull(p);
            break;
        case 't':
            p = this->matchTrue(p);
            break;
        case '{':
            goto match_object;
        default:
            // Anything else must be a number.
            p = this->matchNumber(p);
            break;
        }

        // The matchers return nullptr on failure (after recording the error).
        if (!p) return NullValue();

        // goto match_post_value;
    // State: just past a complete value; expecting ',' or a scope terminator.
    match_post_value:
        SkASSERT(!this->inTopLevelScope());

        p = skip_ws(p);
        switch (*p) {
        case ',':
            ++p;
            if (this->inObjectScope()) {
                goto match_object_key;
            } else {
                SkASSERT(this->inArrayScope());
                goto match_value;
            }
        case ']':
            goto pop_array;
        case '}':
            goto pop_object;
        default:
            return this->error(NullValue(), p - 1, "unexpected value-trailing token");
        }

        // unreachable
        SkASSERT(false);

    // State: at a closing brace.
    pop_object:
        SkASSERT(*p == '}');

        // Mismatched terminator: '}' while inside an array.
        if (this->inArrayScope()) {
            return this->error(NullValue(), p, "unexpected object terminator");
        }

        this->popObjectScope();

        // goto pop_common
    // State: a scope was just closed; |p| still points at its terminator.
    pop_common:
        SkASSERT(is_eoscope(*p));

        if (this->inTopLevelScope()) {
            SkASSERT(fValueStack.size() == 1);

            // Success condition: parsed the top level element and reached the stop token.
            return p == p_stop
                ? fValueStack.front()
                : this->error(NullValue(), p + 1, "trailing root garbage");
        }

        // Still inside an enclosing scope, but out of input.
        if (p == p_stop) {
            return this->error(NullValue(), p, "unexpected end-of-input");
        }

        ++p;

        // The closed scope is itself a value within the enclosing scope.
        goto match_post_value;

    // State: at an array opening bracket.
    match_array:
        SkASSERT(*p == '[');
        p = skip_ws(p + 1);

        this->pushArrayScope();

        if (*p != ']') goto match_value;

        // goto pop_array;
    // State: at a closing bracket (also reached directly for empty arrays).
    pop_array:
        SkASSERT(*p == ']');

        // Mismatched terminator: ']' while inside an object.
        if (this->inObjectScope()) {
            return this->error(NullValue(), p, "unexpected array terminator");
        }

        this->popArrayScope();

        goto pop_common;

        SkASSERT(false);
        return NullValue();
    }
454 
getError() const455     std::tuple<const char*, const SkString> getError() const {
456         return std::make_tuple(fErrorToken, fErrorMessage);
457     }
458 
459 private:
460     SkArenaAlloc&         fAlloc;
461 
462     // Pending values stack.
463     static constexpr size_t kValueStackReserve = 256;
464     std::vector<Value>    fValueStack;
465 
466     // String unescape buffer.
467     static constexpr size_t kUnescapeBufferReserve = 512;
468     std::vector<char>     fUnescapeBuffer;
469 
    // Tracks the current object/array scope, as an index into fValueStack:
    //
    //   - for objects: fScopeIndex =  (index of first value in scope)
    //   - for arrays : fScopeIndex = -(index of first value in scope)
    //
    // fScopeIndex == 0 IFF we are at the top level (no current/active scope).
476     intptr_t              fScopeIndex = 0;
477 
478     // Error reporting.
479     const char*           fErrorToken = nullptr;
480     SkString              fErrorMessage;
481 
inTopLevelScope() const482     bool inTopLevelScope() const { return fScopeIndex == 0; }
inObjectScope() const483     bool inObjectScope()   const { return fScopeIndex >  0; }
inArrayScope() const484     bool inArrayScope()    const { return fScopeIndex <  0; }
485 
486     // Helper for masquerading raw primitive types as Values (bypassing tagging, etc).
487     template <typename T>
488     class RawValue final : public Value {
489     public:
RawValue(T v)490         explicit RawValue(T v) {
491             static_assert(sizeof(T) <= sizeof(Value), "");
492             *this->cast<T>() = v;
493         }
494 
operator *() const495         T operator *() const { return *this->cast<T>(); }
496     };
497 
498     template <typename VectorT>
popScopeAsVec(size_t scope_start)499     void popScopeAsVec(size_t scope_start) {
500         SkASSERT(scope_start > 0);
501         SkASSERT(scope_start <= fValueStack.size());
502 
503         using T = typename VectorT::ValueT;
504         static_assert( sizeof(T) >=  sizeof(Value), "");
505         static_assert( sizeof(T)  %  sizeof(Value) == 0, "");
506         static_assert(alignof(T) == alignof(Value), "");
507 
508         const auto scope_count = fValueStack.size() - scope_start,
509                          count = scope_count / (sizeof(T) / sizeof(Value));
510         SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0);
511 
512         const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start);
513 
514         // Restore the previous scope index from saved placeholder value,
515         // and instantiate as a vector of values in scope.
516         auto& placeholder = fValueStack[scope_start - 1];
517         fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder);
518         placeholder = VectorT(begin, count, fAlloc);
519 
520         // Drop the (consumed) values in scope.
521         fValueStack.resize(scope_start);
522     }
523 
pushObjectScope()524     void pushObjectScope() {
525         // Save a scope index now, and then later we'll overwrite this value as the Object itself.
526         fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
527 
528         // New object scope.
529         fScopeIndex = SkTo<intptr_t>(fValueStack.size());
530     }
531 
popObjectScope()532     void popObjectScope() {
533         SkASSERT(this->inObjectScope());
534         this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex));
535 
536         SkDEBUGCODE(
537             const auto& obj = fValueStack.back().as<ObjectValue>();
538             SkASSERT(obj.is<ObjectValue>());
539             for (const auto& member : obj) {
540                 SkASSERT(member.fKey.is<StringValue>());
541             }
542         )
543     }
544 
pushArrayScope()545     void pushArrayScope() {
546         // Save a scope index now, and then later we'll overwrite this value as the Array itself.
547         fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
548 
549         // New array scope.
550         fScopeIndex = -SkTo<intptr_t>(fValueStack.size());
551     }
552 
popArrayScope()553     void popArrayScope() {
554         SkASSERT(this->inArrayScope());
555         this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex));
556 
557         SkDEBUGCODE(
558             const auto& arr = fValueStack.back().as<ArrayValue>();
559             SkASSERT(arr.is<ArrayValue>());
560         )
561     }
562 
pushObjectKey(const char * key,size_t size,const char * eos)563     void pushObjectKey(const char* key, size_t size, const char* eos) {
564         SkASSERT(this->inObjectScope());
565         SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex));
566         SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1));
567         this->pushString(key, size, eos);
568     }
569 
pushTrue()570     void pushTrue() {
571         fValueStack.push_back(BoolValue(true));
572     }
573 
pushFalse()574     void pushFalse() {
575         fValueStack.push_back(BoolValue(false));
576     }
577 
pushNull()578     void pushNull() {
579         fValueStack.push_back(NullValue());
580     }
581 
pushString(const char * s,size_t size,const char * eos)582     void pushString(const char* s, size_t size, const char* eos) {
583         fValueStack.push_back(FastString(s, size, eos, fAlloc));
584     }
585 
pushInt32(int32_t i)586     void pushInt32(int32_t i) {
587         fValueStack.push_back(NumberValue(i));
588     }
589 
pushFloat(float f)590     void pushFloat(float f) {
591         fValueStack.push_back(NumberValue(f));
592     }
593 
594     template <typename T>
error(T && ret_val,const char * p,const char * msg)595     T error(T&& ret_val, const char* p, const char* msg) {
596 #if defined(SK_JSON_REPORT_ERRORS)
597         fErrorToken = p;
598         fErrorMessage.set(msg);
599 #endif
600         return ret_val;
601     }
602 
matchTrue(const char * p)603     const char* matchTrue(const char* p) {
604         SkASSERT(p[0] == 't');
605 
606         if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') {
607             this->pushTrue();
608             return p + 4;
609         }
610 
611         return this->error(nullptr, p, "invalid token");
612     }
613 
matchFalse(const char * p)614     const char* matchFalse(const char* p) {
615         SkASSERT(p[0] == 'f');
616 
617         if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') {
618             this->pushFalse();
619             return p + 5;
620         }
621 
622         return this->error(nullptr, p, "invalid token");
623     }
624 
matchNull(const char * p)625     const char* matchNull(const char* p) {
626         SkASSERT(p[0] == 'n');
627 
628         if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') {
629             this->pushNull();
630             return p + 4;
631         }
632 
633         return this->error(nullptr, p, "invalid token");
634     }
635 
unescapeString(const char * begin,const char * end)636     const std::vector<char>* unescapeString(const char* begin, const char* end) {
637         fUnescapeBuffer.clear();
638 
639         for (const auto* p = begin; p != end; ++p) {
640             if (*p != '\\') {
641                 fUnescapeBuffer.push_back(*p);
642                 continue;
643             }
644 
645             if (++p == end) {
646                 return nullptr;
647             }
648 
649             switch (*p) {
650             case  '"': fUnescapeBuffer.push_back( '"'); break;
651             case '\\': fUnescapeBuffer.push_back('\\'); break;
652             case  '/': fUnescapeBuffer.push_back( '/'); break;
653             case  'b': fUnescapeBuffer.push_back('\b'); break;
654             case  'f': fUnescapeBuffer.push_back('\f'); break;
655             case  'n': fUnescapeBuffer.push_back('\n'); break;
656             case  'r': fUnescapeBuffer.push_back('\r'); break;
657             case  't': fUnescapeBuffer.push_back('\t'); break;
658             case  'u': {
659                 if (p + 4 >= end) {
660                     return nullptr;
661                 }
662 
663                 uint32_t hexed;
664                 const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'};
665                 const auto* eos = SkParse::FindHex(hex_str, &hexed);
666                 if (!eos || *eos) {
667                     return nullptr;
668                 }
669 
670                 char utf8[SkUTF::kMaxBytesInUTF8Sequence];
671                 const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8);
672                 fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len);
673                 p += 4;
674             } break;
675             default: return nullptr;
676             }
677         }
678 
679         return &fUnescapeBuffer;
680     }
681 
    // Matches a quoted string starting at |p| (which must point at the opening quote).
    //
    // On success, invokes func(str, size, eos) with the (unescaped, if needed) string
    // contents and returns the position following the closing quote.  |eos| is the last
    // valid char of the buffer holding |str|.  On failure, records an error and returns
    // nullptr.
    //
    //   p_stop - the last non-whitespace char of the input (a '}' or ']' sentinel)
    template <typename MatchFunc>
    const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) {
        SkASSERT(*p == '"');
        const auto* s_begin = p + 1;
        bool requires_unescape = false;

        do {
            // Consume string chars.
            // This is the fast path, and hopefully we only hit it once then quick-exit below.
            // (note: the scan restarts at p + 1, skipping the char |p| currently points at)
            for (p = p + 1; !is_eostring(*p); ++p);

            if (*p == '"') {
                // Valid string found.
                if (!requires_unescape) {
                    // The string is used in-place, straight from the input buffer.
                    func(s_begin, p - s_begin, p_stop);
                } else {
                    // Slow unescape.  We could avoid this extra copy with some effort,
                    // but in practice escaped strings should be rare.
                    const auto* buf = this->unescapeString(s_begin, p);
                    if (!buf) {
                        // Invalid escape sequence: bail to the error return below.
                        break;
                    }

                    SkASSERT(!buf->empty());
                    func(buf->data(), buf->size(), buf->data() + buf->size() - 1);
                }
                return p + 1;
            }

            if (*p == '\\') {
                requires_unescape = true;
                // Step onto the escaped char, so the next scan (which starts at p + 1)
                // skips it -- this keeps an escaped quote from terminating the string.
                ++p;
                continue;
            }

            // End-of-scope chars are special: we use them to tag the end of the input.
            // Thus they cannot be consumed indiscriminately -- we need to check if we hit the
            // end of the input.  To that effect, we treat them as string terminators above,
            // then we catch them here.
            if (is_eoscope(*p)) {
                // 'continue' jumps to the loop condition, which performs the end-of-input check.
                continue;
            }

            // Invalid/unexpected char.
            break;
        } while (p != p_stop);

        // Premature end-of-input, or illegal string char.
        return this->error(nullptr, s_begin - 1, "invalid string");
    }
732 
matchFastFloatDecimalPart(const char * p,int sign,float f,int exp)733     const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) {
734         SkASSERT(exp <= 0);
735 
736         for (;;) {
737             if (!is_digit(*p)) break;
738             f = f * 10.f + (*p++ - '0'); --exp;
739             if (!is_digit(*p)) break;
740             f = f * 10.f + (*p++ - '0'); --exp;
741         }
742 
743         const auto decimal_scale = pow10(exp);
744         if (is_numeric(*p) || !decimal_scale) {
745             SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale);
746             // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor.
747             return nullptr;
748         }
749 
750         this->pushFloat(sign * f * decimal_scale);
751 
752         return p;
753     }
754 
matchFastFloatPart(const char * p,int sign,float f)755     const char* matchFastFloatPart(const char* p, int sign, float f) {
756         for (;;) {
757             if (!is_digit(*p)) break;
758             f = f * 10.f + (*p++ - '0');
759             if (!is_digit(*p)) break;
760             f = f * 10.f + (*p++ - '0');
761         }
762 
763         if (!is_numeric(*p)) {
764             // Matched (integral) float.
765             this->pushFloat(sign * f);
766             return p;
767         }
768 
769         return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0)
770                            : nullptr;
771     }
772 
matchFast32OrFloat(const char * p)773     const char* matchFast32OrFloat(const char* p) {
774         int sign = 1;
775         if (*p == '-') {
776             sign = -1;
777             ++p;
778         }
779 
780         const auto* digits_start = p;
781 
782         int32_t n32 = 0;
783 
784         // This is the largest absolute int32 value we can handle before
785         // risking overflow *on the next digit* (214748363).
786         static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10;
787 
788         if (is_digit(*p)) {
789             n32 = (*p++ - '0');
790             for (;;) {
791                 if (!is_digit(*p) || n32 > kMaxInt32) break;
792                 n32 = n32 * 10 + (*p++ - '0');
793             }
794         }
795 
796         if (!is_numeric(*p)) {
797             // Did we actually match any digits?
798             if (p > digits_start) {
799                 this->pushInt32(sign * n32);
800                 return p;
801             }
802             return nullptr;
803         }
804 
805         if (*p == '.') {
806             const auto* decimals_start = ++p;
807 
808             int exp = 0;
809 
810             for (;;) {
811                 if (!is_digit(*p) || n32 > kMaxInt32) break;
812                 n32 = n32 * 10 + (*p++ - '0'); --exp;
813                 if (!is_digit(*p) || n32 > kMaxInt32) break;
814                 n32 = n32 * 10 + (*p++ - '0'); --exp;
815             }
816 
817             if (!is_numeric(*p)) {
818                 // Did we actually match any digits?
819                 if (p > decimals_start) {
820                     this->pushFloat(sign * n32 * pow10(exp));
821                     return p;
822                 }
823                 return nullptr;
824             }
825 
826             if (n32 > kMaxInt32) {
827                 // we ran out on n32 bits
828                 return this->matchFastFloatDecimalPart(p, sign, n32, exp);
829             }
830         }
831 
832         return this->matchFastFloatPart(p, sign, n32);
833     }
834 
matchNumber(const char * p)835     const char* matchNumber(const char* p) {
836         if (const auto* fast = this->matchFast32OrFloat(p)) return fast;
837 
838         // slow fallback
839         char* matched;
840         float f = strtof(p, &matched);
841         if (matched > p) {
842             this->pushFloat(f);
843             return matched;
844         }
845         return this->error(nullptr, p, "invalid numeric token");
846     }
847 };
848 
Write(const Value & v,SkWStream * stream)849 void Write(const Value& v, SkWStream* stream) {
850     switch (v.getType()) {
851     case Value::Type::kNull:
852         stream->writeText("null");
853         break;
854     case Value::Type::kBool:
855         stream->writeText(*v.as<BoolValue>() ? "true" : "false");
856         break;
857     case Value::Type::kNumber:
858         stream->writeScalarAsText(*v.as<NumberValue>());
859         break;
860     case Value::Type::kString:
861         stream->writeText("\"");
862         stream->writeText(v.as<StringValue>().begin());
863         stream->writeText("\"");
864         break;
865     case Value::Type::kArray: {
866         const auto& array = v.as<ArrayValue>();
867         stream->writeText("[");
868         bool first_value = true;
869         for (const auto& v : array) {
870             if (!first_value) stream->writeText(",");
871             Write(v, stream);
872             first_value = false;
873         }
874         stream->writeText("]");
875         break;
876     }
877     case Value::Type::kObject:
878         const auto& object = v.as<ObjectValue>();
879         stream->writeText("{");
880         bool first_member = true;
881         for (const auto& member : object) {
882             SkASSERT(member.fKey.getType() == Value::Type::kString);
883             if (!first_member) stream->writeText(",");
884             Write(member.fKey, stream);
885             stream->writeText(":");
886             Write(member.fValue, stream);
887             first_member = false;
888         }
889         stream->writeText("}");
890         break;
891     }
892 }
893 
894 } // namespace
895 
toString() const896 SkString Value::toString() const {
897     SkDynamicMemoryWStream wstream;
898     Write(*this, &wstream);
899     const auto data = wstream.detachAsData();
900     // TODO: is there a better way to pass data around without copying?
901     return SkString(static_cast<const char*>(data->data()), data->size());
902 }
903 
904 static constexpr size_t kMinChunkSize = 4096;
905 
DOM(const char * data,size_t size)906 DOM::DOM(const char* data, size_t size)
907     : fAlloc(kMinChunkSize) {
908     DOMParser parser(fAlloc);
909 
910     fRoot = parser.parse(data, size);
911 }
912 
write(SkWStream * stream) const913 void DOM::write(SkWStream* stream) const {
914     Write(fRoot, stream);
915 }
916 
917 } // namespace skjson
918