1 /*
2 * Copyright 2018 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkJSON.h"
9
10 #include "SkMalloc.h"
11 #include "SkParse.h"
12 #include "SkStream.h"
13 #include "SkString.h"
14 #include "SkUTF.h"
15
16 #include <cmath>
17 #include <tuple>
18 #include <vector>
19
20 namespace skjson {
21
22 // #define SK_JSON_REPORT_ERRORS
23
// Value records must be exactly 8 bytes, 8-byte aligned: the tag byte handling and the
// inline short-string packing below are written against this layout.
static_assert( sizeof(Value) == 8, "");
static_assert(alignof(Value) == 8, "");

// Alignment requested from the arena for externally allocated vector slabs (see MakeVector).
static constexpr size_t kRecAlign = alignof(Value);
28
init_tagged(Tag t)29 void Value::init_tagged(Tag t) {
30 memset(fData8, 0, sizeof(fData8));
31 fData8[Value::kTagOffset] = SkTo<uint8_t>(t);
32 SkASSERT(this->getTag() == t);
33 }
34
35 // Pointer values store a type (in the upper kTagBits bits) and a pointer.
init_tagged_pointer(Tag t,void * p)36 void Value::init_tagged_pointer(Tag t, void* p) {
37 *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p);
38
39 if (sizeof(Value) == sizeof(uintptr_t)) {
40 // For 64-bit, we rely on the pointer upper bits being unused/zero.
41 SkASSERT(!(fData8[kTagOffset] & kTagMask));
42 fData8[kTagOffset] |= SkTo<uint8_t>(t);
43 } else {
44 // For 32-bit, we need to zero-initialize the upper 32 bits
45 SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2);
46 this->cast<uintptr_t>()[kTagOffset >> 2] = 0;
47 fData8[kTagOffset] = SkTo<uint8_t>(t);
48 }
49
50 SkASSERT(this->getTag() == t);
51 SkASSERT(this->ptr<void>() == p);
52 }
53
NullValue()54 NullValue::NullValue() {
55 this->init_tagged(Tag::kNull);
56 SkASSERT(this->getTag() == Tag::kNull);
57 }
58
BoolValue(bool b)59 BoolValue::BoolValue(bool b) {
60 this->init_tagged(Tag::kBool);
61 *this->cast<bool>() = b;
62 SkASSERT(this->getTag() == Tag::kBool);
63 }
64
NumberValue(int32_t i)65 NumberValue::NumberValue(int32_t i) {
66 this->init_tagged(Tag::kInt);
67 *this->cast<int32_t>() = i;
68 SkASSERT(this->getTag() == Tag::kInt);
69 }
70
NumberValue(float f)71 NumberValue::NumberValue(float f) {
72 this->init_tagged(Tag::kFloat);
73 *this->cast<float>() = f;
74 SkASSERT(this->getTag() == Tag::kFloat);
75 }
76
77 // Vector recs point to externally allocated slabs with the following layout:
78 //
79 // [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage]
80 //
81 // Long strings use extra_alloc_size == 1 to store the \0 terminator.
82 //
83 template <typename T, size_t extra_alloc_size = 0>
MakeVector(const void * src,size_t size,SkArenaAlloc & alloc)84 static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) {
85 // The Ts are already in memory, so their size should be safe.
86 const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size;
87 auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign));
88
89 *size_ptr = size;
90 sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T));
91
92 return size_ptr;
93 }
94
ArrayValue(const Value * src,size_t size,SkArenaAlloc & alloc)95 ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) {
96 this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc));
97 SkASSERT(this->getTag() == Tag::kArray);
98 }
99
100 // Strings have two flavors:
101 //
102 // -- short strings (len <= 7) -> these are stored inline, in the record
103 // (one byte reserved for null terminator/type):
104 //
105 // [str] [\0]|[max_len - actual_len]
106 //
107 // Storing [max_len - actual_len] allows the 'len' field to double-up as a
108 // null terminator when size == max_len (this works 'cause kShortString == 0).
109 //
110 // -- long strings (len > 7) -> these are externally allocated vectors (VectorRec<char>).
111 //
112 // The string data plus a null-char terminator are copied over.
113 //
114 namespace {
115
116 // An internal string builder with a fast 8 byte short string load path
117 // (for the common case where the string is not at the end of the stream).
118 class FastString final : public Value {
119 public:
FastString(const char * src,size_t size,const char * eos,SkArenaAlloc & alloc)120 FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) {
121 SkASSERT(src <= eos);
122
123 if (size > kMaxInlineStringSize) {
124 this->initLongString(src, size, alloc);
125 SkASSERT(this->getTag() == Tag::kString);
126 return;
127 }
128
129 static_assert(static_cast<uint8_t>(Tag::kShortString) == 0, "please don't break this");
130 static_assert(sizeof(Value) == 8, "");
131
132 // TODO: LIKELY
133 if (src + 7 <= eos) {
134 this->initFastShortString(src, size);
135 } else {
136 this->initShortString(src, size);
137 }
138
139 SkASSERT(this->getTag() == Tag::kShortString);
140 }
141
142 private:
143 static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 1;
144
initLongString(const char * src,size_t size,SkArenaAlloc & alloc)145 void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) {
146 SkASSERT(size > kMaxInlineStringSize);
147
148 this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc));
149
150 auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin();
151 const_cast<char*>(data)[size] = '\0';
152 }
153
initShortString(const char * src,size_t size)154 void initShortString(const char* src, size_t size) {
155 SkASSERT(size <= kMaxInlineStringSize);
156
157 this->init_tagged(Tag::kShortString);
158 sk_careful_memcpy(this->cast<char>(), src, size);
159 // Null terminator provided by init_tagged() above (fData8 is zero-initialized).
160 }
161
initFastShortString(const char * src,size_t size)162 void initFastShortString(const char* src, size_t size) {
163 SkASSERT(size <= kMaxInlineStringSize);
164
165 // Load 8 chars and mask out the tag and \0 terminator.
166 uint64_t* s64 = this->cast<uint64_t>();
167 memcpy(s64, src, 8);
168
169 #if defined(SK_CPU_LENDIAN)
170 *s64 &= 0x00ffffffffffffffULL >> ((kMaxInlineStringSize - size) * 8);
171 #else
172 static_assert(false, "Big-endian builds are not supported at this time.");
173 #endif
174 }
175 };
176
177 } // namespace
178
StringValue(const char * src,size_t size,SkArenaAlloc & alloc)179 StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) {
180 new (this) FastString(src, size, src, alloc);
181 }
182
ObjectValue(const Member * src,size_t size,SkArenaAlloc & alloc)183 ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) {
184 this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc));
185 SkASSERT(this->getTag() == Tag::kObject);
186 }
187
188
189 // Boring public Value glue.
190
// Equality-only comparison of two null-terminated strings.
// Returns 0 when |a| and |b| are identical and 1 otherwise (no ordering information).
static int inline_strcmp(const char a[], const char b[]) {
    while (*a != '\0') {
        if (*a != *b) {
            return 1;
        }
        ++a;
        ++b;
    }

    // |a| is exhausted: the strings match only if |b| ends here too.
    return (*b != '\0') ? 1 : 0;
}
203
operator [](const char * key) const204 const Value& ObjectValue::operator[](const char* key) const {
205 // Reverse search for duplicates resolution (policy: return last).
206 const auto* begin = this->begin();
207 const auto* member = this->end();
208
209 while (member > begin) {
210 --member;
211 if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) {
212 return member->fValue;
213 }
214 }
215
216 static const Value g_null = NullValue();
217 return g_null;
218 }
219
220 namespace {
221
222 // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3].
223 //
224 // [1] https://github.com/Tencent/rapidjson/
225 // [2] https://github.com/chadaustin/sajson
226 // [3] https://pastebin.com/hnhSTL3h
227
228
// Per-byte classification table used by the lexer (indexed by the unsigned byte value).
//
// bit 0 (0x01) - plain ASCII string character
// bit 1 (0x02) - whitespace
// bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
// bit 3 (0x08) - 0-9
// bit 4 (0x10) - 0-9 e E .
// bit 5 (0x20) - scope terminator (} ])
//
// Composite values appearing in the table:
//   3    = 0x01|0x02       -> ' '
//   6    = 0x02|0x04       -> \t \n \r
//   0x11 = 0x01|0x10       -> . E e
//   0x19 = 0x01|0x08|0x10  -> 0-9
//   0x25 = 0x01|0x04|0x20  -> ] }
//   0    = no flags        -> all bytes >= 0x80
static constexpr uint8_t g_token_flags[256] = {
    // 0 1 2 3 4 5 6 7 8 9 A B C D E F
    4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 4, 4, 6, 4, 4, // 0
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 1
    3, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0x11,1, // 2
    0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19, 0x19,0x19, 1, 1, 1, 1, 1, 1, // 3
    1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,0x25, 1, 1, // 5
    1, 1, 1, 1, 1, 0x11,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,0x25, 1, 1, // 7

    // 128-255
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0
};
252
is_ws(char c)253 static inline bool is_ws(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x02; }
is_eostring(char c)254 static inline bool is_eostring(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x04; }
is_digit(char c)255 static inline bool is_digit(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x08; }
is_numeric(char c)256 static inline bool is_numeric(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x10; }
is_eoscope(char c)257 static inline bool is_eoscope(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x20; }
258
skip_ws(const char * p)259 static inline const char* skip_ws(const char* p) {
260 while (is_ws(*p)) ++p;
261 return p;
262 }
263
pow10(int32_t exp)264 static inline float pow10(int32_t exp) {
265 static constexpr float g_pow10_table[63] =
266 {
267 1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f,
268 1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f,
269 1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f,
270 1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f,
271 1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f,
272 1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f,
273 1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f,
274 1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f
275 };
276
277 static constexpr int32_t k_exp_offset = SK_ARRAY_COUNT(g_pow10_table) / 2;
278
279 // We only support negative exponents for now.
280 SkASSERT(exp <= 0);
281
282 return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset]
283 : std::pow(10.0f, static_cast<float>(exp));
284 }
285
286 class DOMParser {
287 public:
DOMParser(SkArenaAlloc & alloc)288 explicit DOMParser(SkArenaAlloc& alloc)
289 : fAlloc(alloc) {
290 fValueStack.reserve(kValueStackReserve);
291 fUnescapeBuffer.reserve(kUnescapeBufferReserve);
292 }
293
    // Parses the |size| bytes starting at |p| as a JSON document.
    //
    // The root must be an object or an array. Returns the root value on success, or a
    // null value on any error (error details are only recorded by error() when
    // SK_JSON_REPORT_ERRORS is defined).
    //
    // Implemented as a goto-driven state machine:
    //
    //   match_object / match_array -> open a scope
    //   match_object_key           -> "key" ':'         (object scopes only)
    //   match_value                -> any value; nested scopes jump back to match_object/array
    //   match_post_value           -> ',' or a scope terminator
    //   pop_object / pop_array     -> close the scope, then pop_common
    //   pop_common                 -> finish (top level) or continue with match_post_value
    const Value parse(const char* p, size_t size) {
        if (!size) {
            return this->error(NullValue(), p, "invalid empty input");
        }

        // Last byte of the input; after trimming, the last non-whitespace byte.
        const char* p_stop = p + size - 1;

        // We're only checking for end-of-stream on object/array close('}',']'),
        // so we must trim any whitespace from the buffer tail.
        while (p_stop > p && is_ws(*p_stop)) --p_stop;

        SkASSERT(p_stop >= p && p_stop < p + size);
        // The stop token must close a scope: it is what the end-of-input checks below
        // (and in matchString) compare against.
        if (!is_eoscope(*p_stop)) {
            return this->error(NullValue(), p_stop, "invalid top-level value");
        }

        p = skip_ws(p);

        switch (*p) {
        case '{':
            goto match_object;
        case '[':
            goto match_array;
        default:
            return this->error(NullValue(), p, "invalid top-level value");
        }

    match_object:
        SkASSERT(*p == '{');
        p = skip_ws(p + 1);

        this->pushObjectScope();

        // Empty object.
        if (*p == '}') goto pop_object;

        // goto match_object_key;
    match_object_key:
        p = skip_ws(p);
        if (*p != '"') return this->error(NullValue(), p, "expected object key");

        p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) {
            this->pushObjectKey(key, size, eos);
        });
        // matchString() returns nullptr on failure.
        if (!p) return NullValue();

        p = skip_ws(p);
        if (*p != ':') return this->error(NullValue(), p, "expected ':' separator");

        ++p;

        // goto match_value;
    match_value:
        p = skip_ws(p);

        // Dispatch on the first character of the value.
        switch (*p) {
        case '\0':
            return this->error(NullValue(), p, "unexpected input end");
        case '"':
            p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) {
                this->pushString(str, size, eos);
            });
            break;
        case '[':
            goto match_array;
        case 'f':
            p = this->matchFalse(p);
            break;
        case 'n':
            p = this->matchNull(p);
            break;
        case 't':
            p = this->matchTrue(p);
            break;
        case '{':
            goto match_object;
        default:
            // Anything else is attempted as a number.
            p = this->matchNumber(p);
            break;
        }

        // All match*() helpers return nullptr on failure.
        if (!p) return NullValue();

        // goto match_post_value;
    match_post_value:
        SkASSERT(!this->inTopLevelScope());

        p = skip_ws(p);
        switch (*p) {
        case ',':
            ++p;
            // The current scope type decides what may follow the separator.
            if (this->inObjectScope()) {
                goto match_object_key;
            } else {
                SkASSERT(this->inArrayScope());
                goto match_value;
            }
        case ']':
            goto pop_array;
        case '}':
            goto pop_object;
        default:
            return this->error(NullValue(), p - 1, "unexpected value-trailing token");
        }

        // unreachable
        SkASSERT(false);

    pop_object:
        SkASSERT(*p == '}');

        // '}' while an array scope is active.
        if (this->inArrayScope()) {
            return this->error(NullValue(), p, "unexpected object terminator");
        }

        this->popObjectScope();

        // goto pop_common
    pop_common:
        SkASSERT(is_eoscope(*p));

        if (this->inTopLevelScope()) {
            SkASSERT(fValueStack.size() == 1);

            // Success condition: parsed the top level element and reached the stop token.
            return p == p_stop
                ? fValueStack.front()
                : this->error(NullValue(), p + 1, "trailing root garbage");
        }

        // Reached the stop token while scopes are still open.
        if (p == p_stop) {
            return this->error(NullValue(), p, "unexpected end-of-input");
        }

        ++p;

        // The scope just closed is itself a value within its parent scope.
        goto match_post_value;

    match_array:
        SkASSERT(*p == '[');
        p = skip_ws(p + 1);

        this->pushArrayScope();

        // Non-empty array: parse the first element; otherwise fall through to pop_array.
        if (*p != ']') goto match_value;

        // goto pop_array;
    pop_array:
        SkASSERT(*p == ']');

        // ']' while an object scope is active.
        if (this->inObjectScope()) {
            return this->error(NullValue(), p, "unexpected array terminator");
        }

        this->popArrayScope();

        goto pop_common;

        // unreachable
        SkASSERT(false);
        return NullValue();
    }
454
getError() const455 std::tuple<const char*, const SkString> getError() const {
456 return std::make_tuple(fErrorToken, fErrorMessage);
457 }
458
459 private:
    // Arena handed to the array/object/string constructors for their external storage.
    SkArenaAlloc& fAlloc;

    // Pending values stack.
    // Each open scope starts with one placeholder slot (see pushObjectScope/pushArrayScope),
    // followed by the values parsed so far in that scope.
    static constexpr size_t kValueStackReserve = 256;
    std::vector<Value> fValueStack;

    // String unescape buffer (scratch storage reused by unescapeString()).
    static constexpr size_t kUnescapeBufferReserve = 512;
    std::vector<char> fUnescapeBuffer;

    // Tracks the current object/array scope, as an index into fValueStack:
    //
    // - for objects: fScopeIndex = (index of first value in scope)
    // - for arrays : fScopeIndex = -(index of first value in scope)
    //
    // fScopeIndex == 0 IFF we are at the top level (no current/active scope).
    intptr_t fScopeIndex = 0;

    // Error reporting (only written by error() when SK_JSON_REPORT_ERRORS is defined).
    const char* fErrorToken = nullptr;
    SkString fErrorMessage;
481
inTopLevelScope() const482 bool inTopLevelScope() const { return fScopeIndex == 0; }
inObjectScope() const483 bool inObjectScope() const { return fScopeIndex > 0; }
inArrayScope() const484 bool inArrayScope() const { return fScopeIndex < 0; }
485
486 // Helper for masquerading raw primitive types as Values (bypassing tagging, etc).
487 template <typename T>
488 class RawValue final : public Value {
489 public:
RawValue(T v)490 explicit RawValue(T v) {
491 static_assert(sizeof(T) <= sizeof(Value), "");
492 *this->cast<T>() = v;
493 }
494
operator *() const495 T operator *() const { return *this->cast<T>(); }
496 };
497
498 template <typename VectorT>
popScopeAsVec(size_t scope_start)499 void popScopeAsVec(size_t scope_start) {
500 SkASSERT(scope_start > 0);
501 SkASSERT(scope_start <= fValueStack.size());
502
503 using T = typename VectorT::ValueT;
504 static_assert( sizeof(T) >= sizeof(Value), "");
505 static_assert( sizeof(T) % sizeof(Value) == 0, "");
506 static_assert(alignof(T) == alignof(Value), "");
507
508 const auto scope_count = fValueStack.size() - scope_start,
509 count = scope_count / (sizeof(T) / sizeof(Value));
510 SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0);
511
512 const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start);
513
514 // Restore the previous scope index from saved placeholder value,
515 // and instantiate as a vector of values in scope.
516 auto& placeholder = fValueStack[scope_start - 1];
517 fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder);
518 placeholder = VectorT(begin, count, fAlloc);
519
520 // Drop the (consumed) values in scope.
521 fValueStack.resize(scope_start);
522 }
523
pushObjectScope()524 void pushObjectScope() {
525 // Save a scope index now, and then later we'll overwrite this value as the Object itself.
526 fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
527
528 // New object scope.
529 fScopeIndex = SkTo<intptr_t>(fValueStack.size());
530 }
531
popObjectScope()532 void popObjectScope() {
533 SkASSERT(this->inObjectScope());
534 this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex));
535
536 SkDEBUGCODE(
537 const auto& obj = fValueStack.back().as<ObjectValue>();
538 SkASSERT(obj.is<ObjectValue>());
539 for (const auto& member : obj) {
540 SkASSERT(member.fKey.is<StringValue>());
541 }
542 )
543 }
544
pushArrayScope()545 void pushArrayScope() {
546 // Save a scope index now, and then later we'll overwrite this value as the Array itself.
547 fValueStack.push_back(RawValue<intptr_t>(fScopeIndex));
548
549 // New array scope.
550 fScopeIndex = -SkTo<intptr_t>(fValueStack.size());
551 }
552
popArrayScope()553 void popArrayScope() {
554 SkASSERT(this->inArrayScope());
555 this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex));
556
557 SkDEBUGCODE(
558 const auto& arr = fValueStack.back().as<ArrayValue>();
559 SkASSERT(arr.is<ArrayValue>());
560 )
561 }
562
pushObjectKey(const char * key,size_t size,const char * eos)563 void pushObjectKey(const char* key, size_t size, const char* eos) {
564 SkASSERT(this->inObjectScope());
565 SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex));
566 SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1));
567 this->pushString(key, size, eos);
568 }
569
pushTrue()570 void pushTrue() {
571 fValueStack.push_back(BoolValue(true));
572 }
573
pushFalse()574 void pushFalse() {
575 fValueStack.push_back(BoolValue(false));
576 }
577
pushNull()578 void pushNull() {
579 fValueStack.push_back(NullValue());
580 }
581
pushString(const char * s,size_t size,const char * eos)582 void pushString(const char* s, size_t size, const char* eos) {
583 fValueStack.push_back(FastString(s, size, eos, fAlloc));
584 }
585
pushInt32(int32_t i)586 void pushInt32(int32_t i) {
587 fValueStack.push_back(NumberValue(i));
588 }
589
pushFloat(float f)590 void pushFloat(float f) {
591 fValueStack.push_back(NumberValue(f));
592 }
593
594 template <typename T>
error(T && ret_val,const char * p,const char * msg)595 T error(T&& ret_val, const char* p, const char* msg) {
596 #if defined(SK_JSON_REPORT_ERRORS)
597 fErrorToken = p;
598 fErrorMessage.set(msg);
599 #endif
600 return ret_val;
601 }
602
matchTrue(const char * p)603 const char* matchTrue(const char* p) {
604 SkASSERT(p[0] == 't');
605
606 if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') {
607 this->pushTrue();
608 return p + 4;
609 }
610
611 return this->error(nullptr, p, "invalid token");
612 }
613
matchFalse(const char * p)614 const char* matchFalse(const char* p) {
615 SkASSERT(p[0] == 'f');
616
617 if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') {
618 this->pushFalse();
619 return p + 5;
620 }
621
622 return this->error(nullptr, p, "invalid token");
623 }
624
matchNull(const char * p)625 const char* matchNull(const char* p) {
626 SkASSERT(p[0] == 'n');
627
628 if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') {
629 this->pushNull();
630 return p + 4;
631 }
632
633 return this->error(nullptr, p, "invalid token");
634 }
635
unescapeString(const char * begin,const char * end)636 const std::vector<char>* unescapeString(const char* begin, const char* end) {
637 fUnescapeBuffer.clear();
638
639 for (const auto* p = begin; p != end; ++p) {
640 if (*p != '\\') {
641 fUnescapeBuffer.push_back(*p);
642 continue;
643 }
644
645 if (++p == end) {
646 return nullptr;
647 }
648
649 switch (*p) {
650 case '"': fUnescapeBuffer.push_back( '"'); break;
651 case '\\': fUnescapeBuffer.push_back('\\'); break;
652 case '/': fUnescapeBuffer.push_back( '/'); break;
653 case 'b': fUnescapeBuffer.push_back('\b'); break;
654 case 'f': fUnescapeBuffer.push_back('\f'); break;
655 case 'n': fUnescapeBuffer.push_back('\n'); break;
656 case 'r': fUnescapeBuffer.push_back('\r'); break;
657 case 't': fUnescapeBuffer.push_back('\t'); break;
658 case 'u': {
659 if (p + 4 >= end) {
660 return nullptr;
661 }
662
663 uint32_t hexed;
664 const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'};
665 const auto* eos = SkParse::FindHex(hex_str, &hexed);
666 if (!eos || *eos) {
667 return nullptr;
668 }
669
670 char utf8[SkUTF::kMaxBytesInUTF8Sequence];
671 const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8);
672 fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len);
673 p += 4;
674 } break;
675 default: return nullptr;
676 }
677 }
678
679 return &fUnescapeBuffer;
680 }
681
    // Matches a quoted string starting at |p| (which must point at the opening '"').
    //
    // On success, invokes |func(data, size, eos)| with the string contents and returns the
    // position right after the closing quote. |eos| is a bound for FastString's 8-byte load:
    // |p_stop| for strings referenced in place, or the last byte of the unescape buffer
    // for strings that contained escape sequences.
    //
    // Returns nullptr on an illegal character, a bad escape, or when |p_stop| is reached
    // before the closing quote.
    template <typename MatchFunc>
    const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) {
        SkASSERT(*p == '"');
        const auto* s_begin = p + 1;
        bool requires_unescape = false;

        do {
            // Consume string chars.
            // This is the fast path, and hopefully we only hit it once then quick-exit below.
            // (Note: the scan starts at p + 1, so on re-entry it skips the char |p| sits on.)
            for (p = p + 1; !is_eostring(*p); ++p);

            if (*p == '"') {
                // Valid string found.
                if (!requires_unescape) {
                    // No escapes seen: reference the input bytes directly.
                    func(s_begin, p - s_begin, p_stop);
                } else {
                    // Slow unescape.  We could avoid this extra copy with some effort,
                    // but in practice escaped strings should be rare.
                    const auto* buf = this->unescapeString(s_begin, p);
                    if (!buf) {
                        // Bad escape sequence -> reported as an invalid string below.
                        break;
                    }

                    SkASSERT(!buf->empty());
                    func(buf->data(), buf->size(), buf->data() + buf->size() - 1);
                }
                return p + 1;
            }

            if (*p == '\\') {
                requires_unescape = true;
                // Step onto the escaped char; the scan above resumes after it, so an
                // escaped quote does not terminate the string.
                ++p;
                continue;
            }

            // End-of-scope chars are special: we use them to tag the end of the input.
            // Thus they cannot be consumed indiscriminately -- we need to check if we hit the
            // end of the input.  To that effect, we treat them as string terminators above,
            // then we catch them here.
            if (is_eoscope(*p)) {
                // The loop condition below performs the actual end-of-input check.
                continue;
            }

            // Invalid/unexpected char.
            break;
        } while (p != p_stop);

        // Premature end-of-input, or illegal string char.
        return this->error(nullptr, s_begin - 1, "invalid string");
    }
732
matchFastFloatDecimalPart(const char * p,int sign,float f,int exp)733 const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) {
734 SkASSERT(exp <= 0);
735
736 for (;;) {
737 if (!is_digit(*p)) break;
738 f = f * 10.f + (*p++ - '0'); --exp;
739 if (!is_digit(*p)) break;
740 f = f * 10.f + (*p++ - '0'); --exp;
741 }
742
743 const auto decimal_scale = pow10(exp);
744 if (is_numeric(*p) || !decimal_scale) {
745 SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale);
746 // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor.
747 return nullptr;
748 }
749
750 this->pushFloat(sign * f * decimal_scale);
751
752 return p;
753 }
754
matchFastFloatPart(const char * p,int sign,float f)755 const char* matchFastFloatPart(const char* p, int sign, float f) {
756 for (;;) {
757 if (!is_digit(*p)) break;
758 f = f * 10.f + (*p++ - '0');
759 if (!is_digit(*p)) break;
760 f = f * 10.f + (*p++ - '0');
761 }
762
763 if (!is_numeric(*p)) {
764 // Matched (integral) float.
765 this->pushFloat(sign * f);
766 return p;
767 }
768
769 return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0)
770 : nullptr;
771 }
772
matchFast32OrFloat(const char * p)773 const char* matchFast32OrFloat(const char* p) {
774 int sign = 1;
775 if (*p == '-') {
776 sign = -1;
777 ++p;
778 }
779
780 const auto* digits_start = p;
781
782 int32_t n32 = 0;
783
784 // This is the largest absolute int32 value we can handle before
785 // risking overflow *on the next digit* (214748363).
786 static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10;
787
788 if (is_digit(*p)) {
789 n32 = (*p++ - '0');
790 for (;;) {
791 if (!is_digit(*p) || n32 > kMaxInt32) break;
792 n32 = n32 * 10 + (*p++ - '0');
793 }
794 }
795
796 if (!is_numeric(*p)) {
797 // Did we actually match any digits?
798 if (p > digits_start) {
799 this->pushInt32(sign * n32);
800 return p;
801 }
802 return nullptr;
803 }
804
805 if (*p == '.') {
806 const auto* decimals_start = ++p;
807
808 int exp = 0;
809
810 for (;;) {
811 if (!is_digit(*p) || n32 > kMaxInt32) break;
812 n32 = n32 * 10 + (*p++ - '0'); --exp;
813 if (!is_digit(*p) || n32 > kMaxInt32) break;
814 n32 = n32 * 10 + (*p++ - '0'); --exp;
815 }
816
817 if (!is_numeric(*p)) {
818 // Did we actually match any digits?
819 if (p > decimals_start) {
820 this->pushFloat(sign * n32 * pow10(exp));
821 return p;
822 }
823 return nullptr;
824 }
825
826 if (n32 > kMaxInt32) {
827 // we ran out on n32 bits
828 return this->matchFastFloatDecimalPart(p, sign, n32, exp);
829 }
830 }
831
832 return this->matchFastFloatPart(p, sign, n32);
833 }
834
matchNumber(const char * p)835 const char* matchNumber(const char* p) {
836 if (const auto* fast = this->matchFast32OrFloat(p)) return fast;
837
838 // slow fallback
839 char* matched;
840 float f = strtof(p, &matched);
841 if (matched > p) {
842 this->pushFloat(f);
843 return matched;
844 }
845 return this->error(nullptr, p, "invalid numeric token");
846 }
847 };
848
Write(const Value & v,SkWStream * stream)849 void Write(const Value& v, SkWStream* stream) {
850 switch (v.getType()) {
851 case Value::Type::kNull:
852 stream->writeText("null");
853 break;
854 case Value::Type::kBool:
855 stream->writeText(*v.as<BoolValue>() ? "true" : "false");
856 break;
857 case Value::Type::kNumber:
858 stream->writeScalarAsText(*v.as<NumberValue>());
859 break;
860 case Value::Type::kString:
861 stream->writeText("\"");
862 stream->writeText(v.as<StringValue>().begin());
863 stream->writeText("\"");
864 break;
865 case Value::Type::kArray: {
866 const auto& array = v.as<ArrayValue>();
867 stream->writeText("[");
868 bool first_value = true;
869 for (const auto& v : array) {
870 if (!first_value) stream->writeText(",");
871 Write(v, stream);
872 first_value = false;
873 }
874 stream->writeText("]");
875 break;
876 }
877 case Value::Type::kObject:
878 const auto& object = v.as<ObjectValue>();
879 stream->writeText("{");
880 bool first_member = true;
881 for (const auto& member : object) {
882 SkASSERT(member.fKey.getType() == Value::Type::kString);
883 if (!first_member) stream->writeText(",");
884 Write(member.fKey, stream);
885 stream->writeText(":");
886 Write(member.fValue, stream);
887 first_member = false;
888 }
889 stream->writeText("}");
890 break;
891 }
892 }
893
894 } // namespace
895
toString() const896 SkString Value::toString() const {
897 SkDynamicMemoryWStream wstream;
898 Write(*this, &wstream);
899 const auto data = wstream.detachAsData();
900 // TODO: is there a better way to pass data around without copying?
901 return SkString(static_cast<const char*>(data->data()), data->size());
902 }
903
904 static constexpr size_t kMinChunkSize = 4096;
905
DOM(const char * data,size_t size)906 DOM::DOM(const char* data, size_t size)
907 : fAlloc(kMinChunkSize) {
908 DOMParser parser(fAlloc);
909
910 fRoot = parser.parse(data, size);
911 }
912
write(SkWStream * stream) const913 void DOM::write(SkWStream* stream) const {
914 Write(fRoot, stream);
915 }
916
917 } // namespace skjson
918