1
2 #include "upb/json_decode.h"
3
4 #include <errno.h>
5 #include <float.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <math.h>
9 #include <setjmp.h>
10 #include <stdlib.h>
11 #include <string.h>
12
13 #include "upb/encode.h"
14 #include "upb/reflection.h"
15
16 /* Special header, must be included last. */
17 #include "upb/port_def.inc"
18
19 typedef struct {
20 const char *ptr, *end;
21 upb_arena *arena; /* TODO: should we have a tmp arena for tmp data? */
22 const upb_symtab *any_pool;
23 int depth;
24 upb_status *status;
25 jmp_buf err;
26 int line;
27 const char *line_begin;
28 bool is_first;
29 int options;
30 const upb_fielddef *debug_field;
31 } jsondec;
32
/* Kinds of JSON value, as classified by jsondec_rawpeek() from the first
 * character of the value. */
enum {
  JD_OBJECT,
  JD_ARRAY,
  JD_STRING,
  JD_NUMBER,
  JD_TRUE,
  JD_FALSE,
  JD_NULL
};
34
35 /* Forward declarations of mutually-recursive functions. */
36 static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m);
37 static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f);
38 static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg,
39 const upb_msgdef *m);
40 static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m);
41
/* Returns true when the view `str` holds exactly the bytes of the C string
 * `lit` (same length, same contents). */
static bool jsondec_streql(upb_strview str, const char *lit) {
  const size_t lit_len = strlen(lit);

  if (str.size != lit_len) return false;
  return memcmp(str.data, lit, lit_len) == 0;
}
45
jsondec_isnullvalue(const upb_fielddef * f)46 static bool jsondec_isnullvalue(const upb_fielddef *f) {
47 return upb_fielddef_type(f) == UPB_TYPE_ENUM &&
48 strcmp(upb_enumdef_fullname(upb_fielddef_enumsubdef(f)),
49 "google.protobuf.NullValue") == 0;
50 }
51
jsondec_isvalue(const upb_fielddef * f)52 static bool jsondec_isvalue(const upb_fielddef *f) {
53 return (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
54 upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) ==
55 UPB_WELLKNOWN_VALUE) ||
56 jsondec_isnullvalue(f);
57 }
58
jsondec_err(jsondec * d,const char * msg)59 UPB_NORETURN static void jsondec_err(jsondec *d, const char *msg) {
60 upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: %s", d->line,
61 (int)(d->ptr - d->line_begin), msg);
62 UPB_LONGJMP(d->err, 1);
63 }
64
jsondec_errf(jsondec * d,const char * fmt,...)65 UPB_NORETURN static void jsondec_errf(jsondec *d, const char *fmt, ...) {
66 va_list argp;
67 upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: ", d->line,
68 (int)(d->ptr - d->line_begin));
69 va_start(argp, fmt);
70 upb_status_vappenderrf(d->status, fmt, argp);
71 va_end(argp);
72 UPB_LONGJMP(d->err, 1);
73 }
74
/* Advances d->ptr past '\n', '\r', '\t' and ' ', updating d->line and
 * d->line_begin at each newline.  Returns at the first other character;
 * reaching the end of input is an error. */
static void jsondec_skipws(jsondec *d) {
  for (; d->ptr != d->end; d->ptr++) {
    const char ch = *d->ptr;

    if (ch == '\n') {
      d->line++;
      d->line_begin = d->ptr;
    } else if (ch != '\r' && ch != '\t' && ch != ' ') {
      return;
    }
  }
  jsondec_err(d, "Unexpected EOF");
}
93
/* Consumes one byte and returns true if the next input byte is `ch`;
 * otherwise (including at end of input) leaves d->ptr alone and returns
 * false. */
static bool jsondec_tryparsech(jsondec *d, char ch) {
  if (d->ptr != d->end && *d->ptr == ch) {
    d->ptr++;
    return true;
  }
  return false;
}
99
/* Requires the input at d->ptr to start with the literal `lit` and consumes
 * it; reports "Expected: '<lit>'" otherwise. */
static void jsondec_parselit(jsondec *d, const char *lit) {
  const size_t lit_len = strlen(lit);
  const size_t remaining = d->end - d->ptr;
  const bool matches =
      remaining >= lit_len && memcmp(d->ptr, lit, lit_len) == 0;

  if (!matches) {
    jsondec_errf(d, "Expected: '%s'", lit);
  }
  d->ptr += lit_len;
}
108
/* Skips whitespace, then requires and consumes the single character `ch`. */
static void jsondec_wsch(jsondec *d, char ch) {
  jsondec_skipws(d);
  if (jsondec_tryparsech(d, ch)) return;
  jsondec_errf(d, "Expected: '%c'", ch);
}
115
/* Consumes the literal "true" at the current position. */
static void jsondec_true(jsondec *d) {
  jsondec_parselit(d, "true");
}
/* Consumes the literal "false" at the current position. */
static void jsondec_false(jsondec *d) {
  jsondec_parselit(d, "false");
}
/* Consumes the literal "null" at the current position. */
static void jsondec_null(jsondec *d) {
  jsondec_parselit(d, "null");
}
119
/* Skips whitespace and consumes the ':' separating an object key from its
 * value; reports "Expected: ':'" if it is missing. */
static void jsondec_entrysep(jsondec *d) {
  jsondec_wsch(d, ':');
}
124
/* Classifies the value starting at d->ptr by its first character and returns
 * the matching JD_* constant without consuming input.  The byte at d->ptr is
 * read unconditionally, so the caller must ensure d->ptr != d->end.  Any
 * other character is reported as an error. */
static int jsondec_rawpeek(jsondec *d) {
  const char ch = *d->ptr;

  if (ch == '-' || (ch >= '0' && ch <= '9')) return JD_NUMBER;

  switch (ch) {
    case '{': return JD_OBJECT;
    case '[': return JD_ARRAY;
    case '"': return JD_STRING;
    case 't': return JD_TRUE;
    case 'f': return JD_FALSE;
    case 'n': return JD_NULL;
    default:  break;
  }

  jsondec_errf(d, "Unexpected character: '%c'", ch);
}
155
156 /* JSON object/array **********************************************************/
157
158 /* These are used like so:
159 *
160 * jsondec_objstart(d);
161 * while (jsondec_objnext(d)) {
162 * ...
163 * }
164 * jsondec_objend(d) */
165
/* Skips leading whitespace and returns the JD_* kind of the next value
 * without consuming it. */
static int jsondec_peek(jsondec *d) {
  int kind;

  jsondec_skipws(d);
  kind = jsondec_rawpeek(d);
  return kind;
}
170
/* Enters one nesting level: decrements d->depth (an error once it goes
 * negative) and flags that the next sequence element is the first. */
static void jsondec_push(jsondec *d) {
  d->depth--;
  if (d->depth < 0) {
    jsondec_err(d, "Recursion limit exceeded");
  }
  d->is_first = true;
}
177
/* Shared stepping logic for arrays and objects.  Returns false when the next
 * non-whitespace character is `end_ch` (which is left unconsumed).  Otherwise
 * consumes the ',' separator, unless this is the first element since
 * jsondec_push(), and returns true. */
static bool jsondec_seqnext(jsondec *d, char end_ch) {
  const bool was_first = d->is_first;

  d->is_first = false;
  jsondec_skipws(d);

  if (*d->ptr != end_ch) {
    if (!was_first) jsondec_parselit(d, ",");
    return true;
  }
  return false;
}
186
/* Begins an array: takes a nesting level, then consumes '['. */
static void jsondec_arrstart(jsondec *d) {
  jsondec_push(d);
  jsondec_wsch(d, '[');
}
191
/* Ends an array: gives the nesting level back, then consumes ']'. */
static void jsondec_arrend(jsondec *d) {
  d->depth += 1;
  jsondec_wsch(d, ']');
}
196
/* Returns true while the current array has another element to read. */
static bool jsondec_arrnext(jsondec *d) {
  const bool has_more = jsondec_seqnext(d, ']');
  return has_more;
}
200
/* Begins an object: takes a nesting level, then consumes '{'. */
static void jsondec_objstart(jsondec *d) {
  jsondec_push(d);
  jsondec_wsch(d, '{');
}
205
/* Ends an object: gives the nesting level back, then consumes '}'. */
static void jsondec_objend(jsondec *d) {
  d->depth += 1;
  jsondec_wsch(d, '}');
}
210
/* Returns true while the current object has another entry to read.  When it
 * does, the entry must begin with a string key; anything else is an error. */
static bool jsondec_objnext(jsondec *d) {
  const bool has_entry = jsondec_seqnext(d, '}');

  if (has_entry && jsondec_peek(d) != JD_STRING) {
    jsondec_err(d, "Object must start with string");
  }
  return has_entry;
}
218
219 /* JSON number ****************************************************************/
220
/* Advances d->ptr over a run of ASCII digits.  Returns true if at least one
 * digit was consumed. */
static bool jsondec_tryskipdigits(jsondec *d) {
  const char *const first = d->ptr;

  while (d->ptr < d->end && *d->ptr >= '0' && *d->ptr <= '9') {
    d->ptr++;
  }

  return d->ptr != first;
}
233
/* Like jsondec_tryskipdigits(), but a run of zero digits is an error. */
static void jsondec_skipdigits(jsondec *d) {
  if (jsondec_tryskipdigits(d)) return;
  jsondec_err(d, "Expected one or more digits");
}
239
/* Parses a JSON number starting at d->ptr and returns it as a double.
 *
 * The text is first walked by hand to enforce JSON's number grammar
 * (optional '-', integer part with no leading zero, optional fraction,
 * optional exponent); the validated span is then converted with strtod(),
 * which accepts a superset of that grammar.  Results beyond +/-DBL_MAX are
 * rejected. */
static double jsondec_number(jsondec *d) {
  const char *const num_begin = d->ptr;
  char *parsed_end;
  double result;

  assert(jsondec_rawpeek(d) == JD_NUMBER);

  /* Optional sign. */
  if (*d->ptr == '-') d->ptr++;

  /* Integer part: either a lone '0' or a run of digits. */
  if (!jsondec_tryparsech(d, '0')) {
    jsondec_skipdigits(d);
  } else if (jsondec_tryskipdigits(d)) {
    jsondec_err(d, "number cannot have leading zero");
  }

  if (d->ptr != d->end) {
    /* Optional fraction. */
    if (jsondec_tryparsech(d, '.')) {
      jsondec_skipdigits(d);
    }

    /* Optional exponent. */
    if (d->ptr != d->end && (*d->ptr == 'e' || *d->ptr == 'E')) {
      d->ptr++;
      if (d->ptr == d->end) {
        jsondec_err(d, "Unexpected EOF in number");
      }
      if (*d->ptr == '+' || *d->ptr == '-') {
        d->ptr++;
      }
      jsondec_skipdigits(d);
    }
  }

  errno = 0;
  result = strtod(num_begin, &parsed_end);
  assert(parsed_end == d->ptr);

  /* errno == ERANGE is deliberately not treated as an error: the min/max-val
   * conformance tests currently fail if it is.  Whether the tests or strtod()
   * are at fault still needs investigating. */

  if (result > DBL_MAX || result < -DBL_MAX) {
    jsondec_err(d, "Number out of range");
  }

  return result;
}
298
299 /* JSON string ****************************************************************/
300
/* Consumes the character following a backslash and returns the byte that the
 * two-character escape stands for.  Unrecognized escapes are an error.
 * (\u escapes are handled separately by the caller.) */
static char jsondec_escape(jsondec *d) {
  const char esc = *d->ptr++;

  switch (esc) {
    case '"':  return '\"';
    case '\\': return '\\';
    case '/':  return '/';
    case 'b':  return '\b';
    case 'f':  return '\f';
    case 'n':  return '\n';
    case 'r':  return '\r';
    case 't':  return '\t';
    default:   break;
  }

  jsondec_err(d, "Invalid escape char");
}
323
/* Consumes exactly four hexadecimal digits (either case) and returns their
 * value.  Fewer than four remaining bytes, or a non-hex digit, is an error. */
static uint32_t jsondec_codepoint(jsondec *d) {
  uint32_t cp = 0;
  int i;

  if (d->end - d->ptr < 4) {
    jsondec_err(d, "EOF inside string");
  }

  for (i = 0; i < 4; i++) {
    const char ch = *d->ptr++;
    uint32_t nibble;

    if (ch >= '0' && ch <= '9') {
      nibble = (uint32_t)(ch - '0');
    } else if (ch >= 'a' && ch <= 'f') {
      nibble = (uint32_t)(ch - 'a' + 10);
    } else if (ch >= 'A' && ch <= 'F') {
      nibble = (uint32_t)(ch - 'A' + 10);
    } else {
      jsondec_err(d, "Invalid hex digit");
    }

    cp = (cp << 4) | nibble;
  }

  return cp;
}
349
350 /* Parses a \uXXXX unicode escape (possibly a surrogate pair). */
jsondec_unicode(jsondec * d,char * out)351 static size_t jsondec_unicode(jsondec *d, char* out) {
352 uint32_t cp = jsondec_codepoint(d);
353 if (cp >= 0xd800 && cp <= 0xdbff) {
354 /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */
355 uint32_t high = cp;
356 uint32_t low;
357 jsondec_parselit(d, "\\u");
358 low = jsondec_codepoint(d);
359 if (low < 0xdc00 || low > 0xdfff) {
360 jsondec_err(d, "Invalid low surrogate");
361 }
362 cp = (high & 0x3ff) << 10;
363 cp |= (low & 0x3ff);
364 cp += 0x10000;
365 } else if (cp >= 0xdc00 && cp <= 0xdfff) {
366 jsondec_err(d, "Unpaired low surrogate");
367 }
368
369 /* Write to UTF-8 */
370 if (cp <= 0x7f) {
371 out[0] = cp;
372 return 1;
373 } else if (cp <= 0x07FF) {
374 out[0] = ((cp >> 6) & 0x1F) | 0xC0;
375 out[1] = ((cp >> 0) & 0x3F) | 0x80;
376 return 2;
377 } else if (cp <= 0xFFFF) {
378 out[0] = ((cp >> 12) & 0x0F) | 0xE0;
379 out[1] = ((cp >> 6) & 0x3F) | 0x80;
380 out[2] = ((cp >> 0) & 0x3F) | 0x80;
381 return 3;
382 } else if (cp < 0x10FFFF) {
383 out[0] = ((cp >> 18) & 0x07) | 0xF0;
384 out[1] = ((cp >> 12) & 0x3f) | 0x80;
385 out[2] = ((cp >> 6) & 0x3f) | 0x80;
386 out[3] = ((cp >> 0) & 0x3f) | 0x80;
387 return 4;
388 } else {
389 jsondec_err(d, "Invalid codepoint");
390 }
391 }
392
/* Grows the string buffer described by (*buf, *end, *buf_end) to twice its
 * capacity (at least 8 bytes) using d->arena, keeping the bytes already
 * written, and updates all three pointers to the new storage.  Allocation
 * failure is reported as "Out of memory". */
static void jsondec_resize(jsondec *d, char **buf, char **end, char **buf_end) {
  const size_t used = *end - *buf;
  const size_t old_cap = *buf_end - *buf;
  const size_t new_cap = UPB_MAX(8, 2 * old_cap);

  *buf = upb_arena_realloc(d->arena, *buf, used, new_cap);
  if (!*buf) jsondec_err(d, "Out of memory");

  *buf_end = *buf + new_cap;
  *end = *buf + used;
}
404
jsondec_string(jsondec * d)405 static upb_strview jsondec_string(jsondec *d) {
406 char *buf = NULL;
407 char *end = NULL;
408 char *buf_end = NULL;
409
410 jsondec_skipws(d);
411
412 if (*d->ptr++ != '"') {
413 jsondec_err(d, "Expected string");
414 }
415
416 while (d->ptr < d->end) {
417 char ch = *d->ptr++;
418
419 if (end == buf_end) {
420 jsondec_resize(d, &buf, &end, &buf_end);
421 }
422
423 switch (ch) {
424 case '"': {
425 upb_strview ret;
426 ret.data = buf;
427 ret.size = end - buf;
428 *end = '\0'; /* Needed for possible strtod(). */
429 return ret;
430 }
431 case '\\':
432 if (d->ptr == d->end) goto eof;
433 if (*d->ptr == 'u') {
434 d->ptr++;
435 if (buf_end - end < 4) {
436 /* Allow space for maximum-sized code point (4 bytes). */
437 jsondec_resize(d, &buf, &end, &buf_end);
438 }
439 end += jsondec_unicode(d, end);
440 } else {
441 *end++ = jsondec_escape(d);
442 }
443 break;
444 default:
445 if ((unsigned char)*d->ptr < 0x20) {
446 jsondec_err(d, "Invalid char in JSON string");
447 }
448 *end++ = ch;
449 break;
450 }
451 }
452
453 eof:
454 jsondec_err(d, "EOF inside string");
455 }
456
/* Parses and discards one complete JSON value of any kind, recursing into
 * objects and arrays. */
static void jsondec_skipval(jsondec *d) {
  const int kind = jsondec_peek(d);

  if (kind == JD_OBJECT) {
    jsondec_objstart(d);
    while (jsondec_objnext(d)) {
      jsondec_string(d); /* key */
      jsondec_entrysep(d);
      jsondec_skipval(d); /* value */
    }
    jsondec_objend(d);
  } else if (kind == JD_ARRAY) {
    jsondec_arrstart(d);
    while (jsondec_arrnext(d)) {
      jsondec_skipval(d);
    }
    jsondec_arrend(d);
  } else if (kind == JD_STRING) {
    jsondec_string(d);
  } else if (kind == JD_NUMBER) {
    jsondec_number(d);
  } else if (kind == JD_TRUE) {
    jsondec_true(d);
  } else if (kind == JD_FALSE) {
    jsondec_false(d);
  } else if (kind == JD_NULL) {
    jsondec_null(d);
  }
}
492
493 /* Base64 decoding for bytes fields. ******************************************/
494
/* Maps one base64 character to its 6-bit value.  Both the standard alphabet
 * ('+', '/') and the URL-safe variant ('-', '_') are accepted.
 *
 * Any other byte yields (unsigned int)-1, so every high bit is set; callers
 * OR shifted results together and detect bad input from the sign of the
 * combined value.
 *
 * The index is taken as `unsigned char`: with a signed `char`, bytes >= 0x80
 * would otherwise become huge indexes and read far outside the table. */
static unsigned int jsondec_base64_tablelookup(const char ch) {
  /* Only ASCII can be valid, so the table covers 0x00..0x7F. */
  static const signed char table[128] = {
      /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1,
                 -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1,
                 -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1,
                 -1, -1, -1, 62 /*+*/, -1, 62 /*-*/, -1, 63 /*/ */,
      /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, /* 0-7 */
                 60, 61, -1, -1, -1, -1, -1, -1, /* 8-9 */
      /* 0x40 */ -1, 0,  1,  2,  3,  4,  5,  6,  /* A-G */
                 7,  8,  9,  10, 11, 12, 13, 14, /* H-O */
      /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, /* P-W */
                 23, 24, 25, -1, -1, -1, -1, 63 /*_*/,
      /* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32, /* a-g */
                 33, 34, 35, 36, 37, 38, 39, 40, /* h-o */
      /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, /* p-w */
                 49, 50, 51, -1, -1, -1, -1, -1  /* x-z */
  };
  const unsigned char uc = (unsigned char)ch;

  if (uc >= sizeof(table)) return (unsigned int)-1;

  /* Sign-extend return value so high bit will be set on any unexpected char. */
  return (unsigned int)table[uc];
}
539
/* Decodes a trailing, unpadded base64 group of 2 or 3 characters in
 * [ptr, end), writing 1 or 2 bytes to `out`.  Returns the advanced output
 * pointer.  Any other group length, or an invalid character, is reported as
 * "Corrupt base64". */
static char *jsondec_partialbase64(jsondec *d, const char *ptr, const char *end,
                                   char *out) {
  int32_t val = -1;

  if (end - ptr == 2) {
    const unsigned int a = jsondec_base64_tablelookup(ptr[0]);
    const unsigned int b = jsondec_base64_tablelookup(ptr[1]);

    val = a << 18 | b << 12;
    out[0] = val >> 16;
    out += 1;
  } else if (end - ptr == 3) {
    const unsigned int a = jsondec_base64_tablelookup(ptr[0]);
    const unsigned int b = jsondec_base64_tablelookup(ptr[1]);
    const unsigned int c = jsondec_base64_tablelookup(ptr[2]);

    val = a << 18 | b << 12 | c << 6;
    out[0] = val >> 16;
    out[1] = (val >> 8) & 0xff;
    out += 2;
  }

  /* Negative means either no case matched (val still -1) or a lookup
   * returned the all-ones "invalid" marker. */
  if (val < 0) {
    jsondec_err(d, "Corrupt base64");
  }

  return out;
}
567
/* Base64-decodes `str` in place and returns the decoded length.
 *
 * Decoding in place is safe because the buffer is freshly built (it does not
 * alias the input) and every 4 input bytes produce at most 3 output bytes.
 * Full 4-character groups are decoded in the loop; trailing '=' padding is
 * trimmed, and whatever remains goes to jsondec_partialbase64(). */
static size_t jsondec_base64(jsondec *d, upb_strview str) {
  char *out = (char*)str.data;
  const char *in = str.data;
  const char *in_end = in + str.size;
  const char *quads_end = in + (str.size & -4); /* Round down to multiple of 4. */

  while (in < quads_end) {
    int val = jsondec_base64_tablelookup(in[0]) << 18 |
              jsondec_base64_tablelookup(in[1]) << 12 |
              jsondec_base64_tablelookup(in[2]) << 6 |
              jsondec_base64_tablelookup(in[3]) << 0;

    if (val < 0) {
      /* Junk chars or padding.  Remove trailing padding, if any. */
      if (in_end - in == 4 && in[3] == '=') {
        in_end -= (in[2] == '=') ? 2 : 1;
      }
      break;
    }

    out[0] = val >> 16;
    out[1] = (val >> 8) & 0xff;
    out[2] = val & 0xff;

    in += 4;
    out += 3;
  }

  if (in < in_end) {
    /* Process remaining chars.  We do not require padding. */
    out = jsondec_partialbase64(d, in, in_end, out);
  }

  return out - str.data;
}
606
607 /* Low-level integer parsing **************************************************/
608
609 /* We use these hand-written routines instead of strto[u]l() because the "long
610 * long" variants aren't in c89. Also our version allows setting a ptr limit. */
611
/* Parses a run of decimal digits from [ptr, end) into *val, stopping at the
 * first non-digit or at `end`.  Returns the position where parsing stopped.
 * A value that does not fit in uint64_t is reported as "Integer overflow".
 * With no digits at all, *val is 0 and `ptr` is returned unchanged. */
static const char *jsondec_buftouint64(jsondec *d, const char *ptr,
                                       const char *end, uint64_t *val) {
  uint64_t acc = 0;

  for (; ptr < end; ptr++) {
    const unsigned digit = *ptr - '0';

    if (digit >= 10) break;

    if (acc > UINT64_MAX / 10 || acc * 10 > UINT64_MAX - digit) {
      jsondec_err(d, "Integer overflow");
    }
    acc = acc * 10 + digit;
  }

  *val = acc;
  return ptr;
}
629
/* Signed counterpart of jsondec_buftouint64(): accepts an optional leading
 * '-' followed by decimal digits.  Returns the position where parsing
 * stopped; magnitudes outside the int64_t range are reported as
 * "Integer overflow". */
static const char *jsondec_buftoint64(jsondec *d, const char *ptr,
                                      const char *end, int64_t *val) {
  const bool neg = (ptr != end && *ptr == '-');
  uint64_t magnitude;

  if (neg) ptr++;

  ptr = jsondec_buftouint64(d, ptr, end, &magnitude);

  /* A negative number may have a magnitude one larger than INT64_MAX. */
  if (magnitude > (uint64_t)INT64_MAX + neg) {
    jsondec_err(d, "Integer overflow");
  }

  *val = neg ? -magnitude : magnitude;
  return ptr;
}
648
/* Converts the whole of `str` to a uint64_t; any unparsed trailing byte is
 * an error. */
static uint64_t jsondec_strtouint64(jsondec *d, upb_strview str) {
  const char *const limit = str.data + str.size;
  uint64_t parsed;
  const char *stop = jsondec_buftouint64(d, str.data, limit, &parsed);

  if (stop != limit) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
657
/* Converts the whole of `str` to an int64_t; any unparsed trailing byte is
 * an error. */
static int64_t jsondec_strtoint64(jsondec *d, upb_strview str) {
  const char *const limit = str.data + str.size;
  int64_t parsed;
  const char *stop = jsondec_buftoint64(d, str.data, limit, &parsed);

  if (stop != limit) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
666
667 /* Primitive value types ******************************************************/
668
669 /* Parse INT32 or INT64 value. */
jsondec_int(jsondec * d,const upb_fielddef * f)670 static upb_msgval jsondec_int(jsondec *d, const upb_fielddef *f) {
671 upb_msgval val;
672
673 switch (jsondec_peek(d)) {
674 case JD_NUMBER: {
675 double dbl = jsondec_number(d);
676 if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) {
677 jsondec_err(d, "JSON number is out of range.");
678 }
679 val.int64_val = dbl; /* must be guarded, overflow here is UB */
680 if (val.int64_val != dbl) {
681 jsondec_errf(d, "JSON number was not integral (%d != %" PRId64 ")", dbl,
682 val.int64_val);
683 }
684 break;
685 }
686 case JD_STRING: {
687 upb_strview str = jsondec_string(d);
688 val.int64_val = jsondec_strtoint64(d, str);
689 break;
690 }
691 default:
692 jsondec_err(d, "Expected number or string");
693 }
694
695 if (upb_fielddef_type(f) == UPB_TYPE_INT32) {
696 if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) {
697 jsondec_err(d, "Integer out of range.");
698 }
699 val.int32_val = (int32_t)val.int64_val;
700 }
701
702 return val;
703 }
704
705 /* Parse UINT32 or UINT64 value. */
jsondec_uint(jsondec * d,const upb_fielddef * f)706 static upb_msgval jsondec_uint(jsondec *d, const upb_fielddef *f) {
707 upb_msgval val;
708
709 switch (jsondec_peek(d)) {
710 case JD_NUMBER: {
711 double dbl = jsondec_number(d);
712 if (dbl > 18446744073709549568.0 || dbl < 0) {
713 jsondec_err(d, "JSON number is out of range.");
714 }
715 val.uint64_val = dbl; /* must be guarded, overflow here is UB */
716 if (val.uint64_val != dbl) {
717 jsondec_errf(d, "JSON number was not integral (%d != %" PRIu64 ")", dbl,
718 val.uint64_val);
719 }
720 break;
721 }
722 case JD_STRING: {
723 upb_strview str = jsondec_string(d);
724 val.uint64_val = jsondec_strtouint64(d, str);
725 break;
726 }
727 default:
728 jsondec_err(d, "Expected number or string");
729 }
730
731 if (upb_fielddef_type(f) == UPB_TYPE_UINT32) {
732 if (val.uint64_val > UINT32_MAX) {
733 jsondec_err(d, "Integer out of range.");
734 }
735 val.uint32_val = (uint32_t)val.uint64_val;
736 }
737
738 return val;
739 }
740
741 /* Parse DOUBLE or FLOAT value. */
jsondec_double(jsondec * d,const upb_fielddef * f)742 static upb_msgval jsondec_double(jsondec *d, const upb_fielddef *f) {
743 upb_strview str;
744 upb_msgval val;
745
746 switch (jsondec_peek(d)) {
747 case JD_NUMBER:
748 val.double_val = jsondec_number(d);
749 break;
750 case JD_STRING:
751 str = jsondec_string(d);
752 if (jsondec_streql(str, "NaN")) {
753 val.double_val = NAN;
754 } else if (jsondec_streql(str, "Infinity")) {
755 val.double_val = INFINITY;
756 } else if (jsondec_streql(str, "-Infinity")) {
757 val.double_val = -INFINITY;
758 } else {
759 val.double_val = strtod(str.data, NULL);
760 }
761 break;
762 default:
763 jsondec_err(d, "Expected number or string");
764 }
765
766 if (upb_fielddef_type(f) == UPB_TYPE_FLOAT) {
767 if (val.double_val != INFINITY && val.double_val != -INFINITY &&
768 (val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) {
769 jsondec_err(d, "Float out of range");
770 }
771 val.float_val = val.double_val;
772 }
773
774 return val;
775 }
776
777 /* Parse STRING or BYTES value. */
jsondec_strfield(jsondec * d,const upb_fielddef * f)778 static upb_msgval jsondec_strfield(jsondec *d, const upb_fielddef *f) {
779 upb_msgval val;
780 val.str_val = jsondec_string(d);
781 if (upb_fielddef_type(f) == UPB_TYPE_BYTES) {
782 val.str_val.size = jsondec_base64(d, val.str_val);
783 }
784 return val;
785 }
786
jsondec_enum(jsondec * d,const upb_fielddef * f)787 static upb_msgval jsondec_enum(jsondec *d, const upb_fielddef *f) {
788 switch (jsondec_peek(d)) {
789 case JD_STRING: {
790 const upb_enumdef *e = upb_fielddef_enumsubdef(f);
791 upb_strview str = jsondec_string(d);
792 upb_msgval val;
793 if (!upb_enumdef_ntoi(e, str.data, str.size, &val.int32_val)) {
794 if (d->options & UPB_JSONDEC_IGNOREUNKNOWN) {
795 val.int32_val = 0;
796 } else {
797 jsondec_errf(d, "Unknown enumerator: '" UPB_STRVIEW_FORMAT "'",
798 UPB_STRVIEW_ARGS(str));
799 }
800 }
801 return val;
802 }
803 case JD_NULL: {
804 if (jsondec_isnullvalue(f)) {
805 upb_msgval val;
806 jsondec_null(d);
807 val.int32_val = 0;
808 return val;
809 }
810 }
811 /* Fallthrough. */
812 default:
813 return jsondec_int(d, f);
814 }
815 }
816
jsondec_bool(jsondec * d,const upb_fielddef * f)817 static upb_msgval jsondec_bool(jsondec *d, const upb_fielddef *f) {
818 bool is_map_key = upb_fielddef_number(f) == 1 &&
819 upb_msgdef_mapentry(upb_fielddef_containingtype(f));
820 upb_msgval val;
821
822 if (is_map_key) {
823 upb_strview str = jsondec_string(d);
824 if (jsondec_streql(str, "true")) {
825 val.bool_val = true;
826 } else if (jsondec_streql(str, "false")) {
827 val.bool_val = false;
828 } else {
829 jsondec_err(d, "Invalid boolean map key");
830 }
831 } else {
832 switch (jsondec_peek(d)) {
833 case JD_TRUE:
834 val.bool_val = true;
835 jsondec_true(d);
836 break;
837 case JD_FALSE:
838 val.bool_val = false;
839 jsondec_false(d);
840 break;
841 default:
842 jsondec_err(d, "Expected true or false");
843 }
844 }
845
846 return val;
847 }
848
849 /* Composite types (array/message/map) ****************************************/
850
/* Parses a JSON array into the repeated field `f` of `msg`, appending each
 * decoded element to the field's array. */
static void jsondec_array(jsondec *d, upb_msg *msg, const upb_fielddef *f) {
  upb_array *dest = upb_msg_mutable(msg, f, d->arena).array;

  for (jsondec_arrstart(d); jsondec_arrnext(d);) {
    upb_msgval item = jsondec_value(d, f);
    upb_array_append(dest, item, d->arena);
  }
  jsondec_arrend(d);
}
861
/* Parses a JSON object into the map field `f` of `msg`.  Keys and values are
 * decoded according to fields 1 and 2 of the map-entry message type. */
static void jsondec_map(jsondec *d, upb_msg *msg, const upb_fielddef *f) {
  upb_map *dest = upb_msg_mutable(msg, f, d->arena).map;
  const upb_msgdef *entry_m = upb_fielddef_msgsubdef(f);
  const upb_fielddef *key_f = upb_msgdef_itof(entry_m, 1);
  const upb_fielddef *val_f = upb_msgdef_itof(entry_m, 2);

  for (jsondec_objstart(d); jsondec_objnext(d);) {
    upb_msgval key;
    upb_msgval val;

    key = jsondec_value(d, key_f);
    jsondec_entrysep(d);
    val = jsondec_value(d, val_f);

    upb_map_set(dest, key, val, d->arena);
  }
  jsondec_objend(d);
}
878
/* Decodes a JSON value into the existing message `msg` of type `m`,
 * dispatching to the well-known-type parser when `m` is one, and to the
 * generic object parser otherwise. */
static void jsondec_tomsg(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  if (upb_msgdef_wellknowntype(m) != UPB_WELLKNOWN_UNSPECIFIED) {
    jsondec_wellknown(d, msg, m);
    return;
  }
  jsondec_object(d, msg, m);
}
886
jsondec_msg(jsondec * d,const upb_fielddef * f)887 static upb_msgval jsondec_msg(jsondec *d, const upb_fielddef *f) {
888 const upb_msgdef *m = upb_fielddef_msgsubdef(f);
889 upb_msg *msg = upb_msg_new(m, d->arena);
890 upb_msgval val;
891
892 jsondec_tomsg(d, msg, m);
893 val.msg_val = msg;
894 return val;
895 }
896
/* Parses one `"name": value` entry of a JSON object into `msg`.
 *
 * - An unknown name is an error unless UPB_JSONDEC_IGNOREUNKNOWN is set, in
 *   which case its value is skipped.
 * - Setting a second member of the same (real) oneof is an error.
 * - JSON null leaves the field untouched, except for Value/NullValue fields
 *   where null is itself a value.
 * d->debug_field points at the field while its value is being decoded and is
 * restored afterwards. */
static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  const upb_fielddef *outer_field;
  const upb_fielddef *f;
  upb_strview name = jsondec_string(d);

  jsondec_entrysep(d);
  f = upb_msgdef_lookupjsonname(m, name.data, name.size);

  if (!f) {
    const bool ignore_unknown = (d->options & UPB_JSONDEC_IGNOREUNKNOWN) != 0;

    if (!ignore_unknown) {
      jsondec_errf(d, "Unknown field: '" UPB_STRVIEW_FORMAT "'",
                   UPB_STRVIEW_ARGS(name));
    }
    jsondec_skipval(d);
    return;
  }

  if (upb_fielddef_realcontainingoneof(f) &&
      upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) {
    jsondec_err(d, "More than one field for this oneof.");
  }

  /* JSON "null" indicates a default value, so no need to set anything. */
  if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) {
    jsondec_null(d);
    return;
  }

  outer_field = d->debug_field;
  d->debug_field = f;

  if (upb_fielddef_ismap(f)) {
    jsondec_map(d, msg, f);
  } else if (upb_fielddef_isseq(f)) {
    jsondec_array(d, msg, f);
  } else if (upb_fielddef_issubmsg(f)) {
    const upb_msgdef *sub_m = upb_fielddef_msgsubdef(f);
    upb_msg *sub = upb_msg_mutable(msg, f, d->arena).msg;
    jsondec_tomsg(d, sub, sub_m);
  } else {
    upb_msgval scalar = jsondec_value(d, f);
    upb_msg_set(msg, f, scalar, d->arena);
  }

  d->debug_field = outer_field;
}
944
/* Parses a JSON object as a message of type `m`, decoding each entry into
 * `msg` with jsondec_field(). */
static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  for (jsondec_objstart(d); jsondec_objnext(d);) {
    jsondec_field(d, msg, m);
  }
  jsondec_objend(d);
}
952
jsondec_value(jsondec * d,const upb_fielddef * f)953 static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f) {
954 switch (upb_fielddef_type(f)) {
955 case UPB_TYPE_BOOL:
956 return jsondec_bool(d, f);
957 case UPB_TYPE_FLOAT:
958 case UPB_TYPE_DOUBLE:
959 return jsondec_double(d, f);
960 case UPB_TYPE_UINT32:
961 case UPB_TYPE_UINT64:
962 return jsondec_uint(d, f);
963 case UPB_TYPE_INT32:
964 case UPB_TYPE_INT64:
965 return jsondec_int(d, f);
966 case UPB_TYPE_STRING:
967 case UPB_TYPE_BYTES:
968 return jsondec_strfield(d, f);
969 case UPB_TYPE_ENUM:
970 return jsondec_enum(d, f);
971 case UPB_TYPE_MESSAGE:
972 return jsondec_msg(d, f);
973 default:
974 UPB_UNREACHABLE();
975 }
976 }
977
978 /* Well-known types ***********************************************************/
979
/* Reads exactly `digits` decimal digits at *ptr, optionally followed by the
 * literal `after` (NULL for none), advances *ptr past both, and returns the
 * number.  Anything else is reported as "Malformed timestamp".  No bounds
 * checking is done here: the caller must guarantee that digits +
 * strlen(after) bytes are readable at *ptr. */
static int jsondec_tsdigits(jsondec *d, const char **ptr, size_t digits,
                            const char *after) {
  const char *const digits_end = *ptr + digits;
  const size_t after_len = after ? strlen(after) : 0;
  const char *stop;
  uint64_t val;

  UPB_ASSERT(digits <= 9); /* int can't overflow. */

  stop = jsondec_buftouint64(d, *ptr, digits_end, &val);
  if (stop != digits_end) {
    jsondec_err(d, "Malformed timestamp");
  }
  if (after_len != 0 && memcmp(digits_end, after, after_len) != 0) {
    jsondec_err(d, "Malformed timestamp");
  }

  UPB_ASSERT(val < INT_MAX);

  *ptr = digits_end + after_len;
  return (int)val;
}
999
/* Parses an optional fractional-seconds suffix (".ddd", at most 9 digits) at
 * *ptr and returns it scaled to nanoseconds, advancing *ptr past it.  When
 * there is no '.', returns 0 and leaves *ptr unchanged. */
static int jsondec_nanos(jsondec *d, const char **ptr, const char *end) {
  const char *const p = *ptr;
  uint64_t nanos = 0;

  if (p != end && *p == '.') {
    const char *const frac_begin = p + 1;
    const char *const frac_end =
        jsondec_buftouint64(d, frac_begin, end, &nanos);
    const int digits = (int)(frac_end - frac_begin);
    int scale;

    if (digits > 9) {
      jsondec_err(d, "Too many digits for partial seconds");
    }

    /* Pad on the right to 9 digits, e.g. ".5" -> 500000000. */
    for (scale = 9 - digits; scale > 0; scale--) {
      nanos *= 10;
    }
    *ptr = frac_end;
  }

  UPB_ASSERT(nanos < INT_MAX);

  return (int)nanos;
}
1019
/* Returns the number of days from 1970-01-01 to the given date, so that
 * jsondec_epochdays(1970, 1, 1) == 0.
 *
 * The year is treated as starting in March, which puts the leap day at the
 * very end of the year.  All arithmetic is deliberately done in uint32_t and
 * only converted back to int on return. */
int jsondec_epochdays(int y, int m, int d) {
  const uint32_t base_year = 4800; /* Before min year, multiple of 400. */
  uint32_t month_index = m - 3;    /* March-based month; wraps for Jan/Feb. */
  uint32_t year = y + base_year;
  uint32_t days_before_month;
  uint32_t leap_days;

  if (month_index > (uint32_t)m) {
    /* January/February: they belong to the previous March-based year. */
    month_index += 12;
    year -= 1;
  }

  days_before_month = (month_index * 62719 + 769) / 2048;
  leap_days = year / 4 - year / 100 + year / 400;

  return year * 365 + leap_days + days_before_month + (d - 1) - 2472632;
}
1031
/* Converts a calendar date and time of day to seconds since 1970-01-01
 * 00:00:00, with no time-zone adjustment. */
static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) {
  const int64_t days = jsondec_epochdays(y, m, d);
  return days * 86400 + h * 3600 + min * 60 + s;
}
1035
/* Parses a timestamp string of the form
 *   YYYY-MM-DDTHH:MM:SS[.fraction](Z|+HH:00|-HH:00)
 * and stores the result in fields 1 (seconds) and 2 (nanos) of `msg`.
 * The zone offset is folded into the seconds value.  Results earlier than
 * -62135596800 seconds are rejected as out of range. */
static void jsondec_timestamp(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  upb_msgval seconds;
  upb_msgval nanos;
  upb_strview str = jsondec_string(d);
  const char *ptr = str.data;
  const char *end = ptr + str.size;
  int year, mon, day, hour, min, sec;
  char zone;

  /* Shortest valid form is "1972-01-01T01:00:00Z" (20 chars); this also
   * guarantees the fixed-width reads below stay inside the string. */
  if (str.size < 20) goto malformed;

  /* 1972-01-01T01:00:00 */
  year = jsondec_tsdigits(d, &ptr, 4, "-");
  mon = jsondec_tsdigits(d, &ptr, 2, "-");
  day = jsondec_tsdigits(d, &ptr, 2, "T");
  hour = jsondec_tsdigits(d, &ptr, 2, ":");
  min = jsondec_tsdigits(d, &ptr, 2, ":");
  sec = jsondec_tsdigits(d, &ptr, 2, NULL);

  seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec);
  nanos.int32_val = jsondec_nanos(d, &ptr, end);

  /* [+-]08:00 or Z */
  if (ptr == end) goto malformed;
  zone = *ptr++;

  if (zone == 'Z') {
    if (ptr != end) goto malformed;
  } else if (zone == '+' || zone == '-') {
    int ofs;

    if ((end - ptr) != 5) goto malformed;
    ofs = jsondec_tsdigits(d, &ptr, 2, ":00");
    ofs *= 60 * 60;
    /* A "+HH" zone is ahead of UTC, so subtract it; "-HH" is added. */
    seconds.int64_val += (zone == '-') ? ofs : -ofs;
  } else {
    goto malformed;
  }

  if (seconds.int64_val < -62135596800) {
    jsondec_err(d, "Timestamp out of range");
  }

  upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena);
  upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena);
  return;

malformed:
  jsondec_err(d, "Malformed timestamp");
}
1095
/* Parses a JSON string holding a duration such as "3s", "3.000000001s" or
 * "-0.5s" and stores it into fields 1 (seconds) and 2 (nanos) of |msg|.
 *
 * The seconds value must lie within +/- (3652500 * 86400); otherwise
 * "Duration out of range" is reported.  Anything other than a single trailing
 * 's' after the number yields "Malformed duration".  Both errors go through
 * jsondec_err(), which does not return.
 *
 * For negative durations the nanos field is negated so that it carries the
 * same sign as the duration. */
static void jsondec_duration(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  upb_msgval seconds;
  upb_msgval nanos;
  upb_strview str = jsondec_string(d);
  const char *ptr = str.data;
  const char *end = ptr + str.size;
  const int64_t max = (uint64_t)3652500 * 86400;
  /* Remember an explicit leading '-'.  When the integer part is zero (as in
   * "-0.5s") the parsed seconds value is 0 and no longer carries the sign, so
   * testing seconds < 0 alone would decode the input as +0.5s. */
  const bool neg = (ptr != end && *ptr == '-');

  /* "3.000000001s", "3s", etc. */
  ptr = jsondec_buftoint64(d, ptr, end, &seconds.int64_val);
  nanos.int32_val = jsondec_nanos(d, &ptr, end);

  if (end - ptr != 1 || *ptr != 's') {
    jsondec_err(d, "Malformed duration");
  }

  if (seconds.int64_val < -max || seconds.int64_val > max) {
    jsondec_err(d, "Duration out of range");
  }

  if (neg || seconds.int64_val < 0) {
    nanos.int32_val = -nanos.int32_val;
  }

  upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena);
  upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena);
}
1123
/* Decodes a JSON array into |msg|.  Field 1 of |m| is the repeated message
 * field that receives the elements; one sub-message is created per array
 * element and filled in by jsondec_wellknownvalue(). */
static void jsondec_listvalue(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  const upb_fielddef *list_f = upb_msgdef_itof(m, 1);
  const upb_msgdef *elem_m = upb_fielddef_msgsubdef(list_f);
  upb_array *list = upb_msg_mutable(msg, list_f, d->arena).array;

  jsondec_arrstart(d);

  for (;;) {
    upb_msg *elem_msg;
    upb_msgval elem;

    if (!jsondec_arrnext(d)) break;

    /* The element is appended first and populated afterwards. */
    elem_msg = upb_msg_new(elem_m, d->arena);
    elem.msg_val = elem_msg;
    upb_array_append(list, elem, d->arena);
    jsondec_wellknownvalue(d, elem_msg, elem_m);
  }

  jsondec_arrend(d);
}
1139
/* Decodes a JSON object into |msg|.  Field 1 of |m| is a map field; field 2
 * of its entry message gives the message type of the map values.  For every
 * member a value sub-message is created, inserted under the member's key and
 * then filled in by jsondec_wellknownvalue(). */
static void jsondec_struct(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  const upb_fielddef *map_f = upb_msgdef_itof(m, 1);
  const upb_msgdef *map_entry_m = upb_fielddef_msgsubdef(map_f);
  const upb_fielddef *entry_value_f = upb_msgdef_itof(map_entry_m, 2);
  const upb_msgdef *entry_value_m = upb_fielddef_msgsubdef(entry_value_f);
  upb_map *map = upb_msg_mutable(msg, map_f, d->arena).map;

  jsondec_objstart(d);

  for (;;) {
    upb_msg *member_msg;
    upb_msgval member_key;
    upb_msgval member_val;

    if (!jsondec_objnext(d)) break;

    member_msg = upb_msg_new(entry_value_m, d->arena);
    member_key.str_val = jsondec_string(d);
    member_val.msg_val = member_msg;

    /* Insert into the map before parsing the value itself. */
    upb_map_set(map, member_key, member_val, d->arena);
    jsondec_entrysep(d);
    jsondec_wellknownvalue(d, member_msg, entry_value_m);
  }

  jsondec_objend(d);
}
1159
/* Decodes an arbitrary JSON value into |msg|, choosing the destination field
 * of |m| from the kind of JSON token that comes next:
 *
 *   null    -> field 1      number -> field 2      string -> field 3
 *   boolean -> field 4      object -> field 5      array  -> field 6
 *
 * Scalars are stored with upb_msg_set(); objects and arrays are decoded
 * directly into the sub-message returned by upb_msg_mutable(). */
static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg,
                                   const upb_msgdef *m) {
  const upb_fielddef *field;
  upb_msgval scalar;
  upb_msg *child;

  switch (jsondec_peek(d)) {
    /* Container kinds return early: upb_msg_mutable() is enough, so there is
     * no scalar to store afterwards. */
    case JD_ARRAY:
      /* ListValue list_value = 6; */
      field = upb_msgdef_itof(m, 6);
      child = upb_msg_mutable(msg, field, d->arena).msg;
      jsondec_listvalue(d, child, upb_fielddef_msgsubdef(field));
      return;
    case JD_OBJECT:
      /* Struct struct_value = 5; */
      field = upb_msgdef_itof(m, 5);
      child = upb_msg_mutable(msg, field, d->arena).msg;
      jsondec_struct(d, child, upb_fielddef_msgsubdef(field));
      return;

    /* Scalar kinds fall out of the switch and are stored below. */
    case JD_NULL:
      /* NullValue null_value = 1; */
      field = upb_msgdef_itof(m, 1);
      scalar.int32_val = 0;
      jsondec_null(d);
      break;
    case JD_NUMBER:
      /* double number_value = 2; */
      field = upb_msgdef_itof(m, 2);
      scalar.double_val = jsondec_number(d);
      break;
    case JD_STRING:
      /* string string_value = 3; */
      field = upb_msgdef_itof(m, 3);
      scalar.str_val = jsondec_string(d);
      break;
    case JD_TRUE:
      /* bool bool_value = 4; */
      field = upb_msgdef_itof(m, 4);
      scalar.bool_val = true;
      jsondec_true(d);
      break;
    case JD_FALSE:
      /* bool bool_value = 4; */
      field = upb_msgdef_itof(m, 4);
      scalar.bool_val = false;
      jsondec_false(d);
      break;
    default:
      UPB_UNREACHABLE();
  }

  upb_msg_set(msg, field, scalar, d->arena);
}
1214
jsondec_mask(jsondec * d,const char * buf,const char * end)1215 static upb_strview jsondec_mask(jsondec *d, const char *buf, const char *end) {
1216 /* FieldMask fields grow due to inserted '_' characters, so we can't do the
1217 * transform in place. */
1218 const char *ptr = buf;
1219 upb_strview ret;
1220 char *out;
1221
1222 ret.size = end - ptr;
1223 while (ptr < end) {
1224 ret.size += (*ptr >= 'A' && *ptr <= 'Z');
1225 ptr++;
1226 }
1227
1228 out = upb_arena_malloc(d->arena, ret.size);
1229 ptr = buf;
1230 ret.data = out;
1231
1232 while (ptr < end) {
1233 char ch = *ptr++;
1234 if (ch >= 'A' && ch <= 'Z') {
1235 *out++ = '_';
1236 *out++ = ch + 32;
1237 } else if (ch == '_') {
1238 jsondec_err(d, "field mask may not contain '_'");
1239 } else {
1240 *out++ = ch;
1241 }
1242 }
1243
1244 return ret;
1245 }
1246
/* Decodes a JSON string of comma-separated paths into the repeated string
 * field 1 of |msg|.  Each comma-delimited piece is converted by
 * jsondec_mask() and appended to the array. */
static void jsondec_fieldmask(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  /* repeated string paths = 1; */
  const upb_fielddef *paths_f = upb_msgdef_itof(m, 1);
  upb_array *paths = upb_msg_mutable(msg, paths_f, d->arena).array;
  upb_strview text = jsondec_string(d);
  const char *cursor = text.data;
  const char *end = cursor + text.size;

  while (cursor < end) {
    const char *comma = memchr(cursor, ',', end - cursor);
    /* Without another comma, the piece runs to the end of the string. */
    const char *piece_end = comma ? comma : end;
    upb_msgval path;

    path.str_val = jsondec_mask(d, cursor, piece_end);
    upb_array_append(paths, path, d->arena);

    /* Step past the comma, if there was one. */
    cursor = comma ? comma + 1 : end;
  }
}
1268
/* Decodes one member of an Any object (other than "@type") into |msg|, whose
 * type |m| is the type that was named by "@type". */
static void jsondec_anyfield(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  if (upb_msgdef_wellknowntype(m) != UPB_WELLKNOWN_UNSPECIFIED) {
    /* For well-known types: {"@type": "[well-known type]", "value": <X>}
     * where <X> is whatever encoding the WKT normally uses. */
    upb_strview key = jsondec_string(d);
    jsondec_entrysep(d);
    if (!jsondec_streql(key, "value")) {
      jsondec_err(d, "Key for well-known type must be 'value'");
    }
    jsondec_wellknown(d, msg, m);
    return;
  }

  /* For regular types: {"@type": "[user type]", "f1": <V1>, "f2": <V2>}
   * where f1, f2, etc. are the normal fields of this type. */
  jsondec_field(d, msg, m);
}
1285
/* Reads the "@type" string, stores it verbatim into field 1 of |msg| and
 * returns the message definition named by the text after the last '/',
 * looked up in d->any_pool.
 *
 * Reports an error through jsondec_err() (which does not return) when the URL
 * has no '/', when nothing precedes the '/', or when the type is unknown. */
static const upb_msgdef *jsondec_typeurl(jsondec *d, upb_msg *msg,
                                         const upb_msgdef *m) {
  const upb_fielddef *url_f = upb_msgdef_itof(m, 1);
  const upb_msgdef *found;
  upb_strview url = jsondec_string(d);
  const char *begin = url.data;
  const char *end = begin + url.size;
  const char *slash = end;
  upb_msgval url_val;

  /* The URL is stored before it is validated. */
  url_val.str_val = url;
  upb_msg_set(msg, url_f, url_val, d->arena);

  /* Walk backwards to the last '/', stopping at the first character. */
  while (slash > begin) {
    slash--;
    if (*slash == '/') break;
  }

  if (slash == begin || slash == end) {
    jsondec_err(d, "Type url must have at least one '/' and non-empty host");
  }

  /* The message name starts right after the '/'. */
  slash++;
  found = upb_symtab_lookupmsg2(d->any_pool, slash, end - slash);

  if (!found) {
    jsondec_err(d, "Type was not found");
  }

  return found;
}
1314
/* Decodes a JSON object into an Any message |msg|.
 *
 * The "@type" member selects the payload type (see jsondec_typeurl()).  The
 * remaining members are decoded into a freshly created message of that type,
 * which is then serialized with upb_encode() and stored into field 2 of |msg|.
 *
 * Because "@type" need not be the first member, any members that precede it
 * are skipped on the first pass, then re-parsed from a temporary copy once
 * the payload type is known.
 *
 * Errors are reported through jsondec_err(), which does not return. */
static void jsondec_any(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  /* string type_url = 1;
   * bytes value = 2; */
  const upb_fielddef *value_f = upb_msgdef_itof(m, 2);
  upb_msg *any_msg;
  const upb_msgdef *any_m = NULL;
  const char *pre_type_data = NULL;
  const char *pre_type_end = NULL;
  upb_msgval encoded;

  jsondec_objstart(d);

  /* Scan looking for "@type", which is not necessarily first. */
  while (!any_m && jsondec_objnext(d)) {
    const char *start = d->ptr;
    upb_strview name = jsondec_string(d);
    jsondec_entrysep(d);
    if (jsondec_streql(name, "@type")) {
      any_m = jsondec_typeurl(d, msg, m);
      if (pre_type_data) {
        /* Back up from the "@type" key to the comma that separates it from
         * the preceding member; that comma ends the pre-type region. */
        pre_type_end = start;
        while (*pre_type_end != ',') pre_type_end--;
      }
    } else {
      /* Remember where the first pre-"@type" member begins. */
      if (!pre_type_data) pre_type_data = start;
      jsondec_skipval(d);
    }
  }

  if (!any_m) {
    jsondec_err(d, "Any object didn't contain a '@type' field");
  }

  any_msg = upb_msg_new(any_m, d->arena);

  if (pre_type_data) {
    /* Copy the skipped members (without the trailing comma) and terminate
     * them with '}' so they can be parsed as the rest of an object. */
    size_t len = pre_type_end - pre_type_data + 1;
    char *tmp = upb_arena_malloc(d->arena, len);
    const char *saved_ptr = d->ptr;
    const char *saved_end = d->end;
    if (!tmp) {
      /* memcpy() into a NULL buffer would be undefined behavior. */
      jsondec_err(d, "Out of memory");
    }
    memcpy(tmp, pre_type_data, len - 1);
    tmp[len - 1] = '}';
    d->ptr = tmp;
    d->end = tmp + len;
    d->is_first = true;
    while (jsondec_objnext(d)) {
      jsondec_anyfield(d, any_msg, any_m);
    }
    d->ptr = saved_ptr;
    d->end = saved_end;
  }

  /* Members that follow "@type" in the original input. */
  while (jsondec_objnext(d)) {
    jsondec_anyfield(d, any_msg, any_m);
  }

  jsondec_objend(d);

  /* NOTE(review): the result of upb_encode() is stored without being checked;
   * confirm how it signals failure and whether that needs handling here. */
  encoded.str_val.data = upb_encode(any_msg, upb_msgdef_layout(any_m), d->arena,
                                    &encoded.str_val.size);
  upb_msg_set(msg, value_f, encoded, d->arena);
}
1377
/* Decodes a wrapper message: the next JSON value is parsed according to the
 * type of field 1 of |m| and stored into that field of |msg|. */
static void jsondec_wrapper(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  const upb_fielddef *wrapped_f = upb_msgdef_itof(m, 1);
  upb_msgval parsed;

  parsed = jsondec_value(d, wrapped_f);
  upb_msg_set(msg, wrapped_f, parsed, d->arena);
}
1383
/* Dispatches decoding of |msg| to the decoder that matches the well-known
 * type of |m|.  Must only be called for message types that have a well-known
 * type; any other value reaches UPB_UNREACHABLE(). */
static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  switch (upb_msgdef_wellknowntype(m)) {
    /* All wrapper types share one decoder. */
    case UPB_WELLKNOWN_BOOLVALUE:
    case UPB_WELLKNOWN_BYTESVALUE:
    case UPB_WELLKNOWN_STRINGVALUE:
    case UPB_WELLKNOWN_UINT32VALUE:
    case UPB_WELLKNOWN_INT32VALUE:
    case UPB_WELLKNOWN_UINT64VALUE:
    case UPB_WELLKNOWN_INT64VALUE:
    case UPB_WELLKNOWN_FLOATVALUE:
    case UPB_WELLKNOWN_DOUBLEVALUE:
      jsondec_wrapper(d, msg, m);
      return;

    case UPB_WELLKNOWN_STRUCT:
      jsondec_struct(d, msg, m);
      return;
    case UPB_WELLKNOWN_LISTVALUE:
      jsondec_listvalue(d, msg, m);
      return;
    case UPB_WELLKNOWN_VALUE:
      jsondec_wellknownvalue(d, msg, m);
      return;

    case UPB_WELLKNOWN_TIMESTAMP:
      jsondec_timestamp(d, msg, m);
      return;
    case UPB_WELLKNOWN_DURATION:
      jsondec_duration(d, msg, m);
      return;

    case UPB_WELLKNOWN_FIELDMASK:
      jsondec_fieldmask(d, msg, m);
      return;
    case UPB_WELLKNOWN_ANY:
      jsondec_any(d, msg, m);
      return;

    default:
      UPB_UNREACHABLE();
  }
}
1422
/* Public entry point: decodes the JSON text in [buf, buf + size) into |msg|,
 * whose type is |m|.
 *
 * |any_pool|, |options|, |arena| and |status| are stored in the decoder state
 * for use by the parsing routines.  Returns true when jsondec_tomsg() runs to
 * completion, and false when control comes back through the UPB_SETJMP()
 * point instead. */
bool upb_json_decode(const char *buf, size_t size, upb_msg *msg,
                     const upb_msgdef *m, const upb_symtab *any_pool,
                     int options, upb_arena *arena, upb_status *status) {
  jsondec d;

  /* Input window and position bookkeeping. */
  d.ptr = buf;
  d.end = buf + size;
  d.line = 1;
  d.line_begin = d.ptr;

  /* Caller-supplied context. */
  d.arena = arena;
  d.any_pool = any_pool;
  d.options = options;
  d.status = status;

  /* Parser state. */
  d.depth = 64;
  d.is_first = false;
  d.debug_field = NULL;

  if (UPB_SETJMP(d.err)) {
    return false;
  }

  jsondec_tomsg(&d, msg, m);
  return true;
}
1444