1 
2 #include "upb/json_decode.h"
3 
4 #include <errno.h>
5 #include <float.h>
6 #include <inttypes.h>
7 #include <limits.h>
8 #include <math.h>
9 #include <setjmp.h>
10 #include <stdlib.h>
11 #include <string.h>
12 
13 #include "upb/encode.h"
14 #include "upb/reflection.h"
15 
16 /* Special header, must be included last. */
17 #include "upb/port_def.inc"
18 
/* Decoder state that is threaded through every jsondec_* routine. */
typedef struct {
  const char *ptr, *end;  /* Read cursor and one-past-the-end of the input. */
  upb_arena *arena;  /* TODO: should we have a tmp arena for tmp data? */
  /* NOTE(review): presumably the symbol table used to resolve Any payload
   * types; it is not referenced in this part of the file -- confirm. */
  const upb_symtab *any_pool;
  int depth;           /* Remaining nesting budget; see jsondec_push(). */
  upb_status *status;  /* Receives the formatted error message. */
  jmp_buf err;         /* Jump target used by jsondec_err()/jsondec_errf(). */
  int line;            /* Incremented for every '\n' skipped as whitespace. */
  /* Set to the position of each '\n' skipped as whitespace; the column in
   * error messages is reported as ptr - line_begin. */
  const char *line_begin;
  bool is_first;  /* Set by jsondec_push(), cleared by jsondec_seqnext(). */
  int options;    /* Bit flags; UPB_JSONDEC_IGNOREUNKNOWN is tested here. */
  const upb_fielddef *debug_field;  /* Field currently being decoded. */
} jsondec;
32 
/* Kinds of JSON value, as classified by jsondec_rawpeek() from the first
 * character of the value. */
enum { JD_OBJECT, JD_ARRAY, JD_STRING, JD_NUMBER, JD_TRUE, JD_FALSE, JD_NULL };
34 
35 /* Forward declarations of mutually-recursive functions. */
36 static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m);
37 static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f);
38 static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg,
39                                    const upb_msgdef *m);
40 static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m);
41 
/* Returns true iff the string view `str` holds exactly the bytes of the
 * NUL-terminated literal `lit`. */
static bool jsondec_streql(upb_strview str, const char *lit) {
  if (str.size != strlen(lit)) return false;
  return memcmp(str.data, lit, str.size) == 0;
}
45 
jsondec_isnullvalue(const upb_fielddef * f)46 static bool jsondec_isnullvalue(const upb_fielddef *f) {
47   return upb_fielddef_type(f) == UPB_TYPE_ENUM &&
48          strcmp(upb_enumdef_fullname(upb_fielddef_enumsubdef(f)),
49                 "google.protobuf.NullValue") == 0;
50 }
51 
jsondec_isvalue(const upb_fielddef * f)52 static bool jsondec_isvalue(const upb_fielddef *f) {
53   return (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
54           upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) ==
55               UPB_WELLKNOWN_VALUE) ||
56          jsondec_isnullvalue(f);
57 }
58 
jsondec_err(jsondec * d,const char * msg)59 UPB_NORETURN static void jsondec_err(jsondec *d, const char *msg) {
60   upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: %s", d->line,
61                      (int)(d->ptr - d->line_begin), msg);
62   UPB_LONGJMP(d->err, 1);
63 }
64 
jsondec_errf(jsondec * d,const char * fmt,...)65 UPB_NORETURN static void jsondec_errf(jsondec *d, const char *fmt, ...) {
66   va_list argp;
67   upb_status_seterrf(d->status, "Error parsing JSON @%d:%d: ", d->line,
68                      (int)(d->ptr - d->line_begin));
69   va_start(argp, fmt);
70   upb_status_vappenderrf(d->status, fmt, argp);
71   va_end(argp);
72   UPB_LONGJMP(d->err, 1);
73 }
74 
/* Advances the cursor past spaces, tabs, carriage returns and newlines,
 * updating the line bookkeeping on every newline.  Returns with the cursor on
 * the first non-whitespace character; raises "Unexpected EOF" if the input
 * ends first. */
static void jsondec_skipws(jsondec *d) {
  for (; d->ptr != d->end; d->ptr++) {
    const char c = *d->ptr;

    if (c == '\n') {
      d->line++;
      d->line_begin = d->ptr;
    } else if (c != '\r' && c != '\t' && c != ' ') {
      return; /* Not whitespace: leave the cursor on this character. */
    }
  }
  jsondec_err(d, "Unexpected EOF");
}
93 
/* If the next input character is `ch`, consumes it and returns true.
 * Otherwise (including at end of input) leaves the cursor alone and returns
 * false. */
static bool jsondec_tryparsech(jsondec *d, char ch) {
  if (d->ptr != d->end && *d->ptr == ch) {
    d->ptr++;
    return true;
  }
  return false;
}
99 
/* Consumes the exact text `lit` at the cursor, or raises a parse error naming
 * the expected literal if the input does not start with it. */
static void jsondec_parselit(jsondec *d, const char *lit) {
  const size_t remaining = d->end - d->ptr;
  const size_t lit_len = strlen(lit);
  const bool matches =
      remaining >= lit_len && memcmp(d->ptr, lit, lit_len) == 0;

  if (!matches) {
    jsondec_errf(d, "Expected: '%s'", lit);
  }
  d->ptr += lit_len;
}
108 
/* Skips whitespace, then consumes the single character `ch`; raises a parse
 * error if some other character is found. */
static void jsondec_wsch(jsondec *d, char ch) {
  jsondec_skipws(d);
  if (jsondec_tryparsech(d, ch)) return;
  jsondec_errf(d, "Expected: '%c'", ch);
}
115 
/* Consumes the literal "true" at the cursor, or raises a parse error. */
static void jsondec_true(jsondec *d) {
  jsondec_parselit(d, "true");
}
/* Consumes the literal "false" at the cursor, or raises a parse error. */
static void jsondec_false(jsondec *d) {
  jsondec_parselit(d, "false");
}
/* Consumes the literal "null" at the cursor, or raises a parse error. */
static void jsondec_null(jsondec *d) {
  jsondec_parselit(d, "null");
}
119 
/* Consumes the ':' separating an object key from its value, allowing leading
 * whitespace.  Raises "Expected: ':'" if anything else is found. */
static void jsondec_entrysep(jsondec *d) {
  jsondec_wsch(d, ':');
}
124 
/* Classifies the JSON value starting at the cursor by looking only at its
 * first character, without consuming anything.  Returns one of the JD_*
 * constants, or raises a parse error for a character that cannot start a
 * value.  The caller must ensure the cursor is not at end of input. */
static int jsondec_rawpeek(jsondec *d) {
  const char c = *d->ptr;

  if (c == '{') return JD_OBJECT;
  if (c == '[') return JD_ARRAY;
  if (c == '"') return JD_STRING;
  if (c == '-' || (c >= '0' && c <= '9')) return JD_NUMBER;
  if (c == 't') return JD_TRUE;
  if (c == 'f') return JD_FALSE;
  if (c == 'n') return JD_NULL;

  jsondec_errf(d, "Unexpected character: '%c'", *d->ptr);
}
155 
156 /* JSON object/array **********************************************************/
157 
158 /* These are used like so:
159  *
160  * jsondec_objstart(d);
161  * while (jsondec_objnext(d)) {
162  *   ...
163  * }
164  * jsondec_objend(d) */
165 
/* Skips whitespace and returns the JD_* kind of the value that follows,
 * without consuming it. */
static int jsondec_peek(jsondec *d) {
  int kind;

  jsondec_skipws(d);
  kind = jsondec_rawpeek(d);
  return kind;
}
170 
/* Enters one nesting level: uses up one unit of the depth budget (raising an
 * error once it goes negative) and marks the new sequence as having produced
 * no elements yet. */
static void jsondec_push(jsondec *d) {
  d->depth--;
  if (d->depth < 0) {
    jsondec_err(d, "Recursion limit exceeded");
  }
  d->is_first = true;
}
177 
/* Advances to the next element of an object or array.  Returns false,
 * consuming nothing but whitespace, if the next character is `end_ch`.
 * Otherwise consumes the ',' separator (required for every element except
 * the first) and returns true. */
static bool jsondec_seqnext(jsondec *d, char end_ch) {
  const bool was_first = d->is_first;
  d->is_first = false;

  jsondec_skipws(d);
  if (*d->ptr == end_ch) return false;

  if (was_first) return true;
  jsondec_parselit(d, ",");
  return true;
}
186 
/* Begins parsing an array: enters a nesting level, then consumes '['
 * (leading whitespace allowed). */
static void jsondec_arrstart(jsondec *d) {
  jsondec_push(d);

  jsondec_wsch(d, '[');
}
191 
/* Finishes parsing an array: gives back the nesting level taken by
 * jsondec_arrstart(), then consumes ']' (leading whitespace allowed). */
static void jsondec_arrend(jsondec *d) {
  d->depth += 1;

  jsondec_wsch(d, ']');
}
196 
/* Returns true if another array element follows (consuming the separating
 * ',' when needed), or false if the next character is ']'. */
static bool jsondec_arrnext(jsondec *d) {
  const bool has_more = jsondec_seqnext(d, ']');
  return has_more;
}
200 
/* Begins parsing an object: enters a nesting level, then consumes '{'
 * (leading whitespace allowed). */
static void jsondec_objstart(jsondec *d) {
  jsondec_push(d);

  jsondec_wsch(d, '{');
}
205 
/* Finishes parsing an object: gives back the nesting level taken by
 * jsondec_objstart(), then consumes '}' (leading whitespace allowed). */
static void jsondec_objend(jsondec *d) {
  d->depth += 1;

  jsondec_wsch(d, '}');
}
210 
/* Returns true if another object member follows, leaving the cursor at its
 * key, or false if the next character is '}'.  Raises a parse error if the
 * member does not begin with a string. */
static bool jsondec_objnext(jsondec *d) {
  if (jsondec_seqnext(d, '}')) {
    if (jsondec_peek(d) == JD_STRING) return true;
    jsondec_err(d, "Object must start with string");
  }
  return false;
}
218 
219 /* JSON number ****************************************************************/
220 
/* Advances the cursor over a run of ASCII digits.  Returns true iff at least
 * one digit was consumed. */
static bool jsondec_tryskipdigits(jsondec *d) {
  const char *const first = d->ptr;

  while (d->ptr < d->end && *d->ptr >= '0' && *d->ptr <= '9') {
    d->ptr++;
  }

  return d->ptr != first;
}
233 
/* Like jsondec_tryskipdigits(), but raises a parse error when no digit is
 * present at the cursor. */
static void jsondec_skipdigits(jsondec *d) {
  if (jsondec_tryskipdigits(d)) return;
  jsondec_err(d, "Expected one or more digits");
}
239 
/* Parses a JSON number at the cursor and returns it as a double.
 *
 * The text is first validated against the JSON number grammar (optional '-',
 * integer part without a leading zero, optional fraction, optional exponent)
 * while advancing the cursor; the validated span is then converted with
 * strtod().  Raises a parse error on malformed syntax or when the converted
 * value lies outside [-DBL_MAX, DBL_MAX].  The cursor must already be on a
 * character that jsondec_rawpeek() classifies as JD_NUMBER. */
static double jsondec_number(jsondec *d) {
  const char *const num_begin = d->ptr;
  char *parsed_end;
  double result;

  assert(jsondec_rawpeek(d) == JD_NUMBER);

  /* Sign. */
  if (*d->ptr == '-') d->ptr++;

  /* Integer part: either a lone '0' or a run of digits. */
  if (!jsondec_tryparsech(d, '0')) {
    jsondec_skipdigits(d);
  } else if (jsondec_tryskipdigits(d)) {
    jsondec_err(d, "number cannot have leading zero");
  }

  /* Optional fraction.  jsondec_tryparsech() is a no-op at end of input. */
  if (jsondec_tryparsech(d, '.')) {
    jsondec_skipdigits(d);
  }

  /* Optional exponent. */
  if (d->ptr != d->end && (*d->ptr == 'e' || *d->ptr == 'E')) {
    d->ptr++;
    if (d->ptr == d->end) {
      jsondec_err(d, "Unexpected EOF in number");
    }
    if (*d->ptr == '+' || *d->ptr == '-') {
      d->ptr++;
    }
    jsondec_skipdigits(d);
  }

  /* The syntax is now known to be a valid JSON number; strtod() accepts a
   * superset of that syntax, so it must stop exactly at the cursor. */
  errno = 0;
  result = strtod(num_begin, &parsed_end);
  assert(parsed_end == d->ptr);

  /* errno == ERANGE is deliberately not treated as an error: the
   * min/max-val conformance tests currently fail if it is.  Whether the
   * tests or strtod() are at fault still needs investigation. */

  if (result > DBL_MAX || result < -DBL_MAX) {
    jsondec_err(d, "Number out of range");
  }

  return result;
}
298 
299 /* JSON string ****************************************************************/
300 
/* Consumes the character following a backslash and returns the character it
 * stands for.  Handles the single-character escapes only (\uXXXX is handled
 * by the caller); any other character raises a parse error.  The caller must
 * ensure the cursor is not at end of input. */
static char jsondec_escape(jsondec *d) {
  const char code = *d->ptr++;

  if (code == '"') return '\"';
  if (code == '\\') return '\\';
  if (code == '/') return '/';
  if (code == 'b') return '\b';
  if (code == 'f') return '\f';
  if (code == 'n') return '\n';
  if (code == 'r') return '\r';
  if (code == 't') return '\t';

  jsondec_err(d, "Invalid escape char");
}
323 
/* Consumes exactly four hexadecimal digits at the cursor and returns their
 * value (0..0xFFFF).  Raises a parse error if fewer than four bytes of input
 * remain or if a non-hex character is found. */
static uint32_t jsondec_codepoint(jsondec *d) {
  uint32_t cp = 0;
  int i;

  if (d->end - d->ptr < 4) {
    jsondec_err(d, "EOF inside string");
  }

  for (i = 0; i < 4; i++) {
    const char c = *d->ptr++;
    uint32_t nibble;

    if (c >= '0' && c <= '9') {
      nibble = c - '0';
    } else if (c >= 'a' && c <= 'f') {
      nibble = c - 'a' + 10;
    } else if (c >= 'A' && c <= 'F') {
      nibble = c - 'A' + 10;
    } else {
      jsondec_err(d, "Invalid hex digit");
    }
    cp = (cp << 4) | nibble;
  }

  return cp;
}
349 
350 /* Parses a \uXXXX unicode escape (possibly a surrogate pair). */
jsondec_unicode(jsondec * d,char * out)351 static size_t jsondec_unicode(jsondec *d, char* out) {
352   uint32_t cp = jsondec_codepoint(d);
353   if (cp >= 0xd800 && cp <= 0xdbff) {
354     /* Surrogate pair: two 16-bit codepoints become a 32-bit codepoint. */
355     uint32_t high = cp;
356     uint32_t low;
357     jsondec_parselit(d, "\\u");
358     low = jsondec_codepoint(d);
359     if (low < 0xdc00 || low > 0xdfff) {
360       jsondec_err(d, "Invalid low surrogate");
361     }
362     cp = (high & 0x3ff) << 10;
363     cp |= (low & 0x3ff);
364     cp += 0x10000;
365   } else if (cp >= 0xdc00 && cp <= 0xdfff) {
366     jsondec_err(d, "Unpaired low surrogate");
367   }
368 
369   /* Write to UTF-8 */
370   if (cp <= 0x7f) {
371     out[0] = cp;
372     return 1;
373   } else if (cp <= 0x07FF) {
374     out[0] = ((cp >> 6) & 0x1F) | 0xC0;
375     out[1] = ((cp >> 0) & 0x3F) | 0x80;
376     return 2;
377   } else if (cp <= 0xFFFF) {
378     out[0] = ((cp >> 12) & 0x0F) | 0xE0;
379     out[1] = ((cp >> 6) & 0x3F) | 0x80;
380     out[2] = ((cp >> 0) & 0x3F) | 0x80;
381     return 3;
382   } else if (cp < 0x10FFFF) {
383     out[0] = ((cp >> 18) & 0x07) | 0xF0;
384     out[1] = ((cp >> 12) & 0x3f) | 0x80;
385     out[2] = ((cp >> 6) & 0x3f) | 0x80;
386     out[3] = ((cp >> 0) & 0x3f) | 0x80;
387     return 4;
388   } else {
389     jsondec_err(d, "Invalid codepoint");
390   }
391 }
392 
/* Grows the string buffer described by (*buf, *end, *buf_end) to
 * max(8, 2 * current capacity) bytes using d->arena, then repoints all three
 * pointers into the new allocation.  `*end - *buf` (the bytes written so far)
 * is passed to upb_arena_realloc() as the old size.  Raises "Out of memory"
 * if the reallocation fails. */
static void jsondec_resize(jsondec *d, char **buf, char **end, char **buf_end) {
  const size_t old_cap = *buf_end - *buf;
  const size_t used = *end - *buf;
  const size_t new_cap = UPB_MAX(8, 2 * old_cap);

  *buf = upb_arena_realloc(d->arena, *buf, used, new_cap);
  if (*buf == NULL) jsondec_err(d, "Out of memory");

  *buf_end = *buf + new_cap;
  *end = *buf + used;
}
404 
jsondec_string(jsondec * d)405 static upb_strview jsondec_string(jsondec *d) {
406   char *buf = NULL;
407   char *end = NULL;
408   char *buf_end = NULL;
409 
410   jsondec_skipws(d);
411 
412   if (*d->ptr++ != '"') {
413     jsondec_err(d, "Expected string");
414   }
415 
416   while (d->ptr < d->end) {
417     char ch = *d->ptr++;
418 
419     if (end == buf_end) {
420       jsondec_resize(d, &buf, &end, &buf_end);
421     }
422 
423     switch (ch) {
424       case '"': {
425         upb_strview ret;
426         ret.data = buf;
427         ret.size = end - buf;
428         *end = '\0';  /* Needed for possible strtod(). */
429         return ret;
430       }
431       case '\\':
432         if (d->ptr == d->end) goto eof;
433         if (*d->ptr == 'u') {
434           d->ptr++;
435           if (buf_end - end < 4) {
436             /* Allow space for maximum-sized code point (4 bytes). */
437             jsondec_resize(d, &buf, &end, &buf_end);
438           }
439           end += jsondec_unicode(d, end);
440         } else {
441           *end++ = jsondec_escape(d);
442         }
443         break;
444       default:
445         if ((unsigned char)*d->ptr < 0x20) {
446           jsondec_err(d, "Invalid char in JSON string");
447         }
448         *end++ = ch;
449         break;
450     }
451   }
452 
453 eof:
454   jsondec_err(d, "EOF inside string");
455 }
456 
/* Parses and discards one complete JSON value of any kind, recursing into
 * objects and arrays.  The value is still fully validated, so malformed
 * input raises the usual parse errors. */
static void jsondec_skipval(jsondec *d) {
  const int kind = jsondec_peek(d);

  if (kind == JD_OBJECT) {
    jsondec_objstart(d);
    while (jsondec_objnext(d)) {
      jsondec_string(d); /* Key; contents are ignored. */
      jsondec_entrysep(d);
      jsondec_skipval(d);
    }
    jsondec_objend(d);
  } else if (kind == JD_ARRAY) {
    jsondec_arrstart(d);
    while (jsondec_arrnext(d)) {
      jsondec_skipval(d);
    }
    jsondec_arrend(d);
  } else if (kind == JD_TRUE) {
    jsondec_true(d);
  } else if (kind == JD_FALSE) {
    jsondec_false(d);
  } else if (kind == JD_NULL) {
    jsondec_null(d);
  } else if (kind == JD_STRING) {
    jsondec_string(d);
  } else if (kind == JD_NUMBER) {
    jsondec_number(d);
  }
}
492 
493 /* Base64 decoding for bytes fields. ******************************************/
494 
/* Maps one base64 character to its 6-bit value.
 *
 * Both the standard alphabet ('+', '/') and the URL-safe alphabet ('-', '_')
 * are accepted.  For any other byte, including '=', the table holds -1; when
 * converted to the unsigned return type this sets every high bit, so callers
 * that OR shifted results together can detect a bad character by checking
 * whether the combined value is negative as a signed int. */
static unsigned int jsondec_base64_tablelookup(const char ch) {
  /* Indexed by byte value, 16 entries per row. */
  static const signed char table[256] = {
      /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,
      /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
      /* 0x40 */ -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
      /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
      /* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
      /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
      /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};

  /* Index by the byte value.  Going through unsigned char matters: where
   * plain char is signed, a byte >= 0x80 is negative and converting it
   * straight to unsigned would index far outside the table.
   *
   * The signed table entry is sign-extended on return so the high bit will
   * be set on any unexpected char. */
  return table[(unsigned char)ch];
}
539 
/* Decodes a trailing, unpadded base64 group of 2 or 3 characters in
 * [ptr, end) into 1 or 2 bytes at `out`, and returns the advanced output
 * pointer.  Any other group length, or a character outside the base64
 * alphabet, raises "Corrupt base64". */
static char *jsondec_partialbase64(jsondec *d, const char *ptr, const char *end,
                                   char *out) {
  const size_t remaining = end - ptr;
  int32_t bits = -1; /* Stays negative unless a supported length is seen. */

  if (remaining == 2) {
    bits = jsondec_base64_tablelookup(ptr[0]) << 18 |
           jsondec_base64_tablelookup(ptr[1]) << 12;
    *out++ = bits >> 16;
  } else if (remaining == 3) {
    bits = jsondec_base64_tablelookup(ptr[0]) << 18 |
           jsondec_base64_tablelookup(ptr[1]) << 12 |
           jsondec_base64_tablelookup(ptr[2]) << 6;
    *out++ = bits >> 16;
    *out++ = (bits >> 8) & 0xff;
  }

  /* A failed table lookup sets the high bits, making the result negative. */
  if (bits < 0) {
    jsondec_err(d, "Corrupt base64");
  }

  return out;
}
567 
/* Decodes the base64 text in `str` in place and returns the decoded length.
 *
 * Decoding in place is safe because the buffer is freshly allocated (it does
 * not alias the input) and because every 4 input bytes produce at most 3
 * output bytes.  Trailing '=' padding is accepted but not required.  Raises
 * "Corrupt base64" on malformed input. */
static size_t jsondec_base64(jsondec *d, upb_strview str) {
  char *dst = (char*)str.data;
  const char *src = str.data;
  const char *src_end = src + str.size;
  /* End of the last complete group of four input characters. */
  const char *quads_end = src + (str.size & -4);

  while (src < quads_end) {
    int val = jsondec_base64_tablelookup(src[0]) << 18 |
              jsondec_base64_tablelookup(src[1]) << 12 |
              jsondec_base64_tablelookup(src[2]) << 6 |
              jsondec_base64_tablelookup(src[3]) << 0;

    if (val < 0) {
      /* Junk chars or padding.  If this is the final group and it ends in
       * padding, trim the padding and let the tail decoder handle it. */
      if (src_end - src == 4 && src[3] == '=') {
        src_end -= (src[2] == '=') ? 2 : 1;
      }
      break;
    }

    dst[0] = val >> 16;
    dst[1] = (val >> 8) & 0xff;
    dst[2] = val & 0xff;

    src += 4;
    dst += 3;
  }

  if (src < src_end) {
    /* Process remaining chars. We do not require padding. */
    dst = jsondec_partialbase64(d, src, src_end, dst);
  }

  return dst - str.data;
}
606 
607 /* Low-level integer parsing **************************************************/
608 
609 /* We use these hand-written routines instead of strto[u]l() because the "long
610  * long" variants aren't in c89. Also our version allows setting a ptr limit. */
611 
/* Parses a run of decimal digits from [ptr, end) into *val and returns a
 * pointer to the first character that was not consumed.  An empty run yields
 * 0.  Raises "Integer overflow" if the value does not fit in a uint64_t. */
static const char *jsondec_buftouint64(jsondec *d, const char *ptr,
                                       const char *end, uint64_t *val) {
  uint64_t acc = 0;

  for (; ptr < end; ptr++) {
    const unsigned digit = *ptr - '0';
    if (digit >= 10) break; /* Not a digit: stop without consuming it. */

    if (acc > UINT64_MAX / 10 || acc * 10 > UINT64_MAX - digit) {
      jsondec_err(d, "Integer overflow");
    }
    acc = acc * 10 + digit;
  }

  *val = acc;
  return ptr;
}
629 
/* Signed counterpart of jsondec_buftouint64(): accepts an optional leading
 * '-' followed by decimal digits.  Stores the result in *val and returns a
 * pointer to the first unconsumed character.  Raises "Integer overflow" if
 * the value does not fit in an int64_t. */
static const char *jsondec_buftoint64(jsondec *d, const char *ptr,
                                      const char *end, int64_t *val) {
  uint64_t magnitude;
  const bool neg = (ptr != end && *ptr == '-');

  if (neg) ptr++;

  ptr = jsondec_buftouint64(d, ptr, end, &magnitude);

  /* A negative value may have a magnitude one larger than INT64_MAX. */
  if (magnitude > (uint64_t)INT64_MAX + neg) {
    jsondec_err(d, "Integer overflow");
  }

  if (neg) {
    *val = -magnitude;
  } else {
    *val = magnitude;
  }
  return ptr;
}
648 
/* Converts the whole of `str` to a uint64_t.  Raises a parse error if any
 * character is left over after the digits, or on overflow. */
static uint64_t jsondec_strtouint64(jsondec *d, upb_strview str) {
  uint64_t parsed;
  const char *const str_end = str.data + str.size;
  const char *const stop = jsondec_buftouint64(d, str.data, str_end, &parsed);

  if (stop != str_end) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
657 
/* Converts the whole of `str` to an int64_t.  Raises a parse error if any
 * character is left over after the number, or on overflow. */
static int64_t jsondec_strtoint64(jsondec *d, upb_strview str) {
  int64_t parsed;
  const char *const str_end = str.data + str.size;
  const char *const stop = jsondec_buftoint64(d, str.data, str_end, &parsed);

  if (stop != str_end) {
    jsondec_err(d, "Non-number characters in quoted integer");
  }
  return parsed;
}
666 
667 /* Primitive value types ******************************************************/
668 
669 /* Parse INT32 or INT64 value. */
jsondec_int(jsondec * d,const upb_fielddef * f)670 static upb_msgval jsondec_int(jsondec *d, const upb_fielddef *f) {
671   upb_msgval val;
672 
673   switch (jsondec_peek(d)) {
674     case JD_NUMBER: {
675       double dbl = jsondec_number(d);
676       if (dbl > 9223372036854774784.0 || dbl < -9223372036854775808.0) {
677         jsondec_err(d, "JSON number is out of range.");
678       }
679       val.int64_val = dbl;  /* must be guarded, overflow here is UB */
680       if (val.int64_val != dbl) {
681         jsondec_errf(d, "JSON number was not integral (%d != %" PRId64 ")", dbl,
682                      val.int64_val);
683       }
684       break;
685     }
686     case JD_STRING: {
687       upb_strview str = jsondec_string(d);
688       val.int64_val = jsondec_strtoint64(d, str);
689       break;
690     }
691     default:
692       jsondec_err(d, "Expected number or string");
693   }
694 
695   if (upb_fielddef_type(f) == UPB_TYPE_INT32) {
696     if (val.int64_val > INT32_MAX || val.int64_val < INT32_MIN) {
697       jsondec_err(d, "Integer out of range.");
698     }
699     val.int32_val = (int32_t)val.int64_val;
700   }
701 
702   return val;
703 }
704 
705 /* Parse UINT32 or UINT64 value. */
jsondec_uint(jsondec * d,const upb_fielddef * f)706 static upb_msgval jsondec_uint(jsondec *d, const upb_fielddef *f) {
707   upb_msgval val;
708 
709   switch (jsondec_peek(d)) {
710     case JD_NUMBER: {
711       double dbl = jsondec_number(d);
712       if (dbl > 18446744073709549568.0 || dbl < 0) {
713         jsondec_err(d, "JSON number is out of range.");
714       }
715       val.uint64_val = dbl;  /* must be guarded, overflow here is UB */
716       if (val.uint64_val != dbl) {
717         jsondec_errf(d, "JSON number was not integral (%d != %" PRIu64 ")", dbl,
718                      val.uint64_val);
719       }
720       break;
721     }
722     case JD_STRING: {
723       upb_strview str = jsondec_string(d);
724       val.uint64_val = jsondec_strtouint64(d, str);
725       break;
726     }
727     default:
728       jsondec_err(d, "Expected number or string");
729   }
730 
731   if (upb_fielddef_type(f) == UPB_TYPE_UINT32) {
732     if (val.uint64_val > UINT32_MAX) {
733       jsondec_err(d, "Integer out of range.");
734     }
735     val.uint32_val = (uint32_t)val.uint64_val;
736   }
737 
738   return val;
739 }
740 
741 /* Parse DOUBLE or FLOAT value. */
jsondec_double(jsondec * d,const upb_fielddef * f)742 static upb_msgval jsondec_double(jsondec *d, const upb_fielddef *f) {
743   upb_strview str;
744   upb_msgval val;
745 
746   switch (jsondec_peek(d)) {
747     case JD_NUMBER:
748       val.double_val = jsondec_number(d);
749       break;
750     case JD_STRING:
751       str = jsondec_string(d);
752       if (jsondec_streql(str, "NaN")) {
753         val.double_val = NAN;
754       } else if (jsondec_streql(str, "Infinity")) {
755         val.double_val = INFINITY;
756       } else if (jsondec_streql(str, "-Infinity")) {
757         val.double_val = -INFINITY;
758       } else {
759         val.double_val = strtod(str.data, NULL);
760       }
761       break;
762     default:
763       jsondec_err(d, "Expected number or string");
764   }
765 
766   if (upb_fielddef_type(f) == UPB_TYPE_FLOAT) {
767     if (val.double_val != INFINITY && val.double_val != -INFINITY &&
768         (val.double_val > FLT_MAX || val.double_val < -FLT_MAX)) {
769       jsondec_err(d, "Float out of range");
770     }
771     val.float_val = val.double_val;
772   }
773 
774   return val;
775 }
776 
777 /* Parse STRING or BYTES value. */
jsondec_strfield(jsondec * d,const upb_fielddef * f)778 static upb_msgval jsondec_strfield(jsondec *d, const upb_fielddef *f) {
779   upb_msgval val;
780   val.str_val = jsondec_string(d);
781   if (upb_fielddef_type(f) == UPB_TYPE_BYTES) {
782     val.str_val.size = jsondec_base64(d, val.str_val);
783   }
784   return val;
785 }
786 
jsondec_enum(jsondec * d,const upb_fielddef * f)787 static upb_msgval jsondec_enum(jsondec *d, const upb_fielddef *f) {
788   switch (jsondec_peek(d)) {
789     case JD_STRING: {
790       const upb_enumdef *e = upb_fielddef_enumsubdef(f);
791       upb_strview str = jsondec_string(d);
792       upb_msgval val;
793       if (!upb_enumdef_ntoi(e, str.data, str.size, &val.int32_val)) {
794         if (d->options & UPB_JSONDEC_IGNOREUNKNOWN) {
795           val.int32_val = 0;
796         } else {
797           jsondec_errf(d, "Unknown enumerator: '" UPB_STRVIEW_FORMAT "'",
798                        UPB_STRVIEW_ARGS(str));
799         }
800       }
801       return val;
802     }
803     case JD_NULL: {
804       if (jsondec_isnullvalue(f)) {
805         upb_msgval val;
806         jsondec_null(d);
807         val.int32_val = 0;
808         return val;
809       }
810     }
811       /* Fallthrough. */
812     default:
813       return jsondec_int(d, f);
814   }
815 }
816 
jsondec_bool(jsondec * d,const upb_fielddef * f)817 static upb_msgval jsondec_bool(jsondec *d, const upb_fielddef *f) {
818   bool is_map_key = upb_fielddef_number(f) == 1 &&
819                     upb_msgdef_mapentry(upb_fielddef_containingtype(f));
820   upb_msgval val;
821 
822   if (is_map_key) {
823     upb_strview str = jsondec_string(d);
824     if (jsondec_streql(str, "true")) {
825       val.bool_val = true;
826     } else if (jsondec_streql(str, "false")) {
827       val.bool_val = false;
828     } else {
829       jsondec_err(d, "Invalid boolean map key");
830     }
831   } else {
832     switch (jsondec_peek(d)) {
833       case JD_TRUE:
834         val.bool_val = true;
835         jsondec_true(d);
836         break;
837       case JD_FALSE:
838         val.bool_val = false;
839         jsondec_false(d);
840         break;
841       default:
842         jsondec_err(d, "Expected true or false");
843     }
844   }
845 
846   return val;
847 }
848 
849 /* Composite types (array/message/map) ****************************************/
850 
/* Parses a JSON array into the repeated field `f` of `msg`, appending one
 * element per array entry. */
static void jsondec_array(jsondec *d, upb_msg *msg, const upb_fielddef *f) {
  upb_array *target = upb_msg_mutable(msg, f, d->arena).array;

  jsondec_arrstart(d);
  while (jsondec_arrnext(d)) {
    const upb_msgval item = jsondec_value(d, f);
    upb_array_append(target, item, d->arena);
  }
  jsondec_arrend(d);
}
861 
/* Parses a JSON object into the map field `f` of `msg`.  Each member's key
 * and value are decoded according to fields 1 and 2 of the map-entry message
 * type and stored in the map. */
static void jsondec_map(jsondec *d, upb_msg *msg, const upb_fielddef *f) {
  upb_map *target = upb_msg_mutable(msg, f, d->arena).map;
  const upb_msgdef *entry_def = upb_fielddef_msgsubdef(f);
  const upb_fielddef *key_f = upb_msgdef_itof(entry_def, 1);
  const upb_fielddef *val_f = upb_msgdef_itof(entry_def, 2);

  jsondec_objstart(d);
  while (jsondec_objnext(d)) {
    upb_msgval val;
    const upb_msgval key = jsondec_value(d, key_f);

    jsondec_entrysep(d);
    val = jsondec_value(d, val_f);
    upb_map_set(target, key, val, d->arena);
  }
  jsondec_objend(d);
}
878 
/* Decodes a JSON value into the existing message `msg` of type `m`, using
 * the well-known-type decoder when `m` is a well-known type and the generic
 * object decoder otherwise. */
static void jsondec_tomsg(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  if (upb_msgdef_wellknowntype(m) != UPB_WELLKNOWN_UNSPECIFIED) {
    jsondec_wellknown(d, msg, m);
    return;
  }
  jsondec_object(d, msg, m);
}
886 
jsondec_msg(jsondec * d,const upb_fielddef * f)887 static upb_msgval jsondec_msg(jsondec *d, const upb_fielddef *f) {
888   const upb_msgdef *m = upb_fielddef_msgsubdef(f);
889   upb_msg *msg = upb_msg_new(m, d->arena);
890   upb_msgval val;
891 
892   jsondec_tomsg(d, msg, m);
893   val.msg_val = msg;
894   return val;
895 }
896 
/* Parses one `"name": value` member of a JSON object and stores it in `msg`.
 *
 * The name is resolved with upb_msgdef_lookupjsonname().  An unknown name is
 * an error unless UPB_JSONDEC_IGNOREUNKNOWN is set, in which case the value
 * is skipped.  Setting a second member of a real oneof is an error.  A JSON
 * null leaves the field untouched, except for Value/NullValue fields where
 * null is itself a value.  d->debug_field points at the field while its
 * value is being decoded and is restored afterwards. */
static void jsondec_field(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  const upb_fielddef *f;
  const upb_fielddef *saved_debug_field;
  const upb_strview name = jsondec_string(d);

  jsondec_entrysep(d);
  f = upb_msgdef_lookupjsonname(m, name.data, name.size);

  if (!f) {
    if (!(d->options & UPB_JSONDEC_IGNOREUNKNOWN)) {
      jsondec_errf(d, "Unknown field: '" UPB_STRVIEW_FORMAT "'",
                   UPB_STRVIEW_ARGS(name));
    }
    jsondec_skipval(d);
    return;
  }

  if (upb_fielddef_realcontainingoneof(f) &&
      upb_msg_whichoneof(msg, upb_fielddef_containingoneof(f))) {
    jsondec_err(d, "More than one field for this oneof.");
  }

  if (jsondec_peek(d) == JD_NULL && !jsondec_isvalue(f)) {
    /* JSON "null" indicates a default value, so no need to set anything. */
    jsondec_null(d);
    return;
  }

  saved_debug_field = d->debug_field;
  d->debug_field = f;

  if (upb_fielddef_ismap(f)) {
    jsondec_map(d, msg, f);
  } else if (upb_fielddef_isseq(f)) {
    jsondec_array(d, msg, f);
  } else if (!upb_fielddef_issubmsg(f)) {
    const upb_msgval scalar = jsondec_value(d, f);
    upb_msg_set(msg, f, scalar, d->arena);
  } else {
    /* Decode directly into the (possibly pre-existing) sub-message. */
    upb_msg *submsg = upb_msg_mutable(msg, f, d->arena).msg;
    const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
    jsondec_tomsg(d, submsg, subm);
  }

  d->debug_field = saved_debug_field;
}
944 
/* Decodes a JSON object into `msg` of type `m`, one member at a time. */
static void jsondec_object(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  jsondec_objstart(d);

  while (jsondec_objnext(d)) {
    /* The cursor is at the member's key. */
    jsondec_field(d, msg, m);
  }

  jsondec_objend(d);
}
952 
jsondec_value(jsondec * d,const upb_fielddef * f)953 static upb_msgval jsondec_value(jsondec *d, const upb_fielddef *f) {
954   switch (upb_fielddef_type(f)) {
955     case UPB_TYPE_BOOL:
956       return jsondec_bool(d, f);
957     case UPB_TYPE_FLOAT:
958     case UPB_TYPE_DOUBLE:
959       return jsondec_double(d, f);
960     case UPB_TYPE_UINT32:
961     case UPB_TYPE_UINT64:
962       return jsondec_uint(d, f);
963     case UPB_TYPE_INT32:
964     case UPB_TYPE_INT64:
965       return jsondec_int(d, f);
966     case UPB_TYPE_STRING:
967     case UPB_TYPE_BYTES:
968       return jsondec_strfield(d, f);
969     case UPB_TYPE_ENUM:
970       return jsondec_enum(d, f);
971     case UPB_TYPE_MESSAGE:
972       return jsondec_msg(d, f);
973     default:
974       UPB_UNREACHABLE();
975   }
976 }
977 
978 /* Well-known types ***********************************************************/
979 
/* Reads exactly |digits| decimal digits starting at *ptr, optionally followed
 * by the literal text |after| (may be NULL for "nothing required").
 *
 * On success *ptr is advanced past the digits and past |after|, and the
 * parsed number is returned.  Anything else is reported as a malformed
 * timestamp.  The caller must guarantee that the bytes being examined are
 * inside the string; no bounds are checked here. */
static int jsondec_tsdigits(jsondec *d, const char **ptr, size_t digits,
                            const char *after) {
  const char *first = *ptr;
  const char *stop = first + digits;
  size_t suffix_len = 0;
  uint64_t parsed;
  bool bad;

  if (after) suffix_len = strlen(after);

  UPB_ASSERT(digits <= 9);  /* int can't overflow. */

  /* The digit run must end exactly at |stop|; only then is the suffix
   * compared. */
  bad = jsondec_buftouint64(d, first, stop, &parsed) != stop;
  if (!bad && suffix_len != 0) {
    bad = memcmp(stop, after, suffix_len) != 0;
  }
  if (bad) {
    jsondec_err(d, "Malformed timestamp");
  }

  UPB_ASSERT(parsed < INT_MAX);

  *ptr = stop + suffix_len;
  return (int)parsed;
}
999 
/* Parses an optional fractional-seconds suffix (".ddd", at most 9 digits) at
 * *ptr and returns it scaled to nanoseconds.
 *
 * If *ptr does not point at a '.', nothing is consumed and 0 is returned.
 * Otherwise *ptr is advanced past the digits. */
static int jsondec_nanos(jsondec *d, const char **ptr, const char *end) {
  const char *dot = *ptr;
  uint64_t frac = 0;

  if (dot != end && *dot == '.') {
    const char *frac_begin = dot + 1;
    const char *frac_end = jsondec_buftouint64(d, frac_begin, end, &frac);
    int ndigits = (int)(frac_end - frac_begin);
    int pad;

    if (ndigits > 9) {
      jsondec_err(d, "Too many digits for partial seconds");
    }

    /* Right-pad with zeros so that e.g. ".5" becomes 500000000. */
    for (pad = 9 - ndigits; pad != 0; pad--) {
      frac *= 10;
    }

    *ptr = frac_end;
  }

  UPB_ASSERT(frac < INT_MAX);

  return (int)frac;
}
1019 
1020 /* jsondec_epochdays(1970, 1, 1) == 1970-01-01 == 0. */
/* Returns the number of days between 1970-01-01 and the Gregorian calendar
 * date y-m-d (m is 1..12, d is 1-based).  The arithmetic is done in uint32_t
 * and the result converted back to int, so dates before 1970 yield negative
 * values. */
int jsondec_epochdays(int y, int m, int d) {
  /* Count years from March so the leap day falls at the end of the year, and
   * offset the year by 4800 (a multiple of 400 that precedes the minimum
   * year) to keep it non-negative. */
  uint32_t shifted_month = m - 3;
  uint32_t shifted_year = (uint32_t)y + 4800;
  uint32_t days_before_month;
  uint32_t leap_days;

  if (shifted_month > (uint32_t)m) {
    /* m - 3 wrapped below zero: January and February belong to the previous
     * March-based year. */
    shifted_month += 12;
    shifted_year -= 1;
  }

  /* Fixed-point formula for the cumulative day count of the months that
   * precede |shifted_month| in a March-based year. */
  days_before_month = (shifted_month * 62719 + 769) / 2048;
  leap_days = shifted_year / 4 - shifted_year / 100 + shifted_year / 400;

  /* 2472632 is the value of the sum for 1970-01-01. */
  return shifted_year * 365 + leap_days + days_before_month + (d - 1) -
         2472632;
}
1031 
/* Converts a calendar date and time of day into seconds relative to
 * 1970-01-01T00:00:00, using jsondec_epochdays() for the date part. */
static int64_t jsondec_unixtime(int y, int m, int d, int h, int min, int s) {
  int64_t total = (int64_t)jsondec_epochdays(y, m, d) * 86400;
  total += h * 3600;
  total += min * 60;
  total += s;
  return total;
}
1035 
/* Decodes a timestamp string of the form
 *
 *     YYYY-MM-DDTHH:MM:SS[.fraction](Z|+HH:00|-HH:00)
 *
 * and stores it in |msg| as seconds (field number 1) and nanos (field
 * number 2).  A UTC offset is folded into the seconds value; its minutes part
 * must be "00".  Any other shape is reported as a malformed timestamp, and a
 * result earlier than 0001-01-01T00:00:00Z is reported as out of range. */
static void jsondec_timestamp(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  upb_msgval seconds;
  upb_msgval nanos;
  upb_strview text = jsondec_string(d);
  const char *cur = text.data;
  const char *limit = cur + text.size;
  int year, mon, day, hour, min, sec;
  char zone;

  /* Shortest valid input is "1972-01-01T01:00:00Z" (20 bytes).  This also
   * guarantees the fixed-width reads below stay inside the string. */
  if (text.size < 20) goto malformed;

  /* 1972-01-01T01:00:00 */
  year = jsondec_tsdigits(d, &cur, 4, "-");
  mon = jsondec_tsdigits(d, &cur, 2, "-");
  day = jsondec_tsdigits(d, &cur, 2, "T");
  hour = jsondec_tsdigits(d, &cur, 2, ":");
  min = jsondec_tsdigits(d, &cur, 2, ":");
  sec = jsondec_tsdigits(d, &cur, 2, NULL);
  seconds.int64_val = jsondec_unixtime(year, mon, day, hour, min, sec);

  nanos.int32_val = jsondec_nanos(d, &cur, limit);

  /* [+-]08:00 or Z */
  if (cur == limit) goto malformed;
  zone = *cur++;

  if (zone == 'Z') {
    if (cur != limit) goto malformed;
  } else if (zone == '+' || zone == '-') {
    int ofs;
    if ((limit - cur) != 5) goto malformed;
    ofs = jsondec_tsdigits(d, &cur, 2, ":00");
    ofs *= 60 * 60;
    /* Local time ahead of UTC ('+') means the UTC instant is earlier. */
    if (zone == '-') {
      seconds.int64_val += ofs;
    } else {
      seconds.int64_val += -ofs;
    }
  } else {
    goto malformed;
  }

  /* -62135596800 is 0001-01-01T00:00:00Z. */
  if (seconds.int64_val < -62135596800) {
    jsondec_err(d, "Timestamp out of range");
  }

  upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena);
  upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena);
  return;

malformed:
  jsondec_err(d, "Malformed timestamp");
}
1095 
/* Decodes a duration string such as "3s", "3.000000001s" or "-0.5s" and
 * stores it in |msg| as seconds (field number 1) and nanos (field number 2).
 *
 * The string must be an integer, an optional fraction of at most 9 digits,
 * and a trailing 's'.  Magnitudes above 3652500 days' worth of seconds are
 * rejected.  For a negative duration both seconds and nanos are negative. */
static void jsondec_duration(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  upb_msgval seconds;
  upb_msgval nanos;
  upb_strview str = jsondec_string(d);
  const char *ptr = str.data;
  const char *end = ptr + str.size;
  const int64_t max = (uint64_t)3652500 * 86400;
  /* Take the sign from the text rather than from the parsed seconds: for
   * inputs like "-0.5s" the seconds part is 0, which carries no sign, yet the
   * nanos must still come out negative. */
  const bool neg = ptr != end && *ptr == '-';

  /* "3.000000001s", "3s", etc. */
  ptr = jsondec_buftoint64(d, ptr, end, &seconds.int64_val);
  nanos.int32_val = jsondec_nanos(d, &ptr, end);

  if (end - ptr != 1 || *ptr != 's') {
    jsondec_err(d, "Malformed duration");
  }

  if (seconds.int64_val < -max || seconds.int64_val > max) {
    jsondec_err(d, "Duration out of range");
  }

  if (neg) {
    nanos.int32_val = - nanos.int32_val;
  }

  upb_msg_set(msg, upb_msgdef_itof(m, 1), seconds, d->arena);
  upb_msg_set(msg, upb_msgdef_itof(m, 2), nanos, d->arena);
}
1123 
/* Decodes a JSON array into |msg|: every element becomes a new message that
 * is appended to the array held in field number 1 of |m| and then filled in
 * by jsondec_wellknownvalue(). */
static void jsondec_listvalue(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  const upb_fielddef *list_f = upb_msgdef_itof(m, 1);
  const upb_msgdef *elem_m = upb_fielddef_msgsubdef(list_f);
  upb_array *list = upb_msg_mutable(msg, list_f, d->arena).array;

  for (jsondec_arrstart(d); jsondec_arrnext(d);) {
    upb_msg *elem_msg = upb_msg_new(elem_m, d->arena);
    upb_msgval elem;

    /* The element is appended first and populated afterwards. */
    elem.msg_val = elem_msg;
    upb_array_append(list, elem, d->arena);
    jsondec_wellknownvalue(d, elem_msg, elem_m);
  }

  jsondec_arrend(d);
}
1139 
/* Decodes a JSON object into |msg|: every member becomes an entry of the map
 * held in field number 1 of |m|.  The member name is the key; the value is a
 * new message (the type of the map entry's field number 2) filled in by
 * jsondec_wellknownvalue(). */
static void jsondec_struct(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  const upb_fielddef *map_f = upb_msgdef_itof(m, 1);
  const upb_msgdef *map_entry_m = upb_fielddef_msgsubdef(map_f);
  const upb_fielddef *entry_value_f = upb_msgdef_itof(map_entry_m, 2);
  const upb_msgdef *entry_value_m = upb_fielddef_msgsubdef(entry_value_f);
  upb_map *map = upb_msg_mutable(msg, map_f, d->arena).map;

  for (jsondec_objstart(d); jsondec_objnext(d);) {
    upb_msgval k, v;
    upb_msg *child = upb_msg_new(entry_value_m, d->arena);

    k.str_val = jsondec_string(d);
    v.msg_val = child;

    /* The entry is inserted first and the child populated afterwards. */
    upb_map_set(map, k, v, d->arena);
    jsondec_entrysep(d);
    jsondec_wellknownvalue(d, child, entry_value_m);
  }

  jsondec_objend(d);
}
1159 
/* Decodes an arbitrary JSON value into |msg|, choosing the member of |m| by
 * the kind of JSON token at the cursor: null -> field 1, number -> field 2,
 * string -> field 3, true/false -> field 4, object -> field 5 (decoded with
 * jsondec_struct), array -> field 6 (decoded with jsondec_listvalue). */
static void jsondec_wellknownvalue(jsondec *d, upb_msg *msg,
                                   const upb_msgdef *m) {
  const upb_fielddef *target;
  upb_msgval scalar;

  switch (jsondec_peek(d)) {
    /* Composite values are decoded straight into the mutable sub-message,
     * so these two cases return without an explicit upb_msg_set(). */
    case JD_OBJECT: {
      /* Struct struct_value = 5; */
      upb_msg *child;
      target = upb_msgdef_itof(m, 5);
      child = upb_msg_mutable(msg, target, d->arena).msg;
      jsondec_struct(d, child, upb_fielddef_msgsubdef(target));
      return;
    }
    case JD_ARRAY: {
      /* ListValue list_value = 6; */
      upb_msg *child;
      target = upb_msgdef_itof(m, 6);
      child = upb_msg_mutable(msg, target, d->arena).msg;
      jsondec_listvalue(d, child, upb_fielddef_msgsubdef(target));
      return;
    }

    /* Scalar values are collected in |scalar| and stored after the switch. */
    case JD_NULL:
      /* NullValue null_value = 1; */
      target = upb_msgdef_itof(m, 1);
      scalar.int32_val = 0;
      jsondec_null(d);
      break;
    case JD_TRUE:
      /* bool bool_value = 4; */
      target = upb_msgdef_itof(m, 4);
      scalar.bool_val = true;
      jsondec_true(d);
      break;
    case JD_FALSE:
      /* bool bool_value = 4; */
      target = upb_msgdef_itof(m, 4);
      scalar.bool_val = false;
      jsondec_false(d);
      break;
    case JD_STRING:
      /* string string_value = 3; */
      target = upb_msgdef_itof(m, 3);
      scalar.str_val = jsondec_string(d);
      break;
    case JD_NUMBER:
      /* double number_value = 2; */
      target = upb_msgdef_itof(m, 2);
      scalar.double_val = jsondec_number(d);
      break;

    default:
      UPB_UNREACHABLE();
  }

  upb_msg_set(msg, target, scalar, d->arena);
}
1214 
jsondec_mask(jsondec * d,const char * buf,const char * end)1215 static upb_strview jsondec_mask(jsondec *d, const char *buf, const char *end) {
1216   /* FieldMask fields grow due to inserted '_' characters, so we can't do the
1217    * transform in place. */
1218   const char *ptr = buf;
1219   upb_strview ret;
1220   char *out;
1221 
1222   ret.size = end - ptr;
1223   while (ptr < end) {
1224     ret.size += (*ptr >= 'A' && *ptr <= 'Z');
1225     ptr++;
1226   }
1227 
1228   out = upb_arena_malloc(d->arena, ret.size);
1229   ptr = buf;
1230   ret.data = out;
1231 
1232   while (ptr < end) {
1233     char ch = *ptr++;
1234     if (ch >= 'A' && ch <= 'Z') {
1235       *out++ = '_';
1236       *out++ = ch + 32;
1237     } else if (ch == '_') {
1238       jsondec_err(d, "field mask may not contain '_'");
1239     } else {
1240       *out++ = ch;
1241     }
1242   }
1243 
1244   return ret;
1245 }
1246 
/* Decodes a field mask given as one JSON string of comma-separated paths.
 * Each path is converted with jsondec_mask() and appended to the array in
 * field number 1 of |m|.  A trailing comma does not produce an extra empty
 * path. */
static void jsondec_fieldmask(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  /* repeated string paths = 1; */
  const upb_fielddef *paths_f = upb_msgdef_itof(m, 1);
  upb_array *paths = upb_msg_mutable(msg, paths_f, d->arena).array;
  upb_strview joined = jsondec_string(d);
  const char *cur = joined.data;
  const char *limit = cur + joined.size;
  upb_msgval path;

  while (cur < limit) {
    const char *comma = memchr(cur, ',', limit - cur);
    /* Without a further comma the path runs to the end of the string. */
    const char *path_end = comma ? comma : limit;

    path.str_val = jsondec_mask(d, cur, path_end);
    cur = comma ? comma + 1 : limit;
    upb_array_append(paths, path, d->arena);
  }
}
1268 
/* Decodes one member (other than "@type") of an Any object into the payload
 * message |msg| of type |m|. */
static void jsondec_anyfield(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  if (upb_msgdef_wellknowntype(m) != UPB_WELLKNOWN_UNSPECIFIED) {
    /* For well-known types: {"@type": "[well-known type]", "value": <X>}
     * where <X> is whatever encoding the WKT normally uses. */
    upb_strview key = jsondec_string(d);
    jsondec_entrysep(d);
    if (!jsondec_streql(key, "value")) {
      jsondec_err(d, "Key for well-known type must be 'value'");
    }
    jsondec_wellknown(d, msg, m);
    return;
  }

  /* For regular types: {"@type": "[user type]", "f1": <V1>, "f2": <V2>}
   * where f1, f2, etc. are the normal fields of this type. */
  jsondec_field(d, msg, m);
}
1285 
/* Reads the value of an Any's "@type" member, stores it in field number 1 of
 * |msg|, and returns the message definition it names.
 *
 * The type name is the text after the last '/'; it is looked up in
 * d->any_pool.  A URL with no '/', or with nothing before the last '/', is
 * an error, as is a name the pool does not know. */
static const upb_msgdef *jsondec_typeurl(jsondec *d, upb_msg *msg,
                                         const upb_msgdef *m) {
  const upb_fielddef *url_f = upb_msgdef_itof(m, 1);
  const upb_msgdef *found;
  upb_strview url = jsondec_string(d);
  const char *url_end = url.data + url.size;
  const char *slash = url_end;
  upb_msgval url_val;

  url_val.str_val = url;
  upb_msg_set(msg, url_f, url_val, d->arena);

  /* Find message name after the last '/' */
  while (slash > url.data) {
    slash--;
    if (*slash == '/') break;
  }

  /* Stopping at the first byte means either no '/' at all or an empty
   * prefix; slash == url_end can only happen for an empty string. */
  if (slash == url.data || slash == url_end) {
    jsondec_err(d, "Type url must have at least one '/' and non-empty host");
  }

  slash++;
  found = upb_symtab_lookupmsg2(d->any_pool, slash, url_end - slash);

  if (!found) {
    jsondec_err(d, "Type was not found");
  }

  return found;
}
1314 
/* Decodes the JSON object form of an Any into |msg|.
 *
 * The payload type is only known once the "@type" member has been read, and
 * that member need not come first.  So:
 *   1. Members are scanned until "@type" is found.  Members seen before it
 *      are skipped, and the start of that text span is remembered.
 *   2. If anything was skipped, the span is copied into a temporary buffer,
 *      terminated with '}', and parsed as payload members.
 *   3. The members that follow "@type" are parsed in place.
 *   4. The payload message is serialized with upb_encode() and stored in
 *      field number 2; the type URL was stored by jsondec_typeurl(). */
static void jsondec_any(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  /* string type_url = 1;
   * bytes value = 2; */
  const upb_fielddef *value_f = upb_msgdef_itof(m, 2);
  upb_msg *any_msg;
  const upb_msgdef *any_m = NULL;
  const char *pre_type_data = NULL;
  const char *pre_type_end = NULL;
  upb_msgval encoded;

  jsondec_objstart(d);

  /* Scan looking for "@type", which is not necessarily first. */
  while (!any_m && jsondec_objnext(d)) {
    const char *start = d->ptr;
    upb_strview name = jsondec_string(d);
    jsondec_entrysep(d);
    if (jsondec_streql(name, "@type")) {
      any_m = jsondec_typeurl(d, msg, m);
      if (pre_type_data) {
        /* Back up from the "@type" key to the comma that separates it from
         * the preceding member; that comma ends the skipped span. */
        pre_type_end = start;
        while (*pre_type_end != ',') pre_type_end--;
      }
    } else {
      if (!pre_type_data) pre_type_data = start;
      jsondec_skipval(d);
    }
  }

  if (!any_m) {
    jsondec_err(d, "Any object didn't contain a '@type' field");
  }

  any_msg = upb_msg_new(any_m, d->arena);

  if (pre_type_data) {
    /* len counts the skipped text plus one byte for the closing '}'. */
    size_t len = pre_type_end - pre_type_data + 1;
    char *tmp = upb_arena_malloc(d->arena, len);
    const char *saved_ptr = d->ptr;
    const char *saved_end = d->end;
    if (!tmp) {
      /* Report allocation failure instead of copying into a NULL buffer. */
      jsondec_err(d, "Out of memory");
    }
    memcpy(tmp, pre_type_data, len - 1);
    tmp[len - 1] = '}';
    /* Temporarily point the decoder at the copy, positioned as if an object
     * had just been opened. */
    d->ptr = tmp;
    d->end = tmp + len;
    d->is_first = true;
    while (jsondec_objnext(d)) {
      jsondec_anyfield(d, any_msg, any_m);
    }
    d->ptr = saved_ptr;
    d->end = saved_end;
  }

  while (jsondec_objnext(d)) {
    jsondec_anyfield(d, any_msg, any_m);
  }

  jsondec_objend(d);

  encoded.str_val.data = upb_encode(any_msg, upb_msgdef_layout(any_m), d->arena,
                                    &encoded.str_val.size);
  upb_msg_set(msg, value_f, encoded, d->arena);
}
1377 
/* Decodes a wrapper message: the JSON value at the cursor is decoded as the
 * type of field number 1 of |m| and stored in that field of |msg|. */
static void jsondec_wrapper(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  const upb_fielddef *payload_f = upb_msgdef_itof(m, 1);
  upb_msgval payload;

  payload = jsondec_value(d, payload_f);
  upb_msg_set(msg, payload_f, payload, d->arena);
}
1383 
/* Decodes a message whose type |m| is one of the well-known types that have
 * a special JSON representation, by handing off to the decoder for that
 * type.  Must not be called for any other message type. */
static void jsondec_wellknown(jsondec *d, upb_msg *msg, const upb_msgdef *m) {
  switch (upb_msgdef_wellknowntype(m)) {
    /* Wrappers around a single scalar all share one decoder. */
    case UPB_WELLKNOWN_BOOLVALUE:
    case UPB_WELLKNOWN_BYTESVALUE:
    case UPB_WELLKNOWN_STRINGVALUE:
    case UPB_WELLKNOWN_UINT32VALUE:
    case UPB_WELLKNOWN_INT32VALUE:
    case UPB_WELLKNOWN_UINT64VALUE:
    case UPB_WELLKNOWN_INT64VALUE:
    case UPB_WELLKNOWN_FLOATVALUE:
    case UPB_WELLKNOWN_DOUBLEVALUE:
      jsondec_wrapper(d, msg, m);
      return;

    /* Dynamically-typed JSON: Struct, ListValue and Value. */
    case UPB_WELLKNOWN_STRUCT:
      jsondec_struct(d, msg, m);
      return;
    case UPB_WELLKNOWN_LISTVALUE:
      jsondec_listvalue(d, msg, m);
      return;
    case UPB_WELLKNOWN_VALUE:
      jsondec_wellknownvalue(d, msg, m);
      return;

    /* Types encoded as a single JSON string. */
    case UPB_WELLKNOWN_TIMESTAMP:
      jsondec_timestamp(d, msg, m);
      return;
    case UPB_WELLKNOWN_DURATION:
      jsondec_duration(d, msg, m);
      return;
    case UPB_WELLKNOWN_FIELDMASK:
      jsondec_fieldmask(d, msg, m);
      return;

    case UPB_WELLKNOWN_ANY:
      jsondec_any(d, msg, m);
      return;

    default:
      UPB_UNREACHABLE();
  }
}
1422 
/* Decodes the JSON text in buf[0, size) into |msg|, a message of type |m|.
 *
 *   any_pool  symbol table used to look up the types named by Any values.
 *   options   bit flags, e.g. UPB_JSONDEC_IGNOREUNKNOWN.
 *   arena     arena that decoded data is allocated from.
 *   status    stored in the decoder state for use by the error helpers.
 *
 * Returns true on success.  Returns false if decoding was abandoned through
 * the decoder's jump buffer. */
bool upb_json_decode(const char *buf, size_t size, upb_msg *msg,
                     const upb_msgdef *m, const upb_symtab *any_pool,
                     int options, upb_arena *arena, upb_status *status) {
  jsondec d;

  /* Input window and position tracking. */
  d.ptr = buf;
  d.end = buf + size;
  d.line = 1;
  d.line_begin = buf;

  /* Caller-supplied collaborators. */
  d.arena = arena;
  d.any_pool = any_pool;
  d.status = status;
  d.options = options;

  /* Parser state. */
  d.depth = 64;
  d.is_first = false;
  d.debug_field = NULL;

  /* Control comes back here with a non-zero value when decoding is
   * abandoned. */
  if (UPB_SETJMP(d.err)) {
    return false;
  }

  jsondec_tomsg(&d, msg, m);
  return true;
}
1444