1 
2 #line 1 "upb/json/parser.rl"
3 /*
4 ** upb::json::Parser (upb_json_parser)
5 **
6 ** A parser that uses the Ragel State Machine Compiler to generate
7 ** the finite automata.
8 **
9 ** Ragel only natively handles regular languages, but we can manually
10 ** program it a bit to handle context-free languages like JSON, by using
11 ** the "fcall" and "fret" constructs.
12 **
13 ** This parser can handle the basics, but needs several things to be fleshed
14 ** out:
15 **
16 ** - handling of unicode escape sequences (including high surrogate pairs).
17 ** - properly check and report errors for unknown fields, stack overflow,
18 **   improper array nesting (or lack of nesting).
19 ** - handling of base64 sequences with padding characters.
20 ** - handling of push-back (non-success returns from sink functions).
21 ** - handling of keys/escape-sequences/etc that span input buffers.
22 */
23 
24 #include <ctype.h>
25 #include <errno.h>
26 #include <float.h>
27 #include <math.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 
33 #include <time.h>
34 
35 #include "upb/json/parser.h"
36 #include "upb/pb/encoder.h"
37 
38 #include "upb/port_def.inc"
39 
40 #define UPB_JSON_MAX_DEPTH 64
41 
/* Type of value message.
 * NOTE(review): presumably these are the values passed as |value_type| to
 * start_value_object() -- confirm against its callers. */
enum {
  VALUE_NULLVALUE   = 0,
  VALUE_NUMBERVALUE = 1,
  VALUE_STRINGVALUE = 2,
  VALUE_BOOLVALUE   = 3,
  VALUE_STRUCTVALUE = 4,
  VALUE_LISTVALUE   = 5
};
51 
52 /* Forward declare */
53 static bool is_top_level(upb_json_parser *p);
54 static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type);
55 static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type);
56 
57 static bool is_number_wrapper_object(upb_json_parser *p);
58 static bool does_number_wrapper_start(upb_json_parser *p);
59 static bool does_number_wrapper_end(upb_json_parser *p);
60 
61 static bool is_string_wrapper_object(upb_json_parser *p);
62 static bool does_string_wrapper_start(upb_json_parser *p);
63 static bool does_string_wrapper_end(upb_json_parser *p);
64 
65 static bool does_fieldmask_start(upb_json_parser *p);
66 static bool does_fieldmask_end(upb_json_parser *p);
67 static void start_fieldmask_object(upb_json_parser *p);
68 static void end_fieldmask_object(upb_json_parser *p);
69 
70 static void start_wrapper_object(upb_json_parser *p);
71 static void end_wrapper_object(upb_json_parser *p);
72 
73 static void start_value_object(upb_json_parser *p, int value_type);
74 static void end_value_object(upb_json_parser *p);
75 
76 static void start_listvalue_object(upb_json_parser *p);
77 static void end_listvalue_object(upb_json_parser *p);
78 
79 static void start_structvalue_object(upb_json_parser *p);
80 static void end_structvalue_object(upb_json_parser *p);
81 
82 static void start_object(upb_json_parser *p);
83 static void end_object(upb_json_parser *p);
84 
85 static void start_any_object(upb_json_parser *p, const char *ptr);
86 static bool end_any_object(upb_json_parser *p, const char *ptr);
87 
88 static bool start_subobject(upb_json_parser *p);
89 static void end_subobject(upb_json_parser *p);
90 
91 static void start_member(upb_json_parser *p);
92 static void end_member(upb_json_parser *p);
93 static bool end_membername(upb_json_parser *p);
94 
95 static void start_any_member(upb_json_parser *p, const char *ptr);
96 static void end_any_member(upb_json_parser *p, const char *ptr);
97 static bool end_any_membername(upb_json_parser *p);
98 
99 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
100              const upb_bufhandle *handle);
101 static bool end(void *closure, const void *hd);
102 
103 static const char eof_ch = 'e';
104 
/* stringsink
 *
 * A bytes sink that collects everything written to it in a growable,
 * malloc()-owned buffer.  Set up by upb_stringsink_init() and released by
 * upb_stringsink_uninit(). */
typedef struct {
  upb_byteshandler handler;  /* Carries the start/string callbacks. */
  upb_bytessink sink;        /* Sink bound to |handler| and this struct. */
  char *ptr;                 /* Heap buffer holding the collected bytes. */
  size_t len, size;          /* Bytes used in |ptr| / capacity of |ptr|. */
} upb_stringsink;
112 
113 
stringsink_start(void * _sink,const void * hd,size_t size_hint)114 static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
115   upb_stringsink *sink = _sink;
116   sink->len = 0;
117   UPB_UNUSED(hd);
118   UPB_UNUSED(size_hint);
119   return sink;
120 }
121 
stringsink_string(void * _sink,const void * hd,const char * ptr,size_t len,const upb_bufhandle * handle)122 static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
123                                 size_t len, const upb_bufhandle *handle) {
124   upb_stringsink *sink = _sink;
125   size_t new_size = sink->size;
126 
127   UPB_UNUSED(hd);
128   UPB_UNUSED(handle);
129 
130   while (sink->len + len > new_size) {
131     new_size *= 2;
132   }
133 
134   if (new_size != sink->size) {
135     sink->ptr = realloc(sink->ptr, new_size);
136     sink->size = new_size;
137   }
138 
139   memcpy(sink->ptr + sink->len, ptr, len);
140   sink->len += len;
141 
142   return len;
143 }
144 
/* Prepares |sink| for use: installs the string-sink callbacks on its handler,
 * binds its bytes sink to that handler, and allocates an initial 32-byte
 * buffer.
 * NOTE(review): the malloc() result is not checked here. */
void upb_stringsink_init(upb_stringsink *sink) {
  upb_byteshandler *handler = &sink->handler;

  upb_byteshandler_init(handler);
  upb_byteshandler_setstartstr(handler, stringsink_start, NULL);
  upb_byteshandler_setstring(handler, stringsink_string, NULL);

  upb_bytessink_reset(&sink->sink, handler, sink);

  sink->len = 0;
  sink->size = 32;
  sink->ptr = malloc(sink->size);
}
156 
/* Releases the heap buffer owned by |sink|.  The struct itself is not freed. */
void upb_stringsink_uninit(upb_stringsink *sink) {
  free(sink->ptr);
}
158 
/* State kept while parsing a google.protobuf.Any message.  Created by
 * json_parser_any_frame_new(); the caches and the string sink it owns are
 * released by json_parser_any_frame_free(). */
typedef struct {
  /* For encoding Any value field in binary format. */
  upb_handlercache *encoder_handlercache;
  upb_stringsink stringsink;

  /* For decoding Any value field in json format. */
  upb_json_codecache *parser_codecache;
  upb_sink sink;
  /* NULL until the payload type is set; a non-NULL value is what
   * json_parser_any_frame_has_type_url() tests for. */
  upb_json_parser *parser;

  /* Mark the range of uninterpreted values in json input before type url. */
  const char *before_type_url_start;
  const char *before_type_url_end;

  /* Mark the range of uninterpreted values in json input after type url. */
  const char *after_type_url_start;
} upb_jsonparser_any_frame;
176 
/* One entry of the parser's frame stack (upb_json_parser.stack), which tracks
 * the JSON scopes currently open.  All fields except |sink| are reset by
 * init_frame(). */
typedef struct {
  /* Sink that values parsed in this frame are written to (see e.g.
   * multipart_text() and base64_push()). */
  upb_sink sink;

  /* The current message in which we're parsing, and the field whose value we're
   * expecting next. */
  const upb_msgdef *m;
  const upb_fielddef *f;

  /* The table mapping json name to fielddef for this message. */
  const upb_strtable *name_table;

  /* We are in a repeated-field context. We need this flag to decide whether to
   * handle the array as a normal repeated field or a
   * google.protobuf.ListValue/google.protobuf.Value. */
  bool is_repeated;

  /* We are in a repeated-field context, ready to emit mapentries as
   * submessages. This flag alters the start-of-object (open-brace) behavior to
   * begin a sequence of mapentry messages rather than a single submessage. */
  bool is_map;

  /* We are in a map-entry message context. This flag is set when parsing the
   * value field of a single map entry and indicates to all value-field parsers
   * (subobjects, strings, numbers, and bools) that the map-entry submessage
   * should end as soon as the value is parsed. */
  bool is_mapentry;

  /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
   * message's map field that we're currently parsing. This differs from |f|
   * because |f| is the field in the *current* message (i.e., the map-entry
   * message itself), not the parent's field that leads to this map. */
  const upb_fielddef *mapfield;

  /* We are in an Any message context. This flag is set when parsing the Any
   * message and indicates to all field parsers (subobjects, strings, numbers,
   * and bools) that the parsed field should be serialized as binary data or
   * cached (type url not found yet). */
  bool is_any;

  /* The type of packed message in Any. */
  upb_jsonparser_any_frame *any_frame;

  /* True if the field to be parsed is unknown. */
  bool is_unknown_field;
} upb_jsonparser_frame;
222 
/* Resets a stack frame to its neutral state: no message/field/table, every
 * context flag cleared.  The frame's |sink| member is intentionally left
 * untouched. */
static void init_frame(upb_jsonparser_frame* frame) {
  /* Pointer members. */
  frame->m = NULL;
  frame->f = NULL;
  frame->name_table = NULL;
  frame->mapfield = NULL;
  frame->any_frame = NULL;

  /* Context flags. */
  frame->is_repeated = false;
  frame->is_map = false;
  frame->is_mapentry = false;
  frame->is_any = false;
  frame->is_unknown_field = false;
}
235 
/* Complete state of one JSON parser instance. */
struct upb_json_parser {
  /* Arena used for the parser's own allocations (e.g. the accumulate buffer
   * and Any frames). */
  upb_arena *arena;
  /* Parser method for the message type being parsed; its cache is used to
   * look up name tables (see set_name_table()). */
  const upb_json_parsermethod *method;
  upb_bytessink input_;

  /* Stack to track the JSON scopes we are in. */
  upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
  /* Current (innermost) frame. */
  upb_jsonparser_frame *top;
  /* check_stack() reports "Nesting too deep" once top + 1 reaches this. */
  upb_jsonparser_frame *limit;

  /* Where error messages are reported. */
  upb_status *status;

  /* Ragel's internal parsing stack for the parsing state machine. */
  int current_state;
  int parser_stack[UPB_JSON_MAX_DEPTH];
  int parser_top;

  /* The handle for the current buffer. */
  const upb_bufhandle *handle;

  /* Accumulate buffer.  See details in parser.rl. */
  const char *accumulated;
  size_t accumulated_len;
  char *accumulate_buf;
  size_t accumulate_buf_size;

  /* Multi-part text data.  See details in parser.rl. */
  int multipart_state;
  upb_selector_t string_selector;

  /* Input capture.  See details in parser.rl. */
  const char *capture;

  /* Intermediate result of parsing a unicode escape sequence. */
  uint32_t digit;

  /* For resolve type url in Any. */
  const upb_symtab *symtab;

  /* Whether to proceed if unknown field is met. */
  bool ignore_json_unknown;

  /* Cache for parsing timestamp due to base and zone are handled in different
   * handlers. */
  struct tm tm;
};
282 
start_jsonparser_frame(upb_json_parser * p)283 static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) {
284   upb_jsonparser_frame *inner;
285   inner = p->top + 1;
286   init_frame(inner);
287   return inner;
288 }
289 
/* Cache of parser methods, keyed by message definition. */
struct upb_json_codecache {
  /* NOTE(review): presumably the arena that owns the cached methods --
   * confirm against upb_json_codecache_new(). */
  upb_arena *arena;
  upb_inttable methods;   /* upb_msgdef* -> upb_json_parsermethod* */
};
294 
/* Per-message-type parsing data stored in a upb_json_codecache. */
struct upb_json_parsermethod {
  /* The cache this method belongs to; set_name_table() searches its
   * |methods| table to find the method for a frame's message. */
  const upb_json_codecache *cache;
  upb_byteshandler input_handler_;

  /* Maps json_name -> fielddef */
  upb_strtable name_table;
};
302 
/* Returns false from the enclosing function when |x| evaluates to false.
 * The do { } while (0) wrapper makes the expansion a single statement, so the
 * macro can be used as the body of an if/else without its inner "if"
 * capturing the caller's "else". */
#define PARSER_CHECK_RETURN(x) \
  do {                         \
    if (!(x)) return false;    \
  } while (0)
304 
json_parser_any_frame_new(upb_json_parser * p)305 static upb_jsonparser_any_frame *json_parser_any_frame_new(
306     upb_json_parser *p) {
307   upb_jsonparser_any_frame *frame;
308 
309   frame = upb_arena_malloc(p->arena, sizeof(upb_jsonparser_any_frame));
310 
311   frame->encoder_handlercache = upb_pb_encoder_newcache();
312   frame->parser_codecache = upb_json_codecache_new();
313   frame->parser = NULL;
314   frame->before_type_url_start = NULL;
315   frame->before_type_url_end = NULL;
316   frame->after_type_url_start = NULL;
317 
318   upb_stringsink_init(&frame->stringsink);
319 
320   return frame;
321 }
322 
/* Wires up the Any frame for a payload of type |payload_type|: creates a
 * binary encoder that writes into the frame's string sink, then a nested JSON
 * parser whose output is fed to that encoder.  The nested parser shares the
 * outer parser's arena, symtab, status and unknown-field policy. */
static void json_parser_any_frame_set_payload_type(
    upb_json_parser *p,
    upb_jsonparser_any_frame *frame,
    const upb_msgdef *payload_type) {
  const upb_handlers *encoder_handlers;
  upb_pb_encoder *payload_encoder;
  const upb_json_parsermethod *payload_method;

  /* Encoder side. */
  encoder_handlers =
      upb_handlercache_get(frame->encoder_handlercache, payload_type);
  payload_encoder =
      upb_pb_encoder_create(p->arena, encoder_handlers, frame->stringsink.sink);

  /* Parser side. */
  payload_method =
      upb_json_codecache_get(frame->parser_codecache, payload_type);
  upb_sink_reset(&frame->sink, encoder_handlers, payload_encoder);
  frame->parser = upb_json_parser_create(p->arena, payload_method, p->symtab,
                                         frame->sink, p->status,
                                         p->ignore_json_unknown);
}
342 
/* Releases the resources an Any frame owns outside the arena: both caches and
 * the string sink's buffer.  The frame struct itself is not freed here. */
static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) {
  /* Caches created in json_parser_any_frame_new(). */
  upb_handlercache_free(frame->encoder_handlercache);
  upb_json_codecache_free(frame->parser_codecache);

  /* Heap buffer behind the string sink. */
  upb_stringsink_uninit(&frame->stringsink);
}
348 
/* True once the frame has a nested parser, i.e. after the payload type has
 * been set. */
static bool json_parser_any_frame_has_type_url(
  upb_jsonparser_any_frame *frame) {
  if (frame->parser == NULL) {
    return false;
  }
  return true;
}
353 
/* True when the recorded "before type url" range is non-empty, i.e. its start
 * and end markers differ. */
static bool json_parser_any_frame_has_value_before_type_url(
  upb_jsonparser_any_frame *frame) {
  const char *range_start = frame->before_type_url_start;
  const char *range_end = frame->before_type_url_end;

  return range_start != range_end;
}
358 
/* True when a start position for input following the type url has been
 * recorded. */
static bool json_parser_any_frame_has_value_after_type_url(
  upb_jsonparser_any_frame *frame) {
  if (frame->after_type_url_start == NULL) {
    return false;
  }
  return true;
}
363 
/* True when uninterpreted input was recorded either before or after the type
 * url. */
static bool json_parser_any_frame_has_value(
  upb_jsonparser_any_frame *frame) {
  if (json_parser_any_frame_has_value_before_type_url(frame)) {
    return true;
  }
  return json_parser_any_frame_has_value_after_type_url(frame);
}
369 
/* Moves the end marker of the "before type url" range to |ptr|.  Has no
 * effect once the frame has a nested parser (the type url is known). */
static void json_parser_any_frame_set_before_type_url_end(
    upb_jsonparser_any_frame *frame,
    const char *ptr) {
  if (frame->parser != NULL) {
    return;
  }
  frame->before_type_url_end = ptr;
}
377 
/* Records |ptr| as the start of the input following the type url.  Only the
 * first call after the type url is known has any effect; earlier calls and
 * repeated calls are ignored. */
static void json_parser_any_frame_set_after_type_url_start_once(
    upb_jsonparser_any_frame *frame,
    const char *ptr) {
  if (!json_parser_any_frame_has_type_url(frame)) {
    return;  /* Type url not seen yet. */
  }
  if (frame->after_type_url_start != NULL) {
    return;  /* Already recorded. */
  }
  frame->after_type_url_start = ptr;
}
386 
387 /* Used to signal that a capture has been suspended. */
388 static char suspend_capture;
389 
getsel_for_handlertype(upb_json_parser * p,upb_handlertype_t type)390 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
391                                              upb_handlertype_t type) {
392   upb_selector_t sel;
393   bool ok = upb_handlers_getselector(p->top->f, type, &sel);
394   UPB_ASSUME(ok);
395   return sel;
396 }
397 
parser_getsel(upb_json_parser * p)398 static upb_selector_t parser_getsel(upb_json_parser *p) {
399   return getsel_for_handlertype(
400       p, upb_handlers_getprimitivehandlertype(p->top->f));
401 }
402 
/* Verifies that one more frame can be pushed.  Returns true if so; otherwise
 * sets a "Nesting too deep" error on the parser status and returns false. */
static bool check_stack(upb_json_parser *p) {
  const bool has_room = (p->top + 1) != p->limit;

  if (!has_room) {
    upb_status_seterrmsg(p->status, "Nesting too deep");
  }

  return has_room;
}
411 
/* Points frame->name_table at the json-name table of the parser method that
 * the code cache holds for the frame's message (frame->m).  The cache lookup
 * is assumed (UPB_ASSUME) to succeed. */
static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
  const upb_json_codecache *codecache = p->method->cache;
  const upb_json_parsermethod *msg_method;
  upb_value entry;
  bool found;

  found = upb_inttable_lookupptr(&codecache->methods, frame->m, &entry);
  UPB_ASSUME(found);

  msg_method = upb_value_getconstptr(entry);
  frame->name_table = &msg_method->name_table;
}
424 
425 /* There are GCC/Clang built-ins for overflow checking which we could start
426  * using if there was any performance benefit to it. */
427 
/* Overflow-checked size_t addition.  On success stores a + b in *c and
 * returns true; if the sum would exceed SIZE_MAX, returns false and leaves *c
 * untouched. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;

  if (b <= headroom) {
    *c = a + b;
    return true;
  }
  return false;
}
433 
/* Multiplies two size_t values, clamping the result to SIZE_MAX when the true
 * product does not fit. */
static size_t saturating_multiply(size_t a, size_t b) {
  size_t product;

  if (b == 0) {
    return 0;
  }

  /* Unsigned arithmetic wraps, so overflow shows up as a failed round trip. */
  product = a * b;
  return (product / b == a) ? product : SIZE_MAX;
}
442 
443 
444 /* Base64 decoding ************************************************************/
445 
446 /* TODO(haberman): make this streaming. */
447 
/* Maps every possible byte value (0-255) to its 6-bit base64 value, or -1 for
 * bytes that are not part of the base64 alphabet.  Note that the padding
 * character '=' also maps to -1. */
static const signed char b64table[] = {
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
  60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
  -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
  07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
  23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
  -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
  49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
};
482 
483 /* Returns the table value sign-extended to 32 bits.  Knowing that the upper
484  * bits will be 1 for unrecognized characters makes it easier to check for
485  * this error condition later (see below). */
b64lookup(unsigned char ch)486 int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
487 
/* Returns true if |ch| is neither a character of the base64 alphabet nor the
 * padding character '='. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
491 
/* Decodes the base64 text [ptr, ptr + len) and pushes the resulting bytes to
 * the current frame's sink using selector |sel|.
 *
 * The input is consumed in groups of four characters.  A group that contains
 * '=' padding yields one or two bytes and ends decoding.  Returns true on
 * success; on failure sets an error on p->status and returns false when:
 *   - the input length is not a multiple of four,
 *   - a character is neither base64 nor '=',
 *   - the padding is malformed.
 *
 * NOTE(review): a padded group returns immediately, so any input following it
 * is silently ignored rather than rejected.  Return values of
 * upb_sink_putstring() are not examined. */
static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
                        size_t len) {
  const char *limit = ptr + len;
  for (; ptr < limit; ptr += 4) {
    uint32_t val;
    char output[3];

    if (limit - ptr < 4) {
      upb_status_seterrf(p->status,
                         "Base64 input for bytes field not a multiple of 4: %s",
                         upb_fielddef_name(p->top->f));
      return false;
    }

    /* b64lookup() returns -1 for unrecognized characters.  Convert to
     * unsigned before shifting: left-shifting a negative signed value is
     * undefined behavior, whereas the unsigned shift is well defined and
     * still leaves the top bit set for any unrecognized character. */
    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6  |
          (uint32_t)b64lookup(ptr[3]);

    /* Test the upper bit; returns true if any of the characters returned -1. */
    if (val & 0x80000000) {
      goto otherchar;
    }

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    output[2] = val & 0xff;
    upb_sink_putstring(p->top->sink, sel, output, 3, NULL);
  }
  return true;

otherchar:
  /* At least one character of the group at |ptr| is not in the alphabet:
   * it is either padding or garbage. */
  if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
      nonbase64(ptr[3]) ) {
    upb_status_seterrf(p->status,
                       "Non-base64 characters in bytes field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  } else if (ptr[2] == '=') {
    uint32_t val;
    char output;

    /* Last group contains only two input bytes, one output byte. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12;

    UPB_ASSERT(!(val & 0x80000000));
    output = val >> 16;
    upb_sink_putstring(p->top->sink, sel, &output, 1, NULL);
    return true;
  } else {
    uint32_t val;
    char output[2];

    /* Last group contains only three input bytes, two output bytes. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6;

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    upb_sink_putstring(p->top->sink, sel, output, 2, NULL);
    return true;
  }

badpadding:
  upb_status_seterrf(p->status,
                     "Incorrect base64 padding for field: %s (%.*s)",
                     upb_fielddef_name(p->top->f),
                     4, ptr);
  return false;
}
572 
573 
574 /* Accumulate buffer **********************************************************/
575 
576 /* Functionality for accumulating a buffer.
577  *
578  * Some parts of the parser need an entire value as a contiguous string.  For
579  * example, to look up a member name in a hash table, or to turn a string into
580  * a number, the relevant library routines need the input string to be in
581  * contiguous memory, even if the value spanned two or more buffers in the
582  * input.  These routines handle that.
583  *
584  * In the common case we can just point to the input buffer to get this
585  * contiguous string and avoid any actual copy.  So we optimistically begin
586  * this way.  But there are a few cases where we must instead copy into a
587  * separate buffer:
588  *
589  *   1. The string was not contiguous in the input (it spanned buffers).
590  *
591  *   2. The string included escape sequences that need to be interpreted to get
592  *      the true value in a contiguous buffer. */
593 
/* Debug check that nothing is currently accumulated: no data pointer and a
 * zero length. */
static void assert_accumulate_empty(upb_json_parser *p) {
  UPB_ASSERT(p->accumulated_len == 0);
  UPB_ASSERT(p->accumulated == NULL);
}
598 
/* Forgets any accumulated data.  The accumulate buffer itself is kept for
 * reuse. */
static void accumulate_clear(upb_json_parser *p) {
  p->accumulated_len = 0;
  p->accumulated = NULL;
}
603 
604 /* Used internally by accumulate_append(). */
accumulate_realloc(upb_json_parser * p,size_t need)605 static bool accumulate_realloc(upb_json_parser *p, size_t need) {
606   void *mem;
607   size_t old_size = p->accumulate_buf_size;
608   size_t new_size = UPB_MAX(old_size, 128);
609   while (new_size < need) {
610     new_size = saturating_multiply(new_size, 2);
611   }
612 
613   mem = upb_arena_realloc(p->arena, p->accumulate_buf, old_size, new_size);
614   if (!mem) {
615     upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
616     return false;
617   }
618 
619   p->accumulate_buf = mem;
620   p->accumulate_buf_size = new_size;
621   return true;
622 }
623 
624 /* Logically appends the given data to the append buffer.
625  * If "can_alias" is true, we will try to avoid actually copying, but the buffer
626  * must be valid until the next accumulate_append() call (if any). */
accumulate_append(upb_json_parser * p,const char * buf,size_t len,bool can_alias)627 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
628                               bool can_alias) {
629   size_t need;
630 
631   if (!p->accumulated && can_alias) {
632     p->accumulated = buf;
633     p->accumulated_len = len;
634     return true;
635   }
636 
637   if (!checked_add(p->accumulated_len, len, &need)) {
638     upb_status_seterrmsg(p->status, "Integer overflow.");
639     return false;
640   }
641 
642   if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
643     return false;
644   }
645 
646   if (p->accumulated != p->accumulate_buf) {
647     if (p->accumulated_len) {
648       memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
649     }
650     p->accumulated = p->accumulate_buf;
651   }
652 
653   memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
654   p->accumulated_len += len;
655   return true;
656 }
657 
658 /* Returns a pointer to the data accumulated since the last accumulate_clear()
659  * call, and writes the length to *len.  This with point either to the input
660  * buffer or a temporary accumulate buffer. */
accumulate_getptr(upb_json_parser * p,size_t * len)661 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
662   UPB_ASSERT(p->accumulated);
663   *len = p->accumulated_len;
664   return p->accumulated;
665 }
666 
667 
/* Multi-part text data *******************************************************/
669 
670 /* When we have text data in the input, it can often come in multiple segments.
671  * For example, there may be some raw string data followed by an escape
672  * sequence.  The two segments are processed with different logic.  Also buffer
673  * seams in the input can cause multiple segments.
674  *
675  * As we see segments, there are two main cases for how we want to process them:
676  *
677  *  1. we want to push the captured input directly to string handlers.
678  *
679  *  2. we need to accumulate all the parts into a contiguous buffer for further
680  *     processing (field name lookup, string->number conversion, etc). */
681 
/* This is the set of states for p->multipart_state.  multipart_text()
 * dispatches on it; multipart_end() returns to MULTIPART_INACTIVE. */
enum {
  /* We are not currently processing multipart data. */
  MULTIPART_INACTIVE = 0,

  /* We are processing multipart data by accumulating it into a contiguous
   * buffer. */
  MULTIPART_ACCUMULATE = 1,

  /* We are processing multipart data by pushing each part directly to the
   * current string handlers. */
  MULTIPART_PUSHEAGERLY = 2
};
695 
696 /* Start a multi-part text value where we accumulate the data for processing at
697  * the end. */
multipart_startaccum(upb_json_parser * p)698 static void multipart_startaccum(upb_json_parser *p) {
699   assert_accumulate_empty(p);
700   UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
701   p->multipart_state = MULTIPART_ACCUMULATE;
702 }
703 
704 /* Start a multi-part text value where we immediately push text data to a string
705  * value with the given selector. */
multipart_start(upb_json_parser * p,upb_selector_t sel)706 static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
707   assert_accumulate_empty(p);
708   UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
709   p->multipart_state = MULTIPART_PUSHEAGERLY;
710   p->string_selector = sel;
711 }
712 
/* Handles one segment of multi-part text according to p->multipart_state:
 *   - inactive:     internal error, returns false;
 *   - accumulate:   appends the segment to the accumulate buffer;
 *   - push eagerly: forwards the segment to the current frame's sink, passing
 *                   the buffer handle only when the data may be aliased.
 * Returns false if the state is inactive or the append fails, true otherwise. */
static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
                           bool can_alias) {
  const int state = p->multipart_state;

  if (state == MULTIPART_INACTIVE) {
    upb_status_seterrmsg(
        p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
    return false;
  }

  if (state == MULTIPART_ACCUMULATE) {
    return accumulate_append(p, buf, len, can_alias);
  }

  if (state == MULTIPART_PUSHEAGERLY) {
    const upb_bufhandle *handle = NULL;
    if (can_alias) {
      handle = p->handle;
    }
    upb_sink_putstring(p->top->sink, p->string_selector, buf, len, handle);
  }

  return true;
}
736 
737 /* Note: this invalidates the accumulate buffer!  Call only after reading its
738  * contents. */
multipart_end(upb_json_parser * p)739 static void multipart_end(upb_json_parser *p) {
740   /* This is false sometimes. Probably a bug of some sort, but this code is
741    * intended for deletion soon. */
742   /* UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); */
743   p->multipart_state = MULTIPART_INACTIVE;
744   accumulate_clear(p);
745 }
746 
747 
748 /* Input capture **************************************************************/
749 
750 /* Functionality for capturing a region of the input as text.  Gracefully
751  * handles the case where a buffer seam occurs in the middle of the captured
752  * region. */
753 
/* Marks |ptr| as the start of a captured input region.  A multipart value must
 * already be active and no other capture may be in progress. */
static void capture_begin(upb_json_parser *p, const char *ptr) {
  UPB_ASSERT(p->capture == NULL);
  UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);

  p->capture = ptr;
}
759 
/* Ends the capture at |ptr| and hands the captured region to
 * multipart_text() (aliasing allowed).  On success the capture is cleared and
 * true is returned; on failure the capture is left in place and false is
 * returned. */
static bool capture_end(upb_json_parser *p, const char *ptr) {
  const char *region_start = p->capture;

  UPB_ASSERT(region_start);
  if (!multipart_text(p, region_start, ptr - region_start, true)) {
    return false;
  }

  p->capture = NULL;
  return true;
}
769 
770 /* This is called at the end of each input buffer (ie. when we have hit a
771  * buffer seam).  If we are in the middle of capturing the input, this
772  * processes the unprocessed capture region. */
capture_suspend(upb_json_parser * p,const char ** ptr)773 static void capture_suspend(upb_json_parser *p, const char **ptr) {
774   if (!p->capture) return;
775 
776   if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
777     /* We use this as a signal that we were in the middle of capturing, and
778      * that capturing should resume at the beginning of the next buffer.
779      *
780      * We can't use *ptr here, because we have no guarantee that this pointer
781      * will be valid when we resume (if the underlying memory is freed, then
782      * using the pointer at all, even to compare to NULL, is likely undefined
783      * behavior). */
784     p->capture = &suspend_capture;
785   } else {
786     /* Need to back up the pointer to the beginning of the capture, since
787      * we were not able to actually preserve it. */
788     *ptr = p->capture;
789   }
790 }
791 
/* Resumes a capture that capture_suspend() put on hold, restarting it at
 * |ptr|.  Does nothing when no capture was in progress. */
static void capture_resume(upb_json_parser *p, const char *ptr) {
  if (!p->capture) {
    return;
  }

  UPB_ASSERT(p->capture == &suspend_capture);
  p->capture = ptr;
}
798 
799 
800 /* Callbacks from the parser **************************************************/
801 
802 /* These are the functions called directly from the parser itself.
803  * We define these in the same order as their declarations in the parser. */
804 
/* Translates the character following a backslash in a JSON string into the
 * character it stands for.  Any other input trips an assertion and yields
 * 'x'. */
static char escape_char(char in) {
  char out;

  switch (in) {
    case '"':  out = '"';  break;
    case '\\': out = '\\'; break;
    case '/':  out = '/';  break;
    case 'b':  out = '\b'; break;
    case 'f':  out = '\f'; break;
    case 'n':  out = '\n'; break;
    case 'r':  out = '\r'; break;
    case 't':  out = '\t'; break;
    default:
      UPB_ASSERT(0);
      out = 'x';
      break;
  }

  return out;
}
820 
/* Emits the single character denoted by the escape character at *ptr as a
 * (non-aliasable) text segment. */
static bool escape(upb_json_parser *p, const char *ptr) {
  char unescaped = escape_char(*ptr);

  return multipart_text(p, &unescaped, 1, false);
}
825 
/* Begins a \uXXXX escape: clears the accumulator that hexdigit() fills. */
static void start_hex(upb_json_parser *p) {
  p->digit = 0;
}
829 
/* Folds the hex digit at |ptr| into p->digit as its new low nibble. */
static void hexdigit(upb_json_parser *p, const char *ptr) {
  const char c = ptr[0];
  int nibble;

  if ('0' <= c && c <= '9') {
    nibble = c - '0';
  } else if ('a' <= c && c <= 'f') {
    nibble = (c - 'a') + 10;
  } else {
    /* Only an upper-case hex letter is expected here. */
    UPB_ASSERT('A' <= c && c <= 'F');
    nibble = (c - 'A') + 10;
  }

  p->digit <<= 4;
  p->digit += nibble;
}
844 
/* Finishes a \uXXXX escape: encodes the accumulated code point as UTF-8 and
 * appends it to the text being assembled. */
static bool end_hex(upb_json_parser *p) {
  const uint32_t cp = p->digit;
  char out[3]; /* \u0000 -- \uFFFF never needs more than three bytes. */
  int n;

  if (cp <= 0x7F) {
    /* Single byte: 0xxxxxxx */
    out[0] = cp;
    n = 1;
  } else if (cp <= 0x07FF) {
    /* Two bytes: 110xxxxx 10xxxxxx */
    out[0] = ((cp >> 6) & 0x1F) | 0xC0;
    out[1] = (cp & 0x3F) | 0x80;
    n = 2;
  } else /* cp <= 0xFFFF */ {
    /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
    out[0] = ((cp >> 12) & 0x0F) | 0xE0;
    out[1] = ((cp >> 6) & 0x3F) | 0x80;
    out[2] = (cp & 0x3F) | 0x80;
    n = 3;
  }
  /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
   * we have to wait for the next escape to get the full code point). */

  return multipart_text(p, out, n, false);
}
872 
/* Parser callback: a run of literal text starts at |ptr|; begin capturing. */
static void start_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
876 
/* Parser callback: the run of literal text ends at |ptr|; finish the capture
 * and report whether that succeeded. */
static bool end_text(upb_json_parser *p, const char *ptr) {
  const bool ok = capture_end(p, ptr);
  return ok;
}
880 
/* Parser callback: a JSON number starts at |ptr|.  Opens whatever wrapper or
 * Value object the current context calls for, then begins accumulating the
 * number's text.  Returns false if a bare number is not acceptable at the top
 * level or if a sub-object could not be started. */
static bool start_number(upb_json_parser *p, const char *ptr) {
  if (is_top_level(p)) {
    if (is_number_wrapper_object(p)) {
      start_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_NUMBERVALUE);
    } else {
      return false;
    }
  } else {
    /* Inside a message: a wrapper or Value field needs its own sub-object
     * before the number can be stored. */
    const bool as_wrapper = does_number_wrapper_start(p);

    if (as_wrapper || is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
      if (!start_subobject(p)) {
        return false;
      }
      if (as_wrapper) {
        start_wrapper_object(p);
      } else {
        start_value_object(p, VALUE_NUMBERVALUE);
      }
    }
  }

  multipart_startaccum(p);
  capture_begin(p, ptr);
  return true;
}
906 
907 static bool parse_number(upb_json_parser *p, bool is_quoted);
908 
/* Ends the capture of a number at |ptr| and, when the current frame has a
 * field, converts and emits the accumulated text.  With no field the text is
 * simply discarded. */
static bool end_number_nontop(upb_json_parser *p, const char *ptr) {
  if (!capture_end(p, ptr)) {
    return false;
  }

  if (p->top->f != NULL) {
    return parse_number(p, false);
  }

  /* No field to store into: drop the accumulated text. */
  multipart_end(p);
  return true;
}
921 
/* Parser callback: a JSON number ends at |ptr|.  Emits the value and then
 * closes any wrapper or Value object that start_number() opened for it. */
static bool end_number(upb_json_parser *p, const char *ptr) {
  if (!end_number_nontop(p, ptr)) {
    return false;
  }

  if (does_number_wrapper_end(p)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else {
    /* Plain numeric field: nothing to close. */
    return true;
  }

  /* A wrapper/Value nested inside another message also occupies a
   * sub-object that must be closed. */
  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
945 
946 /* |buf| is NULL-terminated. |buf| itself will never include quotes;
947  * |is_quoted| tells us whether this text originally appeared inside quotes. */
parse_number_from_buffer(upb_json_parser * p,const char * buf,bool is_quoted)948 static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
949                                      bool is_quoted) {
950   size_t len = strlen(buf);
951   const char *bufend = buf + len;
952   char *end;
953   upb_fieldtype_t type = upb_fielddef_type(p->top->f);
954   double val;
955   double dummy;
956   double inf = INFINITY;
957 
958   errno = 0;
959 
960   if (len == 0 || buf[0] == ' ') {
961     return false;
962   }
963 
964   /* For integer types, first try parsing with integer-specific routines.
965    * If these succeed, they will be more accurate for int64/uint64 than
966    * strtod().
967    */
968   switch (type) {
969     case UPB_TYPE_ENUM:
970     case UPB_TYPE_INT32: {
971       long val = strtol(buf, &end, 0);
972       if (errno == ERANGE || end != bufend) {
973         break;
974       } else if (val > INT32_MAX || val < INT32_MIN) {
975         return false;
976       } else {
977         upb_sink_putint32(p->top->sink, parser_getsel(p), (int32_t)val);
978         return true;
979       }
980       UPB_UNREACHABLE();
981     }
982     case UPB_TYPE_UINT32: {
983       unsigned long val = strtoul(buf, &end, 0);
984       if (end != bufend) {
985         break;
986       } else if (val > UINT32_MAX || errno == ERANGE) {
987         return false;
988       } else {
989         upb_sink_putuint32(p->top->sink, parser_getsel(p), (uint32_t)val);
990         return true;
991       }
992       UPB_UNREACHABLE();
993     }
994     /* XXX: We can't handle [u]int64 properly on 32-bit machines because
995      * strto[u]ll isn't in C89. */
996     case UPB_TYPE_INT64: {
997       long val = strtol(buf, &end, 0);
998       if (errno == ERANGE || end != bufend) {
999         break;
1000       } else {
1001         upb_sink_putint64(p->top->sink, parser_getsel(p), val);
1002         return true;
1003       }
1004       UPB_UNREACHABLE();
1005     }
1006     case UPB_TYPE_UINT64: {
1007       unsigned long val = strtoul(p->accumulated, &end, 0);
1008       if (end != bufend) {
1009         break;
1010       } else if (errno == ERANGE) {
1011         return false;
1012       } else {
1013         upb_sink_putuint64(p->top->sink, parser_getsel(p), val);
1014         return true;
1015       }
1016       UPB_UNREACHABLE();
1017     }
1018     default:
1019       break;
1020   }
1021 
1022   if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) {
1023     /* Quoted numbers for integer types are not allowed to be in double form. */
1024     return false;
1025   }
1026 
1027   if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) {
1028     /* C89 does not have an INFINITY macro. */
1029     val = inf;
1030   } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) {
1031     val = -inf;
1032   } else {
1033     val = strtod(buf, &end);
1034     if (errno == ERANGE || end != bufend) {
1035       return false;
1036     }
1037   }
1038 
1039   switch (type) {
1040 #define CASE(capitaltype, smalltype, ctype, min, max)                     \
1041     case UPB_TYPE_ ## capitaltype: {                                      \
1042       if (modf(val, &dummy) != 0 || val > max || val < min) {             \
1043         return false;                                                     \
1044       } else {                                                            \
1045         upb_sink_put ## smalltype(p->top->sink, parser_getsel(p),        \
1046                                   (ctype)val);                            \
1047         return true;                                                      \
1048       }                                                                   \
1049       break;                                                              \
1050     }
1051     case UPB_TYPE_ENUM:
1052     CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX);
1053     CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX);
1054     CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX);
1055     CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX);
1056 #undef CASE
1057 
1058     case UPB_TYPE_DOUBLE:
1059       upb_sink_putdouble(p->top->sink, parser_getsel(p), val);
1060       return true;
1061     case UPB_TYPE_FLOAT:
1062       if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) {
1063         return false;
1064       } else {
1065         upb_sink_putfloat(p->top->sink, parser_getsel(p), val);
1066         return true;
1067       }
1068     default:
1069       return false;
1070   }
1071 }
1072 
/* Converts the accumulated text to a number for the current field and emits
 * it.  On a conversion failure an error is recorded in p->status.  The
 * accumulated text is released once conversion has been attempted. */
static bool parse_number(upb_json_parser *p, bool is_quoted) {
  size_t text_len;
  const char *text;
  bool ok;

  /* strtol() and friends cannot be told the length of their input, so append
   * a terminator to force a copy into a NUL-terminated buffer. */
  if (!multipart_text(p, "\0", 1, false)) {
    return false;
  }

  text = accumulate_getptr(p, &text_len);
  ok = parse_number_from_buffer(p, text, is_quoted);

  if (!ok) {
    upb_status_seterrf(p->status, "error parsing number: %s", text);
  }

  multipart_end(p);
  return ok;
}
1094 
/* Emits |val| for the current field.  A frame without a field accepts the
 * value silently; a field that is not of bool type is an error. */
static bool parser_putbool(upb_json_parser *p, bool val) {
  bool ok;

  if (p->top->f == NULL) {
    return true;
  }

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_BOOL) {
    ok = upb_sink_putbool(p->top->sink, parser_getsel(p), val);
    UPB_ASSERT(ok);
    return true;
  }

  upb_status_seterrf(p->status,
                     "Boolean value specified for non-bool field: %s",
                     upb_fielddef_name(p->top->f));
  return false;
}
1114 
/* Parser callback: a JSON true/false literal was read.  Opens a BoolValue
 * wrapper or Value object if the context requires one, emits |val|, and
 * closes whatever was opened. */
static bool end_bool(upb_json_parser *p, bool val) {
  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
      start_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_BOOLVALUE);
    } else {
      return false;
    }
  } else {
    /* Inside a message: BoolValue and Value fields need a sub-object. */
    const bool as_wrapper = is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE);

    if (as_wrapper || is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
      if (!start_subobject(p)) {
        return false;
      }
      if (as_wrapper) {
        start_wrapper_object(p);
      } else {
        start_value_object(p, VALUE_BOOLVALUE);
      }
    }
  }

  /* Values of unknown fields are accepted and dropped. */
  if (p->top->is_unknown_field) {
    return true;
  }

  if (!parser_putbool(p, val)) {
    return false;
  }

  if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
    end_wrapper_object(p);
  } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
  } else {
    /* Plain bool field: nothing to close. */
    return true;
  }

  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
1162 
/* Parser callback: a JSON null literal was read.  Null only produces output
 * for google.protobuf.Value, where it is stored by parsing the text "0" into
 * the null_value field; everywhere else it is accepted and ignored. */
static bool end_null(upb_json_parser *p) {
  const char *zero_text = "0";

  if (is_top_level(p)) {
    if (!is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      return true;
    }
  } else {
    if (!is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
      return true;
    }
    if (!start_subobject(p)) {
      return false;
    }
  }

  start_value_object(p, VALUE_NULLVALUE);

  /* Fill null_value field by feeding the literal "0" through the number
   * path. */
  multipart_startaccum(p);
  capture_begin(p, zero_text);
  capture_end(p, zero_text + 1);
  parse_number(p, false);

  end_value_object(p);
  if (!is_top_level(p)) {
    end_subobject(p);
  }

  return true;
}
1194 
/* Begins a string value inside an Any frame: the text is accumulated so that
 * it can be examined as a whole once the string ends.  Always succeeds. */
static bool start_any_stringval(upb_json_parser *p) {
  multipart_startaccum(p);
  return true;
}
1199 
/* Parser callback: a JSON string value is starting.
 *
 * First opens whatever enclosing object the context requires (wrapper,
 * FieldMask, Timestamp/Duration, or Value), then sets up how the string's
 * text will be consumed: streamed to the sink for STRING fields, or
 * accumulated for everything else.
 *
 * Returns false if a string is not acceptable here or if a frame could not be
 * pushed. */
static bool start_stringval(upb_json_parser *p) {
  if (is_top_level(p)) {
    if (is_string_wrapper_object(p) ||
        is_number_wrapper_object(p)) {
      start_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
      /* FieldMask returns right away, without setting up text handling
       * here. */
      start_fieldmask_object(p);
      return true;
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
      start_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_STRINGVALUE);
    } else {
      return false;
    }
  } else if (does_string_wrapper_start(p) ||
             does_number_wrapper_start(p)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_wrapper_object(p);
  } else if (does_fieldmask_start(p)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_fieldmask_object(p);
    return true;
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) ||
             is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_value_object(p, VALUE_STRINGVALUE);
  }

  /* No field in the current frame: just accumulate the text. */
  if (p->top->f == NULL) {
    multipart_startaccum(p);
    return true;
  }

  if (p->top->is_any) {
    return start_any_stringval(p);
  }

  if (upb_fielddef_isstring(p->top->f)) {
    upb_jsonparser_frame *inner;
    upb_selector_t sel;

    if (!check_stack(p)) return false;

    /* Start a new parser frame: parser frames correspond one-to-one with
     * handler frames, and string events occur in a sub-frame. */
    inner = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
    upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
    /* The string sub-frame refers to the same message and field as its
     * parent. */
    inner->m = p->top->m;
    inner->f = p->top->f;
    p->top = inner;

    if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
      /* For STRING fields we push data directly to the handlers as it is
       * parsed.  We don't do this yet for BYTES fields, because our base64
       * decoder is not streaming.
       *
       * TODO(haberman): make base64 decoding streaming also. */
      multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
      return true;
    } else {
      multipart_startaccum(p);
      return true;
    }
  } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL &&
             upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) {
    /* No need to push a frame -- numeric values in quotes remain in the
     * current parser frame.  These values must accumulate so we can convert
     * them all at once at the end. */
    multipart_startaccum(p);
    return true;
  } else {
    upb_status_seterrf(p->status,
                       "String specified for bool or submessage field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }
}
1291 
/* Ends a string value inside an Any frame.  The accumulated text is emitted
 * as a string on the current field and is then interpreted as a type URL of
 * the form "type.googleapis.com/<message name>"; the named message is looked
 * up in the symbol table and recorded as the Any's payload type.
 *
 * Returns false (with p->status set) if the URL lacks the expected prefix or
 * names an unknown message. */
static bool end_any_stringval(upb_json_parser *p) {
  static const char url_prefix[] = "type.googleapis.com/";
  const size_t url_prefix_len = sizeof(url_prefix) - 1;
  size_t len;
  const char *buf = accumulate_getptr(p, &len);

  /* Set type_url */
  upb_selector_t sel;
  upb_jsonparser_frame *inner;
  if (!check_stack(p)) return false;
  inner = p->top + 1;

  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(inner->sink, sel, buf, len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(inner->sink, sel);

  multipart_end(p);

  /* Resolve type url.  The length is checked before any byte is compared:
   * |buf| is length-delimited, so comparing a full prefix against a shorter
   * buffer would read past its end. */
  if (len > url_prefix_len && memcmp(buf, url_prefix, url_prefix_len) == 0) {
    const upb_msgdef *payload_type = NULL;
    buf += url_prefix_len;
    len -= url_prefix_len;

    payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len);
    if (payload_type == NULL) {
      upb_status_seterrf(
          p->status, "Cannot find packed type: %.*s\n", (int)len, buf);
      return false;
    }

    json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type);

    return true;
  } else {
    upb_status_seterrf(
        p->status, "Invalid type url: %.*s\n", (int)len, buf);
    return false;
  }
}
1333 
/* Ends a string value and emits it according to the current field's type:
 *   - BYTES: the accumulated text is pushed through the base64 decoder, then
 *     the string sub-frame is closed;
 *   - STRING: the string sub-frame is closed;
 *   - ENUM: the text is resolved as a symbolic enum name;
 *   - numeric types: the text is parsed as a quoted number.
 * Timestamp/Duration messages and frames without a field only have their
 * accumulated text released.  Returns false on failure. */
static bool end_stringval_nontop(upb_json_parser *p) {
  bool ok = true;

  if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
      is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
    multipart_end(p);
    return true;
  }

  if (p->top->f == NULL) {
    multipart_end(p);
    return true;
  }

  if (p->top->is_any) {
    return end_any_stringval(p);
  }

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_BYTES:
      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
                       p->accumulated, p->accumulated_len)) {
        return false;
      }
      /* Fall through. */

    case UPB_TYPE_STRING: {
      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
      upb_sink_endstr(p->top->sink, sel);
      /* Pop the string sub-frame pushed when the string started. */
      p->top--;
      break;
    }

    case UPB_TYPE_ENUM: {
      /* Resolve enum symbolic name to integer value. */
      const upb_enumdef *enumdef = upb_fielddef_enumsubdef(p->top->f);

      size_t len;
      const char *buf = accumulate_getptr(p, &len);

      int32_t int_val = 0;
      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);

      if (ok) {
        upb_selector_t sel = parser_getsel(p);
        upb_sink_putint32(p->top->sink, sel, int_val);
      } else {
        if (p->ignore_json_unknown) {
          ok = true;
          /* TODO(teboring): Should also clean this field. */
        } else {
          /* The precision argument of "%.*s" must be an int; passing a
           * size_t through varargs is undefined. */
          upb_status_seterrf(p->status, "Enum value unknown: '%.*s'",
                             (int)len, buf);
        }
      }

      break;
    }

    case UPB_TYPE_INT32:
    case UPB_TYPE_INT64:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
    case UPB_TYPE_DOUBLE:
    case UPB_TYPE_FLOAT:
      ok = parse_number(p, true);
      break;

    default:
      UPB_ASSERT(false);
      upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
      ok = false;
      break;
  }

  multipart_end(p);

  return ok;
}
1412 
/* Parser callback: a JSON string value has ended.  Emits the value and then
 * closes whichever enclosing object start_stringval() opened for it. */
static bool end_stringval(upb_json_parser *p) {
  if (does_fieldmask_end(p)) {
    /* FieldMask's stringvals have been ended when handling them. Only need
     * to close FieldMask here. */
    end_fieldmask_object(p);
  } else {
    if (!end_stringval_nontop(p)) {
      return false;
    }

    if (does_string_wrapper_end(p) ||
        does_number_wrapper_end(p)) {
      end_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      end_value_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
      end_object(p);
    } else {
      /* Ordinary field: no enclosing object to close. */
      return true;
    }
  }

  /* An object nested inside another message also occupies a sub-object. */
  if (!is_top_level(p)) {
    end_subobject(p);
  }
  return true;
}
1457 
/* Parser callback: the numeric part of a Duration string starts at |ptr|;
 * begin capturing it. */
static void start_duration_base(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1461 
/* Parser callback: the numeric part of a Duration string ends at |ptr|.
 *
 * Splits the captured text at the decimal point, parses the whole seconds
 * and the fraction separately, range-checks the seconds, and emits the
 * "seconds" and "nanos" members of the Duration message.
 *
 * Returns false (with p->status set) if the text is too long to be a valid
 * duration, is malformed, or is out of range. */
static bool end_duration_base(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  char seconds_buf[14];
  char nanos_buf[12];
  char *end;
  int64_t seconds = 0;
  int32_t nanos = 0;
  double val = 0.0;
  const char *seconds_membername = "seconds";
  const char *nanos_membername = "nanos";
  size_t fraction_start;
  size_t fraction_len;
  bool negative;

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  memset(seconds_buf, 0, sizeof(seconds_buf));
  memset(nanos_buf, 0, sizeof(nanos_buf));

  /* Find out base end. The maximum duration is 315576000000, which cannot be
   * represented by double without losing precision. Thus, we need to handle
   * fraction and base separately. */
  for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.';
       fraction_start++);
  fraction_len = len - fraction_start;

  /* Both pieces are copied into fixed-size stack buffers that must keep a
   * trailing NUL (and, for the fraction, a leading '0').  Reject anything
   * longer instead of writing past the buffers; such text cannot be a valid
   * duration anyway ("-315576000000" is 13 characters, ".999999999" is
   * 10). */
  if (fraction_start > sizeof(seconds_buf) - 1 ||
      fraction_len > sizeof(nanos_buf) - 2) {
    upb_status_seterrmsg(p->status,
                         "error parsing duration: value is too long");
    return false;
  }

  /* The sign has to come from the text: "-0.5" has a seconds part of 0. */
  negative = (len > 0 && buf[0] == '-');

  /* Parse base */
  memcpy(seconds_buf, buf, fraction_start);
  errno = 0;  /* strtol() only ever sets errno; clear any stale value. */
  seconds = strtol(seconds_buf, &end, 10);
  if (errno == ERANGE || end != seconds_buf + fraction_start) {
    upb_status_seterrf(p->status, "error parsing duration: %s",
                       seconds_buf);
    return false;
  }

  if (seconds > 315576000000) {
    upb_status_seterrf(p->status, "error parsing duration: "
                                   "maximum acceptable value is "
                                   "315576000000");
    return false;
  }

  if (seconds < -315576000000) {
    upb_status_seterrf(p->status, "error parsing duration: "
                                   "minimum acceptable value is "
                                   "-315576000000");
    return false;
  }

  /* Parse fraction */
  nanos_buf[0] = '0';
  memcpy(nanos_buf + 1, buf + fraction_start, fraction_len);
  errno = 0;
  val = strtod(nanos_buf, &end);
  if (errno == ERANGE || end != nanos_buf + fraction_len + 1) {
    upb_status_seterrf(p->status, "error parsing duration: %s",
                       nanos_buf);
    return false;
  }

  /* Round to the nearest nanosecond: the product of a binary fraction and
   * 1e9 may land just below the intended integer. */
  nanos = (int32_t)(val * 1000000000 + 0.5);
  if (negative) nanos = -nanos;

  /* Clean up buffer */
  multipart_end(p);

  /* Set seconds */
  start_member(p);
  capture_begin(p, seconds_membername);
  capture_end(p, seconds_membername + 7);
  end_membername(p);
  upb_sink_putint64(p->top->sink, parser_getsel(p), seconds);
  end_member(p);

  /* Set nanos */
  start_member(p);
  capture_begin(p, nanos_membername);
  capture_end(p, nanos_membername + 5);
  end_membername(p);
  upb_sink_putint32(p->top->sink, parser_getsel(p), nanos);
  end_member(p);

  /* Continue previous arena */
  multipart_startaccum(p);

  return true;
}
1550 
/* Converts the accumulated text (one component of a timestamp) to an int,
 * then restarts accumulation for the next component. */
static int parse_timestamp_number(upb_json_parser *p) {
  size_t text_len;
  const char *text;
  int result;

  /* atoi() cannot be told the length of its input, so append a terminator to
   * force a copy into a NUL-terminated buffer. */
  multipart_text(p, "\0", 1, false);

  text = accumulate_getptr(p, &text_len);
  result = atoi(text);

  /* Drop this component's text and start collecting the next one. */
  multipart_end(p);
  multipart_startaccum(p);

  return result;
}
1567 
/* Parser callback: the year of a timestamp starts at |ptr|. */
static void start_year(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1571 
/* Parser callback: the year ends at |ptr|.  Stored as years since 1900, the
 * struct tm convention. */
static bool end_year(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);
  if (captured) {
    p->tm.tm_year = parse_timestamp_number(p) - 1900;
  }
  return captured;
}
1579 
/* Parser callback: the month of a timestamp starts at |ptr|. */
static void start_month(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1583 
/* Parser callback: the month ends at |ptr|.  Stored zero-based, the struct
 * tm convention. */
static bool end_month(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);
  if (captured) {
    p->tm.tm_mon = parse_timestamp_number(p) - 1;
  }
  return captured;
}
1591 
/* Parser callback: the day-of-month of a timestamp starts at |ptr|. */
static void start_day(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1595 
/* Parser callback: the day-of-month ends at |ptr|; store it in p->tm. */
static bool end_day(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);
  if (captured) {
    p->tm.tm_mday = parse_timestamp_number(p);
  }
  return captured;
}
1603 
/* Parser callback: the hour of a timestamp starts at |ptr|. */
static void start_hour(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1607 
/* Parser callback: the hour ends at |ptr|; store it in p->tm. */
static bool end_hour(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);
  if (captured) {
    p->tm.tm_hour = parse_timestamp_number(p);
  }
  return captured;
}
1615 
/* Parser callback: the minute of a timestamp starts at |ptr|. */
static void start_minute(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1619 
/* Parser callback: the minute ends at |ptr|; store it in p->tm. */
static bool end_minute(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);
  if (captured) {
    p->tm.tm_min = parse_timestamp_number(p);
  }
  return captured;
}
1627 
/* Parser callback: the seconds of a timestamp start at |ptr|. */
static void start_second(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1631 
/* Parser callback: the seconds end at |ptr|; store them in p->tm. */
static bool end_second(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);
  if (captured) {
    p->tm.tm_sec = parse_timestamp_number(p);
  }
  return captured;
}
1639 
/* Parser callback: a timestamp is starting; zero the broken-down time that
 * the end_year() .. end_second() callbacks fill in. */
static void start_timestamp_base(upb_json_parser *p) {
  memset(&p->tm, 0, sizeof(p->tm));
}
1643 
/* Parser callback: the fractional-seconds part of a timestamp starts at
 * |ptr|; begin capturing it. */
static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1647 
/* Parser callback: the fractional-seconds part of a timestamp ends at |ptr|.
 * Converts the captured fraction to nanoseconds and emits it as the "nanos"
 * member.  Returns false (with p->status set) if the fraction is longer than
 * 10 characters or does not parse. */
static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  char nanos_buf[12];
  char *end;
  double val = 0.0;
  int32_t nanos;
  const char *nanos_membername = "nanos";

  memset(nanos_buf, 0, sizeof(nanos_buf));

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  /* This bound also guarantees that '0' + the text + NUL fits nanos_buf. */
  if (len > 10) {
    upb_status_seterrf(p->status,
        "error parsing timestamp: at most 9-digit fraction.");
    return false;
  }

  /* Parse nanos: prefix a '0' so the fraction forms a complete number. */
  nanos_buf[0] = '0';
  memcpy(nanos_buf + 1, buf, len);
  errno = 0;  /* strtod() only ever sets errno; clear any stale value. */
  val = strtod(nanos_buf, &end);

  if (errno == ERANGE || end != nanos_buf + len + 1) {
    upb_status_seterrf(p->status, "error parsing timestamp nanos: %s",
                       nanos_buf);
    return false;
  }

  /* Round to the nearest nanosecond: the product of a binary fraction and
   * 1e9 may land just below the intended integer. */
  nanos = (int32_t)(val * 1000000000 + 0.5);

  /* Clean up previous environment */
  multipart_end(p);

  /* Set nanos */
  start_member(p);
  capture_begin(p, nanos_membername);
  capture_end(p, nanos_membername + 5);
  end_membername(p);
  upb_sink_putint32(p->top->sink, parser_getsel(p), nanos);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
1700 
/* Parser callback: the zone designator of a timestamp starts at |ptr|; begin
 * capturing it. */
static void start_timestamp_zone(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1704 
/* Returns the number of days between 1970-01-01 and the given proleptic
 * Gregorian date; negative for earlier dates.
 *
 * epoch_days(1970, 1, 1) == 1970-01-01 == 0.
 *
 * |month| is 1-based.  Values outside 1..12 are carried into the year, as
 * timegm() does (month 13 is January of the next year, month 0 is December
 * of the previous one), so an unvalidated month can never index outside the
 * lookup table.  |day| is not validated; it is simply added. */
static int epoch_days(int year, int month, int day) {
  static const uint16_t month_yday[12] = {0,   31,  59,  90,  120, 151,
                                          181, 212, 243, 273, 304, 334};
  int month0 = month - 1;  /* Zero-based month, normalized below. */
  uint32_t year_adj;
  uint32_t febs;
  uint32_t leap_days;
  uint32_t days;

  /* Normalize month0 into 0..11, carrying whole years. */
  year += month0 / 12;
  month0 %= 12;
  if (month0 < 0) {
    month0 += 12;
    year -= 1;
  }

  year_adj = year + 4800;  /* Ensure positive year, multiple of 400. */
  febs = year_adj - (month0 < 2 ? 1 : 0);  /* Februaries since base. */
  leap_days = 1 + (febs / 4) - (febs / 100) + (febs / 400);
  days = 365 * year_adj + leap_days + month_yday[month0] + day - 1;

  /* Adjust to Unix epoch.  Subtract in signed arithmetic so that dates
   * before 1970 do not rely on unsigned wrap-around. */
  return (int)days - 2472692;
}
1715 
/* Converts a broken-down UTC time to seconds since the Unix epoch.  Only the
 * year, month, day, hour, minute and second fields of |tp| are read; hours,
 * minutes and seconds are accumulated as-is, so out-of-range values in them
 * simply carry. */
static int64_t upb_timegm(const struct tm *tp) {
  const int64_t days =
      epoch_days(tp->tm_year + 1900, tp->tm_mon + 1, tp->tm_mday);
  const int64_t hours = (days * 24) + tp->tm_hour;
  const int64_t minutes = (hours * 60) + tp->tm_min;

  return (minutes * 60) + tp->tm_sec;
}
1723 
/* Parser callback: the zone designator of a timestamp ends at |ptr|.
 *
 * Applies the zone offset (either 'Z' or a sign followed by "HH:00") to the
 * broken-down time collected so far, converts it to seconds since the Unix
 * epoch, and emits the "seconds" member.  Returns false (with p->status set)
 * if the offset is malformed or the result is earlier than
 * 0001-01-01T00:00:00Z. */
static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) {
  size_t len;
  const char *buf;
  int hours;
  int64_t seconds;
  const char *seconds_membername = "seconds";

  if (!capture_end(p, ptr)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);

  if (len == 0) {
    upb_status_seterrf(p->status, "error parsing timestamp offset");
    return false;
  }

  if (buf[0] != 'Z') {
    /* The captured text is length-delimited, not NUL-terminated, so copy the
     * part after the sign into a terminated scratch buffer before handing it
     * to sscanf().  "HH:00" needs five characters; anything beyond the
     * scratch size could not affect the result. */
    char zone[8];
    size_t zone_len = len - 1;

    if (zone_len > sizeof(zone) - 1) {
      zone_len = sizeof(zone) - 1;
    }
    memcpy(zone, buf + 1, zone_len);
    zone[zone_len] = '\0';

    if (sscanf(zone, "%2d:00", &hours) != 1) {
      upb_status_seterrf(p->status, "error parsing timestamp offset");
      return false;
    }

    /* A zone ahead of UTC means the UTC time is earlier than local time. */
    if (buf[0] == '+') {
      hours = -hours;
    }

    p->tm.tm_hour += hours;
  }

  /* Normalize tm */
  seconds = upb_timegm(&p->tm);

  /* Check timestamp boundary */
  if (seconds < -62135596800) {
    upb_status_seterrf(p->status, "error parsing timestamp: "
                                   "minimum acceptable value is "
                                   "0001-01-01T00:00:00Z");
    return false;
  }

  /* Clean up previous environment */
  multipart_end(p);

  /* Set seconds */
  start_member(p);
  capture_begin(p, seconds_membername);
  capture_end(p, seconds_membername + 7);
  end_membername(p);
  upb_sink_putint64(p->top->sink, parser_getsel(p), seconds);
  end_member(p);

  /* Continue previous environment */
  multipart_startaccum(p);

  return true;
}
1777 
/* Parser callback: the text of one FieldMask path starts at |ptr|; begin
 * capturing it. */
static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
1781 
/* Parser callback: the text of one FieldMask path ends at |ptr|; finish the
 * capture and report whether that succeeded. */
static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  const bool ok = capture_end(p, ptr);
  return ok;
}
1785 
/* Parser callback: one path of a FieldMask is starting.  Pushes a string
 * sub-frame for it and begins accumulating the path's text.  Returns false
 * if the stack check fails. */
static bool start_fieldmask_path(upb_json_parser *p) {
  upb_jsonparser_frame *str_frame;
  upb_selector_t startstr_sel;

  if (!check_stack(p)) {
    return false;
  }

  /* Parser frames correspond one-to-one with handler frames, and string
   * events occur in a sub-frame, so open a new parser frame here. */
  str_frame = start_jsonparser_frame(p);
  startstr_sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, startstr_sel, 0, &str_frame->sink);

  /* The sub-frame refers to the same message and field as its parent. */
  str_frame->m = p->top->m;
  str_frame->f = p->top->f;
  p->top = str_frame;

  multipart_startaccum(p);
  return true;
}
1804 
/* Pushes |len| bytes starting at |ptr| to the current sink one byte at a
 * time using selector |sel|, rewriting every ASCII uppercase letter except
 * the very first byte as '_' followed by its lowercase form (so "fooBar" is
 * emitted as "foo_bar").
 *
 * The results of upb_sink_putstring() are not inspected; this function
 * always returns true. */
static bool lower_camel_push(
    upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) {
  size_t i;

  for (i = 0; i < len; i++) {
    const char ch = ptr[i];
    const bool is_upper = ch >= 'A' && ch <= 'Z';

    if (i > 0 && is_upper) {
      char lower = tolower(ch);
      upb_sink_putstring(p->top->sink, sel, "_", 1, NULL);
      upb_sink_putstring(p->top->sink, sel, &lower, 1, NULL);
    } else {
      upb_sink_putstring(p->top->sink, sel, ptr + i, 1, NULL);
    }
  }

  return true;
}
1821 
/* Finishes one field-mask path: pushes the accumulated text through
 * lower_camel_push(), sends ENDSTR on the string frame's sink, pops that
 * frame and ends the accumulation.
 *
 * Returns false only if lower_camel_push() does. */
static bool end_fieldmask_path(upb_json_parser *p) {
  const upb_selector_t string_sel =
      getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_selector_t endstr_sel;

  if (!lower_camel_push(p, string_sel, p->accumulated, p->accumulated_len)) {
    return false;
  }

  endstr_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(p->top->sink, endstr_sel);
  p->top--;

  multipart_end(p);
  return true;
}
1838 
/* Begins a new object member: asserts that the current frame has no field
 * selected yet, then starts accumulating (the member name is captured into
 * this accumulation by the callers in this file). */
static void start_member(upb_json_parser *p) {
  UPB_ASSERT(!p->top->f);
  multipart_startaccum(p);
}
1843 
1844 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key
1845  * field based on the current contents of the accumulate buffer. */
parse_mapentry_key(upb_json_parser * p)1846 static bool parse_mapentry_key(upb_json_parser *p) {
1847 
1848   size_t len;
1849   const char *buf = accumulate_getptr(p, &len);
1850 
1851   /* Emit the key field. We do a bit of ad-hoc parsing here because the
1852    * parser state machine has already decided that this is a string field
1853    * name, and we are reinterpreting it as some arbitrary key type. In
1854    * particular, integer and bool keys are quoted, so we need to parse the
1855    * quoted string contents here. */
1856 
1857   p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
1858   if (p->top->f == NULL) {
1859     upb_status_seterrmsg(p->status, "mapentry message has no key");
1860     return false;
1861   }
1862   switch (upb_fielddef_type(p->top->f)) {
1863     case UPB_TYPE_INT32:
1864     case UPB_TYPE_INT64:
1865     case UPB_TYPE_UINT32:
1866     case UPB_TYPE_UINT64:
1867       /* Invoke end_number. The accum buffer has the number's text already. */
1868       if (!parse_number(p, true)) {
1869         return false;
1870       }
1871       break;
1872     case UPB_TYPE_BOOL:
1873       if (len == 4 && !strncmp(buf, "true", 4)) {
1874         if (!parser_putbool(p, true)) {
1875           return false;
1876         }
1877       } else if (len == 5 && !strncmp(buf, "false", 5)) {
1878         if (!parser_putbool(p, false)) {
1879           return false;
1880         }
1881       } else {
1882         upb_status_seterrmsg(p->status,
1883                              "Map bool key not 'true' or 'false'");
1884         return false;
1885       }
1886       multipart_end(p);
1887       break;
1888     case UPB_TYPE_STRING:
1889     case UPB_TYPE_BYTES: {
1890       upb_sink subsink;
1891       upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
1892       upb_sink_startstr(p->top->sink, sel, len, &subsink);
1893       sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
1894       upb_sink_putstring(subsink, sel, buf, len, NULL);
1895       sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
1896       upb_sink_endstr(subsink, sel);
1897       multipart_end(p);
1898       break;
1899     }
1900     default:
1901       upb_status_seterrmsg(p->status, "Invalid field type for map key");
1902       return false;
1903   }
1904 
1905   return true;
1906 }
1907 
1908 /* Helper: emit one map entry (as a submessage in the map field sequence). This
1909  * is invoked from end_membername(), at the end of the map entry's key string,
1910  * with the map key in the accumulate buffer. It parses the key from that
1911  * buffer, emits the handler calls to start the mapentry submessage (setting up
1912  * its subframe in the process), and sets up state in the subframe so that the
1913  * value parser (invoked next) will emit the mapentry's value field and then
1914  * end the mapentry message. */
1915 
handle_mapentry(upb_json_parser * p)1916 static bool handle_mapentry(upb_json_parser *p) {
1917   const upb_fielddef *mapfield;
1918   const upb_msgdef *mapentrymsg;
1919   upb_jsonparser_frame *inner;
1920   upb_selector_t sel;
1921 
1922   /* Map entry: p->top->sink is the seq frame, so we need to start a frame
1923    * for the mapentry itself, and then set |f| in that frame so that the map
1924    * value field is parsed, and also set a flag to end the frame after the
1925    * map-entry value is parsed. */
1926   if (!check_stack(p)) return false;
1927 
1928   mapfield = p->top->mapfield;
1929   mapentrymsg = upb_fielddef_msgsubdef(mapfield);
1930 
1931   inner = start_jsonparser_frame(p);
1932   p->top->f = mapfield;
1933   sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
1934   upb_sink_startsubmsg(p->top->sink, sel, &inner->sink);
1935   inner->m = mapentrymsg;
1936   inner->mapfield = mapfield;
1937 
1938   /* Don't set this to true *yet* -- we reuse parsing handlers below to push
1939    * the key field value to the sink, and these handlers will pop the frame
1940    * if they see is_mapentry (when invoked by the parser state machine, they
1941    * would have just seen the map-entry value, not key). */
1942   inner->is_mapentry = false;
1943   p->top = inner;
1944 
1945   /* send STARTMSG in submsg frame. */
1946   upb_sink_startmsg(p->top->sink);
1947 
1948   parse_mapentry_key(p);
1949 
1950   /* Set up the value field to receive the map-entry value. */
1951   p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
1952   p->top->is_mapentry = true;  /* set up to pop frame after value is parsed. */
1953   p->top->mapfield = mapfield;
1954   if (p->top->f == NULL) {
1955     upb_status_seterrmsg(p->status, "mapentry message has no value");
1956     return false;
1957   }
1958 
1959   return true;
1960 }
1961 
/* Called once a member name has been fully accumulated; decides which field
 * (if any) the upcoming value belongs to.
 *
 *   - Frame without a message definition: the member is marked unknown.
 *   - Any frame: delegated to end_any_membername().
 *   - Map frame: delegated to handle_mapentry().
 *   - Otherwise the name is looked up in the frame's name table; a miss is
 *     either tolerated (p->ignore_json_unknown) or reported as an error.
 *
 * Returns false on error, true otherwise. */
static bool end_membername(upb_json_parser *p) {
  size_t name_len;
  const char *name;
  upb_value entry;

  UPB_ASSERT(!p->top->f);

  if (!p->top->m) {
    p->top->is_unknown_field = true;
    multipart_end(p);
    return true;
  }

  if (p->top->is_any) {
    return end_any_membername(p);
  }

  if (p->top->is_map) {
    return handle_mapentry(p);
  }

  name = accumulate_getptr(p, &name_len);

  if (upb_strtable_lookup2(p->top->name_table, name, name_len, &entry)) {
    p->top->f = upb_value_getconstptr(entry);
  } else if (p->ignore_json_unknown) {
    p->top->is_unknown_field = true;
  } else {
    upb_status_seterrf(p->status, "No such field: %.*s\n", (int)name_len,
                       name);
    return false;
  }

  multipart_end(p);
  return true;
}
1995 
/* Member-name handling for a frame that represents an Any message.
 *
 * A member named exactly "@type" selects the frame's "type_url" field; every
 * other member is marked as an unknown field.  In both cases the
 * accumulation is ended.
 *
 * Returns false (with p->status set) only if the frame's name table has no
 * "type_url" entry; previously that case read an uninitialized upb_value. */
static bool end_any_membername(upb_json_parser *p) {
  size_t len;
  const char *buf = accumulate_getptr(p, &len);
  upb_value v;

  if (len == 5 && strncmp(buf, "@type", len) == 0) {
    if (!upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v)) {
      upb_status_seterrmsg(p->status, "Any message has no type_url field");
      return false;
    }
    p->top->f = upb_value_getconstptr(v);
    multipart_end(p);
    return true;
  } else {
    p->top->is_unknown_field = true;
    multipart_end(p);
    return true;
  }
}
2012 
/* Finishes the current object member.
 *
 * When the current frame is a map-entry frame (its value has just been
 * parsed), the entry message is closed with ENDMSG, the frame is popped and
 * ENDSUBMSG is sent on the enclosing frame's sink.  Afterwards the (now
 * current) frame's field selection and unknown-field flag are cleared. */
static void end_member(upb_json_parser *p) {
  if (p->top->is_mapentry) {
    upb_jsonparser_frame *entry = p->top;
    const upb_fielddef *map_f;
    upb_selector_t endsubmsg_sel;
    bool found;

    UPB_ASSERT(entry > p->stack);

    /* ENDMSG on the entry submessage itself. */
    upb_sink_endmsg(entry->sink, p->status);
    map_f = entry->mapfield;

    /* ENDSUBMSG in the repeated-field-of-mapentries frame. */
    p->top = entry - 1;
    found = upb_handlers_getselector(map_f, UPB_HANDLER_ENDSUBMSG,
                                     &endsubmsg_sel);
    UPB_ASSUME(found);
    upb_sink_endsubmsg(p->top->sink, entry->sink, endsubmsg_sel);
  }

  p->top->f = NULL;
  p->top->is_unknown_field = false;
}
2035 
/* Starts a member inside an Any object: performs the regular start_member()
 * work and passes |ptr| to
 * json_parser_any_frame_set_after_type_url_start_once() for the current
 * frame's any_frame. */
static void start_any_member(upb_json_parser *p, const char *ptr) {
  start_member(p);
  json_parser_any_frame_set_after_type_url_start_once(p->top->any_frame, ptr);
}
2040 
/* Ends a member inside an Any object: passes |ptr| to
 * json_parser_any_frame_set_before_type_url_end() for the current frame's
 * any_frame, then performs the regular end_member() work. */
static void end_any_member(upb_json_parser *p, const char *ptr) {
  json_parser_any_frame_set_before_type_url_end(p->top->any_frame, ptr);
  end_member(p);
}
2045 
/* Handles the start of a JSON object that is the value of the current
 * member, pushing a new parser frame for it.
 *
 *   - Unknown field: a bare frame is pushed and no handler is called.
 *   - Map field: STARTSEQ is sent and the new frame is flagged is_map.
 *   - Submessage field: STARTSUBMSG is sent, the name table is set up, and
 *     Any messages additionally get an any_frame.
 *   - Anything else is an error.
 *
 * Returns false if the stack is full or the field cannot hold an object. */
static bool start_subobject(upb_json_parser *p) {
  upb_jsonparser_frame *child;
  upb_selector_t sel;

  if (p->top->is_unknown_field) {
    if (!check_stack(p)) {
      return false;
    }
    p->top = start_jsonparser_frame(p);
    return true;
  }

  if (upb_fielddef_ismap(p->top->f)) {
    /* Beginning of a map: new frame in a repeated-field context. */
    if (!check_stack(p)) {
      return false;
    }

    child = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(p->top->sink, sel, &child->sink);
    child->m = upb_fielddef_msgsubdef(p->top->f);
    child->mapfield = p->top->f;
    child->is_map = true;
    p->top = child;
    return true;
  }

  if (!upb_fielddef_issubmsg(p->top->f)) {
    upb_status_seterrf(p->status,
                       "Object specified for non-message/group field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }

  /* Beginning of a subobject: new frame in the submsg context. */
  if (!check_stack(p)) {
    return false;
  }

  child = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
  upb_sink_startsubmsg(p->top->sink, sel, &child->sink);
  child->m = upb_fielddef_msgsubdef(p->top->f);
  set_name_table(p, child);
  p->top = child;

  p->top->is_any = is_wellknown_msg(p, UPB_WELLKNOWN_ANY);
  p->top->any_frame = p->top->is_any ? json_parser_any_frame_new(p) : NULL;

  return true;
}
2102 
/* Starts a JSON object, first opening whatever wrapper layers are needed
 * when the target is a Struct or Value well-known type.
 *
 * At top level an ordinary message needs nothing and true is returned
 * immediately.  For a Value, a "struct_value" member and its submessage are
 * opened first; for Struct (directly or inside Value) the "fields" member is
 * opened.  The object itself is then started with start_subobject().
 *
 * Returns false as soon as any start_subobject() call fails. */
static bool start_subobject_full(upb_json_parser *p) {
  if (is_top_level(p)) {
    const bool is_value = is_wellknown_msg(p, UPB_WELLKNOWN_VALUE);

    if (!is_value && !is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
      /* Ordinary top-level message: nothing to open. */
      return true;
    }

    if (is_value) {
      start_value_object(p, VALUE_STRUCTVALUE);
      if (!start_subobject(p)) {
        return false;
      }
    }
    start_structvalue_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_STRUCT)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_structvalue_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_value_object(p, VALUE_STRUCTVALUE);
    if (!start_subobject(p)) {
      return false;
    }
    start_structvalue_object(p);
  }

  return start_subobject(p);
}
2126 
/* Pops the frame of a finished JSON object and notifies the parent sink.
 *
 * Nothing happens at top level.  A map frame is closed with ENDSEQ.  Any
 * other frame is closed with ENDSUBMSG, unless it had no message definition
 * (an unknown field's object), in which case it is popped silently. */
static void end_subobject(upb_json_parser *p) {
  upb_selector_t sel;
  bool has_msgdef;

  if (is_top_level(p)) {
    return;
  }

  if (p->top->is_map) {
    p->top--;
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(p->top->sink, sel);
    return;
  }

  has_msgdef = p->top->m != NULL;
  p->top--;
  if (has_msgdef) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
    upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel);
  }
}
2147 
/* Ends a JSON object and then unwinds the Struct and Value wrapper layers,
 * if the frame that becomes current belongs to one of those well-known
 * types.  Each wrapper's member/object is closed and, unless that leaves the
 * parser at top level, its frame is popped as well. */
static void end_subobject_full(upb_json_parser *p) {
  end_subobject(p);

  if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
    end_structvalue_object(p);
    if (!is_top_level(p)) end_subobject(p);
  }

  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
    if (!is_top_level(p)) end_subobject(p);
  }
}
2165 
/* Handles the start of a JSON array.
 *
 * For ListValue and Value well-known types (at top level or as the current
 * field) the wrapper members/submessages are opened first.  An array that is
 * the value of an unknown field gets a bare frame flagged is_unknown_field
 * and no handler calls.  Otherwise the current field must be a sequence:
 * STARTSEQ is sent and a frame flagged is_repeated is pushed.
 *
 * Returns false when a top-level array is given for a message that is
 * neither Value nor ListValue, when the field is not repeated (p->status is
 * set), when a wrapper cannot be opened, or when the frame stack is full. */
static bool start_array(upb_json_parser *p) {
  upb_jsonparser_frame *inner;
  upb_selector_t sel;

  if (is_top_level(p)) {
    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_LISTVALUE);
      if (!start_subobject(p)) return false;
      start_listvalue_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
      start_listvalue_object(p);
    } else {
      return false;
    }
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) &&
             (!upb_fielddef_isseq(p->top->f) ||
              p->top->is_repeated)) {
    if (!start_subobject(p)) return false;
    start_listvalue_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) &&
             (!upb_fielddef_isseq(p->top->f) ||
              p->top->is_repeated)) {
    if (!start_subobject(p)) return false;
    start_value_object(p, VALUE_LISTVALUE);
    if (!start_subobject(p)) return false;
    start_listvalue_object(p);
  }

  if (p->top->is_unknown_field) {
    /* Arrays under unknown fields still consume a parser frame, so the
     * depth limit has to be enforced here as it is in start_subobject(). */
    if (!check_stack(p)) return false;

    inner = start_jsonparser_frame(p);
    inner->is_unknown_field = true;
    p->top = inner;

    return true;
  }

  if (!upb_fielddef_isseq(p->top->f)) {
    upb_status_seterrf(p->status,
                       "Array specified for non-repeated field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }

  if (!check_stack(p)) return false;

  inner = start_jsonparser_frame(p);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
  upb_sink_startseq(p->top->sink, sel, &inner->sink);
  inner->m = p->top->m;
  inner->f = p->top->f;
  inner->is_repeated = true;
  p->top = inner;

  return true;
}
2221 
/* Handles the end of a JSON array: pops the array's frame and, unless the
 * array belonged to an unknown field, sends ENDSEQ on the parent sink.
 * ListValue and Value wrapper layers around the array are then closed in
 * turn, popping their frames except when that would leave top level. */
static void end_array(upb_json_parser *p) {
  upb_selector_t endseq_sel;

  UPB_ASSERT(p->top > p->stack);
  p->top--;

  if (p->top->is_unknown_field) {
    /* Unknown-field arrays never produced STARTSEQ. */
    return;
  }

  endseq_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
  upb_sink_endseq(p->top->sink, endseq_sel);

  if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
    end_listvalue_object(p);
    if (!is_top_level(p)) end_subobject(p);
  }

  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
    end_value_object(p);
    if (!is_top_level(p)) end_subobject(p);
  }
}
2250 
/* Sends STARTMSG on the current frame's sink, except for map frames and
 * frames without a message definition. */
static void start_object(upb_json_parser *p) {
  if (p->top->is_map || p->top->m == NULL) {
    return;
  }
  upb_sink_startmsg(p->top->sink);
}
2256 
/* Sends ENDMSG (with the parser's status) on the current frame's sink,
 * except for map frames and frames without a message definition. */
static void end_object(upb_json_parser *p) {
  if (p->top->is_map || p->top->m == NULL) {
    return;
  }
  upb_sink_endmsg(p->top->sink, p->status);
}
2262 
/* Starts an Any object: performs the regular start_object() work and
 * initializes the any_frame's "before type url" range to the empty range
 * [ptr, ptr). */
static void start_any_object(upb_json_parser *p, const char *ptr) {
  start_object(p);

  /* Start and end coincide until a member extends the range. */
  p->top->any_frame->before_type_url_end = ptr;
  p->top->any_frame->before_type_url_start = ptr;
}
2268 
/* Finishes a JSON object that represents an Any message.
 *
 * The member text recorded in p->top->any_frame (the ranges before and after
 * the type url member) is fed to the parser stored in any_frame->parser, and
 * the bytes collected in any_frame->stringsink are then emitted as this
 * message's "value" field.  |ptr| is the position of the end of the object
 * in the input.
 *
 * Returns false if a value is present without a type url, if the recorded
 * text for a well-known payload contains no ':', if any parse()/end() call
 * on the inner parser fails, or if the frame stack is full.  Note that those
 * early returns happen before json_parser_any_frame_free() is reached. */
static bool end_any_object(upb_json_parser *p, const char *ptr) {
  const char *value_membername = "value";
  bool is_well_known_packed = false;
  /* End of the "after type url" text; one past |ptr| unless adjusted in the
   * well-known branch below. */
  const char *packed_end = ptr + 1;
  upb_selector_t sel;
  upb_jsonparser_frame *inner;

  if (json_parser_any_frame_has_value(p->top->any_frame) &&
      !json_parser_any_frame_has_type_url(p->top->any_frame)) {
    upb_status_seterrmsg(p->status, "No valid type url");
    return false;
  }

  /* Well known types data is represented as value field. */
  if (upb_msgdef_wellknowntype(p->top->any_frame->parser->top->m) !=
          UPB_WELLKNOWN_UNSPECIFIED) {
    is_well_known_packed = true;

    /* For each recorded range, move its start to just past the first ':'
     * (presumably skipping the member name so only the member's value text
     * is parsed -- confirm against the any_frame setters). */
    if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
      p->top->any_frame->before_type_url_start =
          memchr(p->top->any_frame->before_type_url_start, ':',
                 p->top->any_frame->before_type_url_end -
                 p->top->any_frame->before_type_url_start);
      if (p->top->any_frame->before_type_url_start == NULL) {
        upb_status_seterrmsg(p->status, "invalid data for well known type.");
        return false;
      }
      p->top->any_frame->before_type_url_start++;
    }

    if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
      p->top->any_frame->after_type_url_start =
          memchr(p->top->any_frame->after_type_url_start, ':',
                 (ptr + 1) -
                 p->top->any_frame->after_type_url_start);
      if (p->top->any_frame->after_type_url_start == NULL) {
        upb_status_seterrmsg(p->status, "Invalid data for well known type.");
        return false;
      }
      p->top->any_frame->after_type_url_start++;
      /* Stop at |ptr| instead of |ptr| + 1 for the packed form. */
      packed_end = ptr;
    }
  }

  /* Feed the text that preceded the type url; when there is none and the
   * payload is not a well-known type, feed a literal "{" instead. */
  if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL,
               p->top->any_frame->before_type_url_start,
               p->top->any_frame->before_type_url_end -
               p->top->any_frame->before_type_url_start, NULL)) {
      return false;
    }
  } else {
    if (!is_well_known_packed) {
      if (!parse(p->top->any_frame->parser, NULL, "{", 1, NULL)) {
        return false;
      }
    }
  }

  /* Text on both sides of the type url is joined with a ",". */
  if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame) &&
      json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL, ",", 1, NULL)) {
      return false;
    }
  }

  /* Feed the text that followed the type url; when there is none and the
   * payload is not a well-known type, feed a literal "}" instead. */
  if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL,
               p->top->any_frame->after_type_url_start,
               packed_end - p->top->any_frame->after_type_url_start, NULL)) {
      return false;
    }
  } else {
    if (!is_well_known_packed) {
      if (!parse(p->top->any_frame->parser, NULL, "}", 1, NULL)) {
        return false;
      }
    }
  }

  if (!end(p->top->any_frame->parser, NULL)) {
    return false;
  }

  /* Cleared before end_membername() so that "value" goes through the normal
   * name-table lookup rather than end_any_membername(). */
  p->top->is_any = false;

  /* Set value */
  start_member(p);
  capture_begin(p, value_membername);
  capture_end(p, value_membername + 5);
  end_membername(p);

  /* The slot above the current frame only lends its sink for the string
   * events below; p->top itself is not advanced. */
  if (!check_stack(p)) return false;
  inner = p->top + 1;

  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(inner->sink, sel, p->top->any_frame->stringsink.ptr,
                     p->top->any_frame->stringsink.len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(inner->sink, sel);

  end_member(p);

  end_object(p);

  /* Deallocate any parse frame. */
  json_parser_any_frame_free(p->top->any_frame);

  return true;
}
2381 
is_string_wrapper(const upb_msgdef * m)2382 static bool is_string_wrapper(const upb_msgdef *m) {
2383   upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
2384   return type == UPB_WELLKNOWN_STRINGVALUE ||
2385          type == UPB_WELLKNOWN_BYTESVALUE;
2386 }
2387 
is_fieldmask(const upb_msgdef * m)2388 static bool is_fieldmask(const upb_msgdef *m) {
2389   upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
2390   return type == UPB_WELLKNOWN_FIELDMASK;
2391 }
2392 
/* Opens a FieldMask message: starts the object, selects its "paths" member
 * as if that name had been read from the input, and starts the array that
 * receives the individual paths.  The result of start_array() is not
 * checked here. */
static void start_fieldmask_object(upb_json_parser *p) {
  static const char paths_field[] = "paths";

  start_object(p);

  /* Synthesize the member name so the following values land in "paths". */
  start_member(p);
  capture_begin(p, paths_field);
  capture_end(p, paths_field + sizeof(paths_field) - 1);
  end_membername(p);

  start_array(p);
}
2406 
/* Closes a FieldMask message by ending, in order, the array, the current
 * member and the object. */
static void end_fieldmask_object(upb_json_parser *p) {
  end_array(p);
  end_member(p);
  end_object(p);
}
2412 
/* Opens a wrapper message: starts the object and selects its "value" member
 * as if that name had been read from the input, so that the next parsed
 * value is stored there. */
static void start_wrapper_object(upb_json_parser *p) {
  static const char value_field[] = "value";

  start_object(p);

  /* Synthesize the member name. */
  start_member(p);
  capture_begin(p, value_field);
  capture_end(p, value_field + sizeof(value_field) - 1);
  end_membername(p);
}
2424 
/* Closes a wrapper message by ending the current member and then the
 * object. */
static void end_wrapper_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
2429 
/* Opens a Value message and selects the member that matches |value_type|
 * (one of the VALUE_* constants), as if that member name had been read from
 * the input.  An unrecognized |value_type| results in an empty member name
 * being submitted. */
static void start_value_object(upb_json_parser *p, int value_type) {
  const char *membername;

  switch (value_type) {
    case VALUE_NULLVALUE:   membername = "null_value";   break;
    case VALUE_NUMBERVALUE: membername = "number_value"; break;
    case VALUE_STRINGVALUE: membername = "string_value"; break;
    case VALUE_BOOLVALUE:   membername = "bool_value";   break;
    case VALUE_STRUCTVALUE: membername = "struct_value"; break;
    case VALUE_LISTVALUE:   membername = "list_value";   break;
    default:                membername = "";             break;
  }

  start_object(p);

  /* Synthesize the member name so the next value lands in that field. */
  start_member(p);
  capture_begin(p, membername);
  capture_end(p, membername + strlen(membername));
  end_membername(p);
}
2468 
/* Closes a Value message by ending the current member and then the
 * object. */
static void end_value_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
2473 
/* Opens a ListValue message: starts the object and selects its "values"
 * member as if that name had been read from the input. */
static void start_listvalue_object(upb_json_parser *p) {
  static const char values_field[] = "values";

  start_object(p);

  /* Synthesize the member name. */
  start_member(p);
  capture_begin(p, values_field);
  capture_end(p, values_field + sizeof(values_field) - 1);
  end_membername(p);
}
2485 
/* Closes a ListValue message by ending the current member and then the
 * object. */
static void end_listvalue_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
2490 
/* Opens a Struct message: starts the object and selects its "fields" member
 * as if that name had been read from the input. */
static void start_structvalue_object(upb_json_parser *p) {
  static const char fields_field[] = "fields";

  start_object(p);

  /* Synthesize the member name. */
  start_member(p);
  capture_begin(p, fields_field);
  capture_end(p, fields_field + sizeof(fields_field) - 1);
  end_membername(p);
}
2502 
/* Closes a Struct message by ending the current member and then the
 * object. */
static void end_structvalue_object(upb_json_parser *p) {
  end_member(p);
  end_object(p);
}
2507 
/* Returns true when the current frame is the bottom of the parser stack, has
 * no field selected and is not inside an unknown field. */
static bool is_top_level(upb_json_parser *p) {
  if (p->top != p->stack) {
    return false;
  }
  return p->top->f == NULL && !p->top->is_unknown_field;
}
2511 
/* Returns true when the current frame has a message definition whose
 * well-known type equals |type|. */
static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) {
    return false;
  }
  return upb_msgdef_wellknowntype(m) == type;
}
2515 
/* Returns true when the current frame's selected field is a submessage
 * field whose message type has well-known type |type|. */
static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL || !upb_fielddef_issubmsg(f)) {
    return false;
  }
  return upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(f)) == type;
}
2522 
/* Returns true when the current frame's selected field is a submessage
 * field whose message type is a number wrapper. */
static bool does_number_wrapper_start(upb_json_parser *p) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL || !upb_fielddef_issubmsg(f)) {
    return false;
  }
  return upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(f));
}
2528 
/* Returns true when the current frame's message is a number wrapper. */
static bool does_number_wrapper_end(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) {
    return false;
  }
  return upb_msgdef_isnumberwrapper(m);
}
2532 
/* Returns true when the current frame has a message definition and that
 * message is a number wrapper. */
static bool is_number_wrapper_object(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) {
    return false;
  }
  return upb_msgdef_isnumberwrapper(m);
}
2536 
/* Returns true when the current frame's selected field is a submessage
 * field whose message type is a string wrapper (see is_string_wrapper()). */
static bool does_string_wrapper_start(upb_json_parser *p) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL || !upb_fielddef_issubmsg(f)) {
    return false;
  }
  return is_string_wrapper(upb_fielddef_msgsubdef(f));
}
2542 
/* Returns true when the current frame's message is a string wrapper. */
static bool does_string_wrapper_end(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) {
    return false;
  }
  return is_string_wrapper(m);
}
2546 
/* Returns true when the current frame has a message definition and that
 * message is a string wrapper. */
static bool is_string_wrapper_object(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) {
    return false;
  }
  return is_string_wrapper(m);
}
2550 
/* Returns true when the current frame's selected field is a submessage
 * field whose message type is FieldMask. */
static bool does_fieldmask_start(upb_json_parser *p) {
  const upb_fielddef *f = p->top->f;

  if (f == NULL || !upb_fielddef_issubmsg(f)) {
    return false;
  }
  return is_fieldmask(upb_fielddef_msgsubdef(f));
}
2556 
/* Returns true when the current frame's message is FieldMask. */
static bool does_fieldmask_end(upb_json_parser *p) {
  const upb_msgdef *m = p->top->m;

  if (m == NULL) {
    return false;
  }
  return is_fieldmask(m);
}
2560 
/* Jumps to the enclosing function's `error` label when |x| evaluates to
 * false.  Wrapped in do/while(0) so that the macro behaves as a single
 * statement and cannot capture a following `else`. */
#define CHECK_RETURN_TOP(x) do { if (!(x)) goto error; } while (0)
2562 
2563 
2564 /* The actual parser **********************************************************/
2565 
2566 /* What follows is the Ragel parser itself.  The language is specified in Ragel
2567  * and the actions call our C functions above.
2568  *
2569  * Ragel has an extensive set of functionality, and we use only a small part of
2570  * it.  There are many action types but we only use a few:
2571  *
2572  *   ">" -- transition into a machine
2573  *   "%" -- transition out of a machine
2574  *   "@" -- transition into a final state of a machine.
2575  *
2576  * "@" transitions are tricky because a machine can transition into a final
2577  * state repeatedly.  But in some cases we know this can't happen, for example
2578  * a string which is delimited by a final '"' can only transition into its
2579  * final state once, when the closing '"' is seen. */
2580 
2581 
2582 #line 2784 "upb/json/parser.rl"
2583 
2584 
2585 
2586 #line 2587 "upb/json/parser.c"
/* Generated table for the `json` state machine (action data, judging by the
 * name).  Regenerate from upb/json/parser.rl rather than editing by hand. */
static const char _json_actions[] = {
	0, 1, 0, 1, 1, 1, 3, 1,
	4, 1, 6, 1, 7, 1, 8, 1,
	9, 1, 11, 1, 12, 1, 13, 1,
	14, 1, 15, 1, 16, 1, 17, 1,
	18, 1, 19, 1, 20, 1, 22, 1,
	23, 1, 24, 1, 35, 1, 37, 1,
	39, 1, 40, 1, 42, 1, 43, 1,
	44, 1, 46, 1, 48, 1, 49, 1,
	50, 1, 51, 1, 53, 1, 54, 2,
	4, 9, 2, 5, 6, 2, 7, 3,
	2, 7, 9, 2, 21, 26, 2, 25,
	10, 2, 27, 28, 2, 29, 30, 2,
	32, 34, 2, 33, 31, 2, 38, 36,
	2, 40, 42, 2, 45, 2, 2, 46,
	54, 2, 47, 36, 2, 49, 54, 2,
	50, 54, 2, 51, 54, 2, 52, 41,
	2, 53, 54, 3, 32, 34, 35, 4,
	21, 26, 27, 28
};
2607 
/* Generated table for the `json` state machine (offsets into the key table,
 * judging by the name).  Regenerate from upb/json/parser.rl rather than
 * editing by hand. */
static const short _json_key_offsets[] = {
	0, 0, 12, 13, 18, 23, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37,
	38, 43, 44, 48, 53, 58, 63, 67,
	71, 74, 77, 79, 83, 87, 89, 91,
	96, 98, 100, 109, 115, 121, 127, 133,
	135, 139, 142, 144, 146, 149, 150, 154,
	156, 158, 160, 162, 163, 165, 167, 168,
	170, 172, 173, 175, 177, 178, 180, 182,
	183, 185, 187, 191, 193, 195, 196, 197,
	198, 199, 201, 206, 208, 210, 212, 221,
	222, 222, 222, 227, 232, 237, 238, 239,
	240, 241, 241, 242, 243, 244, 244, 245,
	246, 247, 247, 252, 253, 257, 262, 267,
	272, 276, 276, 279, 282, 285, 288, 291,
	294, 294, 294, 294, 294, 294
};
2625 
/* Generated table for the `json` state machine (transition keys, judging by
 * the name; the values look like ASCII codes of input characters).
 * Regenerate from upb/json/parser.rl rather than editing by hand. */
static const char _json_trans_keys[] = {
	32, 34, 45, 91, 102, 110, 116, 123,
	9, 13, 48, 57, 34, 32, 93, 125,
	9, 13, 32, 44, 93, 9, 13, 32,
	93, 125, 9, 13, 97, 108, 115, 101,
	117, 108, 108, 114, 117, 101, 32, 34,
	125, 9, 13, 34, 32, 58, 9, 13,
	32, 93, 125, 9, 13, 32, 44, 125,
	9, 13, 32, 44, 125, 9, 13, 32,
	34, 9, 13, 45, 48, 49, 57, 48,
	49, 57, 46, 69, 101, 48, 57, 69,
	101, 48, 57, 43, 45, 48, 57, 48,
	57, 48, 57, 46, 69, 101, 48, 57,
	34, 92, 34, 92, 34, 47, 92, 98,
	102, 110, 114, 116, 117, 48, 57, 65,
	70, 97, 102, 48, 57, 65, 70, 97,
	102, 48, 57, 65, 70, 97, 102, 48,
	57, 65, 70, 97, 102, 34, 92, 45,
	48, 49, 57, 48, 49, 57, 46, 115,
	48, 57, 115, 48, 57, 34, 46, 115,
	48, 57, 48, 57, 48, 57, 48, 57,
	48, 57, 45, 48, 57, 48, 57, 45,
	48, 57, 48, 57, 84, 48, 57, 48,
	57, 58, 48, 57, 48, 57, 58, 48,
	57, 48, 57, 43, 45, 46, 90, 48,
	57, 48, 57, 58, 48, 48, 34, 48,
	57, 43, 45, 90, 48, 57, 34, 44,
	34, 44, 34, 44, 34, 45, 91, 102,
	110, 116, 123, 48, 57, 34, 32, 93,
	125, 9, 13, 32, 44, 93, 9, 13,
	32, 93, 125, 9, 13, 97, 108, 115,
	101, 117, 108, 108, 114, 117, 101, 32,
	34, 125, 9, 13, 34, 32, 58, 9,
	13, 32, 93, 125, 9, 13, 32, 44,
	125, 9, 13, 32, 44, 125, 9, 13,
	32, 34, 9, 13, 32, 9, 13, 32,
	9, 13, 32, 9, 13, 32, 9, 13,
	32, 9, 13, 32, 9, 13, 0
};
2665 
/* Generated table for the `json` state machine (per-state count of single
 * keys, judging by the name).  Regenerate from upb/json/parser.rl rather
 * than editing by hand. */
static const char _json_single_lengths[] = {
	0, 8, 1, 3, 3, 3, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1,
	3, 1, 2, 3, 3, 3, 2, 2,
	1, 3, 0, 2, 2, 0, 0, 3,
	2, 2, 9, 0, 0, 0, 0, 2,
	2, 1, 2, 0, 1, 1, 2, 0,
	0, 0, 0, 1, 0, 0, 1, 0,
	0, 1, 0, 0, 1, 0, 0, 1,
	0, 0, 4, 0, 0, 1, 1, 1,
	1, 0, 3, 2, 2, 2, 7, 1,
	0, 0, 3, 3, 3, 1, 1, 1,
	1, 0, 1, 1, 1, 0, 1, 1,
	1, 0, 3, 1, 2, 3, 3, 3,
	2, 0, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0
};
2683 
/* Ragel-generated: for each state, the number of (low, high) key ranges that
 * follow its single keys in _json_trans_keys.  Each range occupies two
 * consecutive entries, which is why parse() steps through them in pairs. */
static const char _json_range_lengths[] = {
	0, 2, 0, 1, 1, 1, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	1, 0, 1, 1, 1, 1, 1, 1,
	1, 0, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 3, 3, 3, 3, 0,
	1, 1, 0, 1, 1, 0, 1, 1,
	1, 1, 1, 0, 1, 1, 0, 1,
	1, 0, 1, 1, 0, 1, 1, 0,
	1, 1, 0, 1, 1, 0, 0, 0,
	0, 1, 1, 0, 0, 0, 1, 0,
	0, 0, 1, 1, 1, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 1, 0, 1, 1, 1, 1,
	1, 0, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0
};
2701 
/* Ragel-generated: for each state, the index in _json_indicies of the entry
 * for its first key.  parse() adds the position of the matching key (or the
 * total key count when nothing matched) to this base. */
static const short _json_index_offsets[] = {
	0, 0, 11, 13, 18, 23, 28, 30,
	32, 34, 36, 38, 40, 42, 44, 46,
	48, 53, 55, 59, 64, 69, 74, 78,
	82, 85, 89, 91, 95, 99, 101, 103,
	108, 111, 114, 124, 128, 132, 136, 140,
	143, 147, 150, 153, 155, 158, 160, 164,
	166, 168, 170, 172, 174, 176, 178, 180,
	182, 184, 186, 188, 190, 192, 194, 196,
	198, 200, 202, 207, 209, 211, 213, 215,
	217, 219, 221, 226, 229, 232, 235, 244,
	246, 247, 248, 253, 258, 263, 265, 267,
	269, 271, 272, 274, 276, 278, 279, 281,
	283, 285, 286, 291, 293, 297, 302, 307,
	312, 316, 317, 320, 323, 326, 329, 332,
	335, 336, 337, 338, 339, 340
};
2719 
/* Ragel-generated: maps a per-state key position (base taken from
 * _json_index_offsets) to a transition id.  The id is then used by parse()
 * to index _json_trans_targs and _json_trans_actions. */
static const unsigned char _json_indicies[] = {
	0, 2, 3, 4, 5, 6, 7, 8,
	0, 3, 1, 9, 1, 11, 12, 1,
	11, 10, 13, 14, 12, 13, 1, 14,
	1, 1, 14, 10, 15, 1, 16, 1,
	17, 1, 18, 1, 19, 1, 20, 1,
	21, 1, 22, 1, 23, 1, 24, 1,
	25, 26, 27, 25, 1, 28, 1, 29,
	30, 29, 1, 30, 1, 1, 30, 31,
	32, 33, 34, 32, 1, 35, 36, 27,
	35, 1, 36, 26, 36, 1, 37, 38,
	39, 1, 38, 39, 1, 41, 42, 42,
	40, 43, 1, 42, 42, 43, 40, 44,
	44, 45, 1, 45, 1, 45, 40, 41,
	42, 42, 39, 40, 47, 48, 46, 50,
	51, 49, 52, 52, 52, 52, 52, 52,
	52, 52, 53, 1, 54, 54, 54, 1,
	55, 55, 55, 1, 56, 56, 56, 1,
	57, 57, 57, 1, 59, 60, 58, 61,
	62, 63, 1, 64, 65, 1, 66, 67,
	1, 68, 1, 67, 68, 1, 69, 1,
	66, 67, 65, 1, 70, 1, 71, 1,
	72, 1, 73, 1, 74, 1, 75, 1,
	76, 1, 77, 1, 78, 1, 79, 1,
	80, 1, 81, 1, 82, 1, 83, 1,
	84, 1, 85, 1, 86, 1, 87, 1,
	88, 1, 89, 89, 90, 91, 1, 92,
	1, 93, 1, 94, 1, 95, 1, 96,
	1, 97, 1, 98, 1, 99, 99, 100,
	98, 1, 102, 1, 101, 104, 105, 103,
	1, 1, 101, 106, 107, 108, 109, 110,
	111, 112, 107, 1, 113, 1, 114, 115,
	117, 118, 1, 117, 116, 119, 120, 118,
	119, 1, 120, 1, 1, 120, 116, 121,
	1, 122, 1, 123, 1, 124, 1, 125,
	126, 1, 127, 1, 128, 1, 129, 130,
	1, 131, 1, 132, 1, 133, 134, 135,
	136, 134, 1, 137, 1, 138, 139, 138,
	1, 139, 1, 1, 139, 140, 141, 142,
	143, 141, 1, 144, 145, 136, 144, 1,
	145, 135, 145, 1, 146, 147, 147, 1,
	148, 148, 1, 149, 149, 1, 150, 150,
	1, 151, 151, 1, 152, 152, 1, 1,
	1, 1, 1, 1, 1, 0
};
2765 
/* Ragel-generated: target state for each transition id.  parse() assigns the
 * looked-up value to cs; a target of 0 makes parse() leave the machine loop
 * (it jumps to _out when cs == 0). */
static const char _json_trans_targs[] = {
	1, 0, 2, 107, 3, 6, 10, 13,
	16, 106, 4, 3, 106, 4, 5, 7,
	8, 9, 108, 11, 12, 109, 14, 15,
	110, 16, 17, 111, 18, 18, 19, 20,
	21, 22, 111, 21, 22, 24, 25, 31,
	112, 26, 28, 27, 29, 30, 33, 113,
	34, 33, 113, 34, 32, 35, 36, 37,
	38, 39, 33, 113, 34, 41, 42, 46,
	42, 46, 43, 45, 44, 114, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57,
	58, 59, 60, 61, 62, 63, 64, 65,
	66, 67, 73, 72, 68, 69, 70, 71,
	72, 115, 74, 67, 72, 76, 116, 76,
	116, 77, 79, 81, 82, 85, 90, 94,
	98, 80, 117, 117, 83, 82, 80, 83,
	84, 86, 87, 88, 89, 117, 91, 92,
	93, 117, 95, 96, 97, 117, 98, 99,
	105, 100, 100, 101, 102, 103, 104, 105,
	103, 104, 117, 106, 106, 106, 106, 106,
	106
};
2788 
/* Ragel-generated: for each transition id, the offset into _json_actions of
 * the action list to run when the transition is taken.  parse() treats 0 as
 * "no actions"; otherwise the entry at that offset is the action count,
 * followed by that many action ids. */
static const unsigned char _json_trans_actions[] = {
	0, 0, 113, 107, 53, 0, 0, 0,
	125, 59, 45, 0, 55, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 101, 51, 47, 0, 0, 45,
	49, 49, 104, 0, 0, 0, 0, 0,
	3, 0, 0, 0, 0, 0, 5, 15,
	0, 0, 71, 7, 13, 0, 74, 9,
	9, 9, 77, 80, 11, 37, 37, 37,
	0, 0, 0, 39, 0, 41, 86, 0,
	0, 0, 17, 19, 0, 21, 23, 0,
	25, 27, 0, 29, 31, 0, 33, 35,
	0, 135, 83, 135, 0, 0, 0, 0,
	0, 92, 0, 89, 89, 98, 43, 0,
	131, 95, 113, 107, 53, 0, 0, 0,
	125, 59, 69, 110, 45, 0, 55, 0,
	0, 0, 0, 0, 0, 119, 0, 0,
	0, 122, 0, 0, 0, 116, 0, 101,
	51, 47, 0, 0, 45, 49, 49, 104,
	0, 0, 128, 0, 57, 63, 65, 61,
	67
};
2811 
/* Ragel-generated: for each state, the offset into _json_actions of the
 * action list parse() runs when input ends in that state (p == eof).
 * Unlike _json_trans_actions, parse() does not special-case 0 here: it
 * always reads the action count stored at the given offset. */
static const unsigned char _json_eof_actions[] = {
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 1, 0, 1, 0, 0, 1, 1,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 57, 63, 65, 61, 67,
	0, 0, 0, 0, 0, 0
};
2829 
/* Ragel-generated: state in which a freshly reset parser starts
 * (json_parser_reset() copies it into the saved current state). */
static const int json_start = 1;

/* Ragel-generated: entry state of each named sub-machine.  The generated
 * actions in parse() jump to these states using the same literal values
 * (e.g. cs = 23 for the number machine), so the constants themselves are
 * only referenced by end() to silence unused-variable warnings. */
static const int json_en_number_machine = 23;
static const int json_en_string_machine = 32;
static const int json_en_duration_machine = 40;
static const int json_en_timestamp_machine = 47;
static const int json_en_fieldmask_machine = 75;
static const int json_en_value_machine = 78;
static const int json_en_main = 1;
2839 
2840 
2841 #line 2787 "upb/json/parser.rl"
2842 
parse(void * closure,const void * hd,const char * buf,size_t size,const upb_bufhandle * handle)2843 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
2844              const upb_bufhandle *handle) {
2845   upb_json_parser *parser = closure;
2846 
2847   /* Variables used by Ragel's generated code. */
2848   int cs = parser->current_state;
2849   int *stack = parser->parser_stack;
2850   int top = parser->parser_top;
2851 
2852   const char *p = buf;
2853   const char *pe = buf + size;
2854   const char *eof = &eof_ch;
2855 
2856   parser->handle = handle;
2857 
2858   UPB_UNUSED(hd);
2859   UPB_UNUSED(handle);
2860 
2861   capture_resume(parser, buf);
2862 
2863 
2864 #line 2865 "upb/json/parser.c"
2865 	{
2866 	int _klen;
2867 	unsigned int _trans;
2868 	const char *_acts;
2869 	unsigned int _nacts;
2870 	const char *_keys;
2871 
2872 	if ( p == pe )
2873 		goto _test_eof;
2874 	if ( cs == 0 )
2875 		goto _out;
2876 _resume:
2877 	_keys = _json_trans_keys + _json_key_offsets[cs];
2878 	_trans = _json_index_offsets[cs];
2879 
2880 	_klen = _json_single_lengths[cs];
2881 	if ( _klen > 0 ) {
2882 		const char *_lower = _keys;
2883 		const char *_mid;
2884 		const char *_upper = _keys + _klen - 1;
2885 		while (1) {
2886 			if ( _upper < _lower )
2887 				break;
2888 
2889 			_mid = _lower + ((_upper-_lower) >> 1);
2890 			if ( (*p) < *_mid )
2891 				_upper = _mid - 1;
2892 			else if ( (*p) > *_mid )
2893 				_lower = _mid + 1;
2894 			else {
2895 				_trans += (unsigned int)(_mid - _keys);
2896 				goto _match;
2897 			}
2898 		}
2899 		_keys += _klen;
2900 		_trans += _klen;
2901 	}
2902 
2903 	_klen = _json_range_lengths[cs];
2904 	if ( _klen > 0 ) {
2905 		const char *_lower = _keys;
2906 		const char *_mid;
2907 		const char *_upper = _keys + (_klen<<1) - 2;
2908 		while (1) {
2909 			if ( _upper < _lower )
2910 				break;
2911 
2912 			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
2913 			if ( (*p) < _mid[0] )
2914 				_upper = _mid - 2;
2915 			else if ( (*p) > _mid[1] )
2916 				_lower = _mid + 2;
2917 			else {
2918 				_trans += (unsigned int)((_mid - _keys)>>1);
2919 				goto _match;
2920 			}
2921 		}
2922 		_trans += _klen;
2923 	}
2924 
2925 _match:
2926 	_trans = _json_indicies[_trans];
2927 	cs = _json_trans_targs[_trans];
2928 
2929 	if ( _json_trans_actions[_trans] == 0 )
2930 		goto _again;
2931 
2932 	_acts = _json_actions + _json_trans_actions[_trans];
2933 	_nacts = (unsigned int) *_acts++;
2934 	while ( _nacts-- > 0 )
2935 	{
2936 		switch ( *_acts++ )
2937 		{
2938 	case 1:
2939 #line 2592 "upb/json/parser.rl"
2940 	{ p--; {cs = stack[--top]; goto _again;} }
2941 	break;
2942 	case 2:
2943 #line 2594 "upb/json/parser.rl"
2944 	{ p--; {stack[top++] = cs; cs = 23;goto _again;} }
2945 	break;
2946 	case 3:
2947 #line 2598 "upb/json/parser.rl"
2948 	{ start_text(parser, p); }
2949 	break;
2950 	case 4:
2951 #line 2599 "upb/json/parser.rl"
2952 	{ CHECK_RETURN_TOP(end_text(parser, p)); }
2953 	break;
2954 	case 5:
2955 #line 2605 "upb/json/parser.rl"
2956 	{ start_hex(parser); }
2957 	break;
2958 	case 6:
2959 #line 2606 "upb/json/parser.rl"
2960 	{ hexdigit(parser, p); }
2961 	break;
2962 	case 7:
2963 #line 2607 "upb/json/parser.rl"
2964 	{ CHECK_RETURN_TOP(end_hex(parser)); }
2965 	break;
2966 	case 8:
2967 #line 2613 "upb/json/parser.rl"
2968 	{ CHECK_RETURN_TOP(escape(parser, p)); }
2969 	break;
2970 	case 9:
2971 #line 2619 "upb/json/parser.rl"
2972 	{ p--; {cs = stack[--top]; goto _again;} }
2973 	break;
2974 	case 10:
2975 #line 2624 "upb/json/parser.rl"
2976 	{ start_year(parser, p); }
2977 	break;
2978 	case 11:
2979 #line 2625 "upb/json/parser.rl"
2980 	{ CHECK_RETURN_TOP(end_year(parser, p)); }
2981 	break;
2982 	case 12:
2983 #line 2629 "upb/json/parser.rl"
2984 	{ start_month(parser, p); }
2985 	break;
2986 	case 13:
2987 #line 2630 "upb/json/parser.rl"
2988 	{ CHECK_RETURN_TOP(end_month(parser, p)); }
2989 	break;
2990 	case 14:
2991 #line 2634 "upb/json/parser.rl"
2992 	{ start_day(parser, p); }
2993 	break;
2994 	case 15:
2995 #line 2635 "upb/json/parser.rl"
2996 	{ CHECK_RETURN_TOP(end_day(parser, p)); }
2997 	break;
2998 	case 16:
2999 #line 2639 "upb/json/parser.rl"
3000 	{ start_hour(parser, p); }
3001 	break;
3002 	case 17:
3003 #line 2640 "upb/json/parser.rl"
3004 	{ CHECK_RETURN_TOP(end_hour(parser, p)); }
3005 	break;
3006 	case 18:
3007 #line 2644 "upb/json/parser.rl"
3008 	{ start_minute(parser, p); }
3009 	break;
3010 	case 19:
3011 #line 2645 "upb/json/parser.rl"
3012 	{ CHECK_RETURN_TOP(end_minute(parser, p)); }
3013 	break;
3014 	case 20:
3015 #line 2649 "upb/json/parser.rl"
3016 	{ start_second(parser, p); }
3017 	break;
3018 	case 21:
3019 #line 2650 "upb/json/parser.rl"
3020 	{ CHECK_RETURN_TOP(end_second(parser, p)); }
3021 	break;
3022 	case 22:
3023 #line 2655 "upb/json/parser.rl"
3024 	{ start_duration_base(parser, p); }
3025 	break;
3026 	case 23:
3027 #line 2656 "upb/json/parser.rl"
3028 	{ CHECK_RETURN_TOP(end_duration_base(parser, p)); }
3029 	break;
3030 	case 24:
3031 #line 2658 "upb/json/parser.rl"
3032 	{ p--; {cs = stack[--top]; goto _again;} }
3033 	break;
3034 	case 25:
3035 #line 2663 "upb/json/parser.rl"
3036 	{ start_timestamp_base(parser); }
3037 	break;
3038 	case 26:
3039 #line 2665 "upb/json/parser.rl"
3040 	{ start_timestamp_fraction(parser, p); }
3041 	break;
3042 	case 27:
3043 #line 2666 "upb/json/parser.rl"
3044 	{ CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); }
3045 	break;
3046 	case 28:
3047 #line 2668 "upb/json/parser.rl"
3048 	{ start_timestamp_zone(parser, p); }
3049 	break;
3050 	case 29:
3051 #line 2669 "upb/json/parser.rl"
3052 	{ CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); }
3053 	break;
3054 	case 30:
3055 #line 2671 "upb/json/parser.rl"
3056 	{ p--; {cs = stack[--top]; goto _again;} }
3057 	break;
3058 	case 31:
3059 #line 2676 "upb/json/parser.rl"
3060 	{ start_fieldmask_path_text(parser, p); }
3061 	break;
3062 	case 32:
3063 #line 2677 "upb/json/parser.rl"
3064 	{ end_fieldmask_path_text(parser, p); }
3065 	break;
3066 	case 33:
3067 #line 2682 "upb/json/parser.rl"
3068 	{ start_fieldmask_path(parser); }
3069 	break;
3070 	case 34:
3071 #line 2683 "upb/json/parser.rl"
3072 	{ end_fieldmask_path(parser); }
3073 	break;
3074 	case 35:
3075 #line 2689 "upb/json/parser.rl"
3076 	{ p--; {cs = stack[--top]; goto _again;} }
3077 	break;
3078 	case 36:
3079 #line 2694 "upb/json/parser.rl"
3080 	{
3081         if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) {
3082           {stack[top++] = cs; cs = 47;goto _again;}
3083         } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) {
3084           {stack[top++] = cs; cs = 40;goto _again;}
3085         } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) {
3086           {stack[top++] = cs; cs = 75;goto _again;}
3087         } else {
3088           {stack[top++] = cs; cs = 32;goto _again;}
3089         }
3090       }
3091 	break;
3092 	case 37:
3093 #line 2707 "upb/json/parser.rl"
3094 	{ p--; {stack[top++] = cs; cs = 78;goto _again;} }
3095 	break;
3096 	case 38:
3097 #line 2712 "upb/json/parser.rl"
3098 	{
3099         if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
3100           start_any_member(parser, p);
3101         } else {
3102           start_member(parser);
3103         }
3104       }
3105 	break;
3106 	case 39:
3107 #line 2719 "upb/json/parser.rl"
3108 	{ CHECK_RETURN_TOP(end_membername(parser)); }
3109 	break;
3110 	case 40:
3111 #line 2722 "upb/json/parser.rl"
3112 	{
3113         if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
3114           end_any_member(parser, p);
3115         } else {
3116           end_member(parser);
3117         }
3118       }
3119 	break;
3120 	case 41:
3121 #line 2733 "upb/json/parser.rl"
3122 	{
3123         if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
3124           start_any_object(parser, p);
3125         } else {
3126           start_object(parser);
3127         }
3128       }
3129 	break;
3130 	case 42:
3131 #line 2742 "upb/json/parser.rl"
3132 	{
3133         if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
3134           CHECK_RETURN_TOP(end_any_object(parser, p));
3135         } else {
3136           end_object(parser);
3137         }
3138       }
3139 	break;
3140 	case 43:
3141 #line 2754 "upb/json/parser.rl"
3142 	{ CHECK_RETURN_TOP(start_array(parser)); }
3143 	break;
3144 	case 44:
3145 #line 2758 "upb/json/parser.rl"
3146 	{ end_array(parser); }
3147 	break;
3148 	case 45:
3149 #line 2763 "upb/json/parser.rl"
3150 	{ CHECK_RETURN_TOP(start_number(parser, p)); }
3151 	break;
3152 	case 46:
3153 #line 2764 "upb/json/parser.rl"
3154 	{ CHECK_RETURN_TOP(end_number(parser, p)); }
3155 	break;
3156 	case 47:
3157 #line 2766 "upb/json/parser.rl"
3158 	{ CHECK_RETURN_TOP(start_stringval(parser)); }
3159 	break;
3160 	case 48:
3161 #line 2767 "upb/json/parser.rl"
3162 	{ CHECK_RETURN_TOP(end_stringval(parser)); }
3163 	break;
3164 	case 49:
3165 #line 2769 "upb/json/parser.rl"
3166 	{ CHECK_RETURN_TOP(end_bool(parser, true)); }
3167 	break;
3168 	case 50:
3169 #line 2771 "upb/json/parser.rl"
3170 	{ CHECK_RETURN_TOP(end_bool(parser, false)); }
3171 	break;
3172 	case 51:
3173 #line 2773 "upb/json/parser.rl"
3174 	{ CHECK_RETURN_TOP(end_null(parser)); }
3175 	break;
3176 	case 52:
3177 #line 2775 "upb/json/parser.rl"
3178 	{ CHECK_RETURN_TOP(start_subobject_full(parser)); }
3179 	break;
3180 	case 53:
3181 #line 2776 "upb/json/parser.rl"
3182 	{ end_subobject_full(parser); }
3183 	break;
3184 	case 54:
3185 #line 2781 "upb/json/parser.rl"
3186 	{ p--; {cs = stack[--top]; goto _again;} }
3187 	break;
3188 #line 3189 "upb/json/parser.c"
3189 		}
3190 	}
3191 
3192 _again:
3193 	if ( cs == 0 )
3194 		goto _out;
3195 	if ( ++p != pe )
3196 		goto _resume;
3197 	_test_eof: {}
3198 	if ( p == eof )
3199 	{
3200 	const char *__acts = _json_actions + _json_eof_actions[cs];
3201 	unsigned int __nacts = (unsigned int) *__acts++;
3202 	while ( __nacts-- > 0 ) {
3203 		switch ( *__acts++ ) {
3204 	case 0:
3205 #line 2590 "upb/json/parser.rl"
3206 	{ p--; {cs = stack[--top]; 	if ( p == pe )
3207 		goto _test_eof;
3208 goto _again;} }
3209 	break;
3210 	case 46:
3211 #line 2764 "upb/json/parser.rl"
3212 	{ CHECK_RETURN_TOP(end_number(parser, p)); }
3213 	break;
3214 	case 49:
3215 #line 2769 "upb/json/parser.rl"
3216 	{ CHECK_RETURN_TOP(end_bool(parser, true)); }
3217 	break;
3218 	case 50:
3219 #line 2771 "upb/json/parser.rl"
3220 	{ CHECK_RETURN_TOP(end_bool(parser, false)); }
3221 	break;
3222 	case 51:
3223 #line 2773 "upb/json/parser.rl"
3224 	{ CHECK_RETURN_TOP(end_null(parser)); }
3225 	break;
3226 	case 53:
3227 #line 2776 "upb/json/parser.rl"
3228 	{ end_subobject_full(parser); }
3229 	break;
3230 #line 3231 "upb/json/parser.c"
3231 		}
3232 	}
3233 	}
3234 
3235 	_out: {}
3236 	}
3237 
3238 #line 2809 "upb/json/parser.rl"
3239 
3240   if (p != pe) {
3241     upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", pe - p, p);
3242   } else {
3243     capture_suspend(parser, &p);
3244   }
3245 
3246 error:
3247   /* Save parsing state back to parser. */
3248   parser->current_state = cs;
3249   parser->parser_top = top;
3250 
3251   return p - buf;
3252 }
3253 
end(void * closure,const void * hd)3254 static bool end(void *closure, const void *hd) {
3255   upb_json_parser *parser = closure;
3256 
3257   /* Prevent compile warning on unused static constants. */
3258   UPB_UNUSED(json_start);
3259   UPB_UNUSED(json_en_duration_machine);
3260   UPB_UNUSED(json_en_fieldmask_machine);
3261   UPB_UNUSED(json_en_number_machine);
3262   UPB_UNUSED(json_en_string_machine);
3263   UPB_UNUSED(json_en_timestamp_machine);
3264   UPB_UNUSED(json_en_value_machine);
3265   UPB_UNUSED(json_en_main);
3266 
3267   parse(parser, hd, &eof_ch, 0, NULL);
3268 
3269   return parser->current_state >= 106;
3270 }
3271 
/* Puts |p| back into its initial parsing state: the frame stack is rewound to
 * its first frame, the Ragel machine state and machine call-stack depth are
 * set to their start values, and any pending accumulate/capture state is
 * cleared. */
static void json_parser_reset(upb_json_parser *p) {
  /* Named cs/top because the Ragel-emitted initializer below assigns to
   * variables with exactly these names. */
  int cs;
  int top;

  p->top = p->stack;
  init_frame(p->top);

  /* Emit Ragel initialization of the parser. */

#line 3282 "upb/json/parser.c"
	{
	cs = json_start;
	top = 0;
	}

#line 2851 "upb/json/parser.rl"
  /* Persist the machine state so the first parse() call picks it up. */
  p->current_state = cs;
  p->parser_top = top;
  accumulate_clear(p);
  p->multipart_state = MULTIPART_INACTIVE;
  p->capture = NULL;
  p->accumulated = NULL;
}
3295 
parsermethod_new(upb_json_codecache * c,const upb_msgdef * md)3296 static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
3297                                                const upb_msgdef *md) {
3298   int i, n;
3299   upb_alloc *alloc = upb_arena_alloc(c->arena);
3300 
3301   upb_json_parsermethod *m = upb_malloc(alloc, sizeof(*m));
3302 
3303   m->cache = c;
3304 
3305   upb_byteshandler_init(&m->input_handler_);
3306   upb_byteshandler_setstring(&m->input_handler_, parse, m);
3307   upb_byteshandler_setendstr(&m->input_handler_, end, m);
3308 
3309   upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, 4, alloc);
3310 
3311   /* Build name_table */
3312 
3313   n = upb_msgdef_fieldcount(md);
3314   for(i = 0; i < n; i++) {
3315     const upb_fielddef *f = upb_msgdef_field(md, i);
3316     upb_value v = upb_value_constptr(f);
3317     const char *name;
3318 
3319     /* Add an entry for the JSON name. */
3320     name = upb_fielddef_jsonname(f);
3321     upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc);
3322 
3323     if (strcmp(name, upb_fielddef_name(f)) != 0) {
3324       /* Since the JSON name is different from the regular field name, add an
3325        * entry for the raw name (compliant proto3 JSON parsers must accept
3326        * both). */
3327       const char *name = upb_fielddef_name(f);
3328       upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc);
3329     }
3330   }
3331 
3332   return m;
3333 }
3334 
3335 /* Public API *****************************************************************/
3336 
upb_json_parser_create(upb_arena * arena,const upb_json_parsermethod * method,const upb_symtab * symtab,upb_sink output,upb_status * status,bool ignore_json_unknown)3337 upb_json_parser *upb_json_parser_create(upb_arena *arena,
3338                                         const upb_json_parsermethod *method,
3339                                         const upb_symtab* symtab,
3340                                         upb_sink output,
3341                                         upb_status *status,
3342                                         bool ignore_json_unknown) {
3343   upb_json_parser *p = upb_arena_malloc(arena, sizeof(upb_json_parser));
3344   if (!p) return false;
3345 
3346   p->arena = arena;
3347   p->method = method;
3348   p->status = status;
3349   p->limit = p->stack + UPB_JSON_MAX_DEPTH;
3350   p->accumulate_buf = NULL;
3351   p->accumulate_buf_size = 0;
3352   upb_bytessink_reset(&p->input_, &method->input_handler_, p);
3353 
3354   json_parser_reset(p);
3355   p->top->sink = output;
3356   p->top->m = upb_handlers_msgdef(output.handlers);
3357   if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
3358     p->top->is_any = true;
3359     p->top->any_frame = json_parser_any_frame_new(p);
3360   } else {
3361     p->top->is_any = false;
3362     p->top->any_frame = NULL;
3363   }
3364   set_name_table(p, p->top);
3365   p->symtab = symtab;
3366 
3367   p->ignore_json_unknown = ignore_json_unknown;
3368 
3369   return p;
3370 }
3371 
upb_json_parser_input(upb_json_parser * p)3372 upb_bytessink upb_json_parser_input(upb_json_parser *p) {
3373   return p->input_;
3374 }
3375 
upb_json_parsermethod_inputhandler(const upb_json_parsermethod * m)3376 const upb_byteshandler *upb_json_parsermethod_inputhandler(
3377     const upb_json_parsermethod *m) {
3378   return &m->input_handler_;
3379 }
3380 
upb_json_codecache_new(void)3381 upb_json_codecache *upb_json_codecache_new(void) {
3382   upb_alloc *alloc;
3383   upb_json_codecache *c;
3384 
3385   c = upb_gmalloc(sizeof(*c));
3386 
3387   c->arena = upb_arena_new();
3388   alloc = upb_arena_alloc(c->arena);
3389 
3390   upb_inttable_init2(&c->methods, UPB_CTYPE_CONSTPTR, alloc);
3391 
3392   return c;
3393 }
3394 
/* Destroys a cache created by upb_json_codecache_new(): frees its arena
 * first (c->arena must be read before |c| is released), then the cache
 * object itself.  Passing NULL is a no-op, mirroring free(). */
void upb_json_codecache_free(upb_json_codecache *c) {
  if (!c) return;

  upb_arena_free(c->arena);
  upb_gfree(c);
}
3399 
upb_json_codecache_get(upb_json_codecache * c,const upb_msgdef * md)3400 const upb_json_parsermethod *upb_json_codecache_get(upb_json_codecache *c,
3401                                                     const upb_msgdef *md) {
3402   upb_json_parsermethod *m;
3403   upb_value v;
3404   int i, n;
3405   upb_alloc *alloc = upb_arena_alloc(c->arena);
3406 
3407   if (upb_inttable_lookupptr(&c->methods, md, &v)) {
3408     return upb_value_getconstptr(v);
3409   }
3410 
3411   m = parsermethod_new(c, md);
3412   v = upb_value_constptr(m);
3413 
3414   if (!m) return NULL;
3415   if (!upb_inttable_insertptr2(&c->methods, md, v, alloc)) return NULL;
3416 
3417   /* Populate parser methods for all submessages, so the name tables will
3418    * be available during parsing. */
3419   n = upb_msgdef_fieldcount(md);
3420   for(i = 0; i < n; i++) {
3421     const upb_fielddef *f = upb_msgdef_field(md, i);
3422 
3423     if (upb_fielddef_issubmsg(f)) {
3424       const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
3425       const upb_json_parsermethod *sub_method =
3426           upb_json_codecache_get(c, subdef);
3427 
3428       if (!sub_method) return NULL;
3429     }
3430   }
3431 
3432   return m;
3433 }
3434