1 // Amalgamated source file
2 #include "upb.h"
3 
4 
5 #include <ctype.h>
6 #include <stdlib.h>
7 #include <string.h>
8 
9 typedef struct {
10   size_t len;
11   char str[1];  /* Null-terminated string data follows. */
12 } str_t;
13 
newstr(const char * data,size_t len)14 static str_t *newstr(const char *data, size_t len) {
15   str_t *ret = upb_gmalloc(sizeof(*ret) + len);
16   if (!ret) return NULL;
17   ret->len = len;
18   memcpy(ret->str, data, len);
19   ret->str[len] = '\0';
20   return ret;
21 }
22 
freestr(str_t * s)23 static void freestr(str_t *s) { upb_gfree(s); }
24 
25 /* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */
upb_isbetween(char c,char low,char high)26 static bool upb_isbetween(char c, char low, char high) {
27   return c >= low && c <= high;
28 }
29 
upb_isletter(char c)30 static bool upb_isletter(char c) {
31   return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_';
32 }
33 
upb_isalphanum(char c)34 static bool upb_isalphanum(char c) {
35   return upb_isletter(c) || upb_isbetween(c, '0', '9');
36 }
37 
upb_isident(const char * str,size_t len,bool full,upb_status * s)38 static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
39   bool start = true;
40   size_t i;
41   for (i = 0; i < len; i++) {
42     char c = str[i];
43     if (c == '.') {
44       if (start || !full) {
45         upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
46         return false;
47       }
48       start = true;
49     } else if (start) {
50       if (!upb_isletter(c)) {
51         upb_status_seterrf(
52             s, "invalid name: path components must start with a letter (%s)",
53             str);
54         return false;
55       }
56       start = false;
57     } else {
58       if (!upb_isalphanum(c)) {
59         upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
60                            str);
61         return false;
62       }
63     }
64   }
65   return !start;
66 }
67 
upb_isoneof(const upb_refcounted * def)68 static bool upb_isoneof(const upb_refcounted *def) {
69   return def->vtbl == &upb_oneofdef_vtbl;
70 }
71 
upb_isfield(const upb_refcounted * def)72 static bool upb_isfield(const upb_refcounted *def) {
73   return def->vtbl == &upb_fielddef_vtbl;
74 }
75 
upb_trygetoneof(const upb_refcounted * def)76 static const upb_oneofdef *upb_trygetoneof(const upb_refcounted *def) {
77   return upb_isoneof(def) ? (const upb_oneofdef*)def : NULL;
78 }
79 
upb_trygetfield(const upb_refcounted * def)80 static const upb_fielddef *upb_trygetfield(const upb_refcounted *def) {
81   return upb_isfield(def) ? (const upb_fielddef*)def : NULL;
82 }
83 
84 
85 /* upb_def ********************************************************************/
86 
upb_def_type(const upb_def * d)87 upb_deftype_t upb_def_type(const upb_def *d) { return d->type; }
88 
upb_def_fullname(const upb_def * d)89 const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
90 
upb_def_name(const upb_def * d)91 const char *upb_def_name(const upb_def *d) {
92   const char *p;
93 
94   if (d->fullname == NULL) {
95     return NULL;
96   } else if ((p = strrchr(d->fullname, '.')) == NULL) {
97     /* No '.' in the name, return the full string. */
98     return d->fullname;
99   } else {
100     /* Return one past the last '.'. */
101     return p + 1;
102   }
103 }
104 
upb_def_setfullname(upb_def * def,const char * fullname,upb_status * s)105 bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
106   assert(!upb_def_isfrozen(def));
107   if (!upb_isident(fullname, strlen(fullname), true, s)) {
108     return false;
109   }
110 
111   fullname = upb_gstrdup(fullname);
112   if (!fullname) {
113     upb_upberr_setoom(s);
114     return false;
115   }
116 
117   upb_gfree((void*)def->fullname);
118   def->fullname = fullname;
119   return true;
120 }
121 
upb_def_file(const upb_def * d)122 const upb_filedef *upb_def_file(const upb_def *d) { return d->file; }
123 
upb_def_dup(const upb_def * def,const void * o)124 upb_def *upb_def_dup(const upb_def *def, const void *o) {
125   switch (def->type) {
126     case UPB_DEF_MSG:
127       return upb_msgdef_upcast_mutable(
128           upb_msgdef_dup(upb_downcast_msgdef(def), o));
129     case UPB_DEF_FIELD:
130       return upb_fielddef_upcast_mutable(
131           upb_fielddef_dup(upb_downcast_fielddef(def), o));
132     case UPB_DEF_ENUM:
133       return upb_enumdef_upcast_mutable(
134           upb_enumdef_dup(upb_downcast_enumdef(def), o));
135     default: assert(false); return NULL;
136   }
137 }
138 
upb_def_init(upb_def * def,upb_deftype_t type,const struct upb_refcounted_vtbl * vtbl,const void * owner)139 static bool upb_def_init(upb_def *def, upb_deftype_t type,
140                          const struct upb_refcounted_vtbl *vtbl,
141                          const void *owner) {
142   if (!upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) return false;
143   def->type = type;
144   def->fullname = NULL;
145   def->came_from_user = false;
146   def->file = NULL;
147   return true;
148 }
149 
upb_def_uninit(upb_def * def)150 static void upb_def_uninit(upb_def *def) {
151   upb_gfree((void*)def->fullname);
152 }
153 
msgdef_name(const upb_msgdef * m)154 static const char *msgdef_name(const upb_msgdef *m) {
155   const char *name = upb_def_fullname(upb_msgdef_upcast(m));
156   return name ? name : "(anonymous)";
157 }
158 
upb_validate_field(upb_fielddef * f,upb_status * s)159 static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
160   if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
161     upb_status_seterrmsg(s, "fielddef must have name and number set");
162     return false;
163   }
164 
165   if (!f->type_is_set_) {
166     upb_status_seterrmsg(s, "fielddef type was not initialized");
167     return false;
168   }
169 
170   if (upb_fielddef_lazy(f) &&
171       upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) {
172     upb_status_seterrmsg(s,
173                          "only length-delimited submessage fields may be lazy");
174     return false;
175   }
176 
177   if (upb_fielddef_hassubdef(f)) {
178     const upb_def *subdef;
179 
180     if (f->subdef_is_symbolic) {
181       upb_status_seterrf(s, "field '%s.%s' has not been resolved",
182                          msgdef_name(f->msg.def), upb_fielddef_name(f));
183       return false;
184     }
185 
186     subdef = upb_fielddef_subdef(f);
187     if (subdef == NULL) {
188       upb_status_seterrf(s, "field %s.%s is missing required subdef",
189                          msgdef_name(f->msg.def), upb_fielddef_name(f));
190       return false;
191     }
192 
193     if (!upb_def_isfrozen(subdef) && !subdef->came_from_user) {
194       upb_status_seterrf(s,
195                          "subdef of field %s.%s is not frozen or being frozen",
196                          msgdef_name(f->msg.def), upb_fielddef_name(f));
197       return false;
198     }
199   }
200 
201   if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
202     bool has_default_name = upb_fielddef_enumhasdefaultstr(f);
203     bool has_default_number = upb_fielddef_enumhasdefaultint32(f);
204 
205     /* Previously verified by upb_validate_enumdef(). */
206     assert(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);
207 
208     /* We've already validated that we have an associated enumdef and that it
209      * has at least one member, so at least one of these should be true.
210      * Because if the user didn't set anything, we'll pick up the enum's
211      * default, but if the user *did* set something we should at least pick up
212      * the one they set (int32 or string). */
213     assert(has_default_name || has_default_number);
214 
215     if (!has_default_name) {
216       upb_status_seterrf(s,
217                          "enum default for field %s.%s (%d) is not in the enum",
218                          msgdef_name(f->msg.def), upb_fielddef_name(f),
219                          upb_fielddef_defaultint32(f));
220       return false;
221     }
222 
223     if (!has_default_number) {
224       upb_status_seterrf(s,
225                          "enum default for field %s.%s (%s) is not in the enum",
226                          msgdef_name(f->msg.def), upb_fielddef_name(f),
227                          upb_fielddef_defaultstr(f, NULL));
228       return false;
229     }
230 
231     /* Lift the effective numeric default into the field's default slot, in case
232      * we were only getting it "by reference" from the enumdef. */
233     upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
234   }
235 
236   /* Ensure that MapEntry submessages only appear as repeated fields, not
237    * optional/required (singular) fields. */
238   if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
239       upb_fielddef_msgsubdef(f) != NULL) {
240     const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
241     if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) {
242       upb_status_seterrf(s,
243                          "Field %s refers to mapentry message but is not "
244                          "a repeated field",
245                          upb_fielddef_name(f) ? upb_fielddef_name(f) :
246                          "(unnamed)");
247       return false;
248     }
249   }
250 
251   return true;
252 }
253 
upb_validate_enumdef(const upb_enumdef * e,upb_status * s)254 static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
255   if (upb_enumdef_numvals(e) == 0) {
256     upb_status_seterrf(s, "enum %s has no members (must have at least one)",
257                        upb_enumdef_fullname(e));
258     return false;
259   }
260 
261   return true;
262 }
263 
264 /* All submessage fields are lower than all other fields.
265  * Secondly, fields are increasing in order. */
field_rank(const upb_fielddef * f)266 uint32_t field_rank(const upb_fielddef *f) {
267   uint32_t ret = upb_fielddef_number(f);
268   const uint32_t high_bit = 1 << 30;
269   assert(ret < high_bit);
270   if (!upb_fielddef_issubmsg(f))
271     ret |= high_bit;
272   return ret;
273 }
274 
cmp_fields(const void * p1,const void * p2)275 int cmp_fields(const void *p1, const void *p2) {
276   const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
277   const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
278   return field_rank(f1) - field_rank(f2);
279 }
280 
assign_msg_indices(upb_msgdef * m,upb_status * s)281 static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
282   /* Sort fields.  upb internally relies on UPB_TYPE_MESSAGE fields having the
283    * lowest indexes, but we do not publicly guarantee this. */
284   upb_msg_field_iter j;
285   int i;
286   uint32_t selector;
287   int n = upb_msgdef_numfields(m);
288   upb_fielddef **fields;
289 
290   if (n == 0) {
291     m->selector_count = UPB_STATIC_SELECTOR_COUNT;
292     m->submsg_field_count = 0;
293     return true;
294   }
295 
296   fields = upb_gmalloc(n * sizeof(*fields));
297   if (!fields) {
298     upb_upberr_setoom(s);
299     return false;
300   }
301 
302   m->submsg_field_count = 0;
303   for(i = 0, upb_msg_field_begin(&j, m);
304       !upb_msg_field_done(&j);
305       upb_msg_field_next(&j), i++) {
306     upb_fielddef *f = upb_msg_iter_field(&j);
307     assert(f->msg.def == m);
308     if (!upb_validate_field(f, s)) {
309       upb_gfree(fields);
310       return false;
311     }
312     if (upb_fielddef_issubmsg(f)) {
313       m->submsg_field_count++;
314     }
315     fields[i] = f;
316   }
317 
318   qsort(fields, n, sizeof(*fields), cmp_fields);
319 
320   selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
321   for (i = 0; i < n; i++) {
322     upb_fielddef *f = fields[i];
323     f->index_ = i;
324     f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
325     selector += upb_handlers_selectorcount(f);
326   }
327   m->selector_count = selector;
328 
329 #ifndef NDEBUG
330   {
331     /* Verify that all selectors for the message are distinct. */
332 #define TRY(type) \
333     if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v);
334 
335     upb_inttable t;
336     upb_value v;
337     upb_selector_t sel;
338 
339     upb_inttable_init(&t, UPB_CTYPE_BOOL);
340     v = upb_value_bool(true);
341     upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
342     upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
343     for(upb_msg_field_begin(&j, m);
344         !upb_msg_field_done(&j);
345         upb_msg_field_next(&j)) {
346       upb_fielddef *f = upb_msg_iter_field(&j);
347       /* These calls will assert-fail in upb_table if the value already
348        * exists. */
349       TRY(UPB_HANDLER_INT32);
350       TRY(UPB_HANDLER_INT64)
351       TRY(UPB_HANDLER_UINT32)
352       TRY(UPB_HANDLER_UINT64)
353       TRY(UPB_HANDLER_FLOAT)
354       TRY(UPB_HANDLER_DOUBLE)
355       TRY(UPB_HANDLER_BOOL)
356       TRY(UPB_HANDLER_STARTSTR)
357       TRY(UPB_HANDLER_STRING)
358       TRY(UPB_HANDLER_ENDSTR)
359       TRY(UPB_HANDLER_STARTSUBMSG)
360       TRY(UPB_HANDLER_ENDSUBMSG)
361       TRY(UPB_HANDLER_STARTSEQ)
362       TRY(UPB_HANDLER_ENDSEQ)
363     }
364     upb_inttable_uninit(&t);
365   }
366 #undef TRY
367 #endif
368 
369   upb_gfree(fields);
370   return true;
371 }
372 
_upb_def_validate(upb_def * const * defs,size_t n,upb_status * s)373 bool _upb_def_validate(upb_def *const*defs, size_t n, upb_status *s) {
374   size_t i;
375 
376   /* First perform validation, in two passes so we can check that we have a
377    * transitive closure without needing to search. */
378   for (i = 0; i < n; i++) {
379     upb_def *def = defs[i];
380     if (upb_def_isfrozen(def)) {
381       /* Could relax this requirement if it's annoying. */
382       upb_status_seterrmsg(s, "def is already frozen");
383       goto err;
384     } else if (def->type == UPB_DEF_FIELD) {
385       upb_status_seterrmsg(s, "standalone fielddefs can not be frozen");
386       goto err;
387     } else if (def->type == UPB_DEF_ENUM) {
388       if (!upb_validate_enumdef(upb_dyncast_enumdef(def), s)) {
389         goto err;
390       }
391     } else {
392       /* Set now to detect transitive closure in the second pass. */
393       def->came_from_user = true;
394     }
395   }
396 
397   /* Second pass of validation.  Also assign selector bases and indexes, and
398    * compact tables. */
399   for (i = 0; i < n; i++) {
400     upb_def *def = defs[i];
401     upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
402     upb_enumdef *e = upb_dyncast_enumdef_mutable(def);
403     if (m) {
404       upb_inttable_compact(&m->itof);
405       if (!assign_msg_indices(m, s)) {
406         goto err;
407       }
408     } else if (e) {
409       upb_inttable_compact(&e->iton);
410     }
411   }
412 
413   return true;
414 
415 err:
416   for (i = 0; i < n; i++) {
417     upb_def *def = defs[i];
418     def->came_from_user = false;
419   }
420   assert(!(s && upb_ok(s)));
421   return false;
422 }
423 
upb_def_freeze(upb_def * const * defs,size_t n,upb_status * s)424 bool upb_def_freeze(upb_def *const* defs, size_t n, upb_status *s) {
425   /* Def graph contains FieldDefs between each MessageDef, so double the
426    * limit. */
427   const size_t maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;
428 
429   if (!_upb_def_validate(defs, n, s)) {
430     return false;
431   }
432 
433 
434   /* Validation all passed; freeze the objects. */
435   return upb_refcounted_freeze((upb_refcounted *const*)defs, n, s, maxdepth);
436 }
437 
438 
439 /* upb_enumdef ****************************************************************/
440 
upb_enumdef_free(upb_refcounted * r)441 static void upb_enumdef_free(upb_refcounted *r) {
442   upb_enumdef *e = (upb_enumdef*)r;
443   upb_inttable_iter i;
444   upb_inttable_begin(&i, &e->iton);
445   for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) {
446     /* To clean up the upb_gstrdup() from upb_enumdef_addval(). */
447     upb_gfree(upb_value_getcstr(upb_inttable_iter_value(&i)));
448   }
449   upb_strtable_uninit(&e->ntoi);
450   upb_inttable_uninit(&e->iton);
451   upb_def_uninit(upb_enumdef_upcast_mutable(e));
452   upb_gfree(e);
453 }
454 
455 const struct upb_refcounted_vtbl upb_enumdef_vtbl = {NULL, &upb_enumdef_free};
456 
upb_enumdef_new(const void * owner)457 upb_enumdef *upb_enumdef_new(const void *owner) {
458   upb_enumdef *e = upb_gmalloc(sizeof(*e));
459   if (!e) return NULL;
460 
461   if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM,
462                     &upb_enumdef_vtbl, owner)) {
463     goto err2;
464   }
465 
466   if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2;
467   if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1;
468   return e;
469 
470 err1:
471   upb_strtable_uninit(&e->ntoi);
472 err2:
473   upb_gfree(e);
474   return NULL;
475 }
476 
upb_enumdef_dup(const upb_enumdef * e,const void * owner)477 upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) {
478   upb_enum_iter i;
479   upb_enumdef *new_e = upb_enumdef_new(owner);
480   if (!new_e) return NULL;
481   for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
482     bool success = upb_enumdef_addval(
483         new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL);
484     if (!success) {
485       upb_enumdef_unref(new_e, owner);
486       return NULL;
487     }
488   }
489   return new_e;
490 }
491 
upb_enumdef_freeze(upb_enumdef * e,upb_status * status)492 bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
493   upb_def *d = upb_enumdef_upcast_mutable(e);
494   return upb_def_freeze(&d, 1, status);
495 }
496 
upb_enumdef_fullname(const upb_enumdef * e)497 const char *upb_enumdef_fullname(const upb_enumdef *e) {
498   return upb_def_fullname(upb_enumdef_upcast(e));
499 }
500 
upb_enumdef_name(const upb_enumdef * e)501 const char *upb_enumdef_name(const upb_enumdef *e) {
502   return upb_def_name(upb_enumdef_upcast(e));
503 }
504 
upb_enumdef_setfullname(upb_enumdef * e,const char * fullname,upb_status * s)505 bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
506                              upb_status *s) {
507   return upb_def_setfullname(upb_enumdef_upcast_mutable(e), fullname, s);
508 }
509 
upb_enumdef_addval(upb_enumdef * e,const char * name,int32_t num,upb_status * status)510 bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
511                         upb_status *status) {
512   char *name2;
513 
514   if (!upb_isident(name, strlen(name), false, status)) {
515     return false;
516   }
517 
518   if (upb_enumdef_ntoiz(e, name, NULL)) {
519     upb_status_seterrf(status, "name '%s' is already defined", name);
520     return false;
521   }
522 
523   if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
524     upb_status_seterrmsg(status, "out of memory");
525     return false;
526   }
527 
528   if (!upb_inttable_lookup(&e->iton, num, NULL)) {
529     name2 = upb_gstrdup(name);
530     if (!name2 || !upb_inttable_insert(&e->iton, num, upb_value_cstr(name2))) {
531       upb_status_seterrmsg(status, "out of memory");
532       upb_strtable_remove(&e->ntoi, name, NULL);
533       return false;
534     }
535   }
536 
537   if (upb_enumdef_numvals(e) == 1) {
538     bool ok = upb_enumdef_setdefault(e, num, NULL);
539     UPB_ASSERT_VAR(ok, ok);
540   }
541 
542   return true;
543 }
544 
upb_enumdef_default(const upb_enumdef * e)545 int32_t upb_enumdef_default(const upb_enumdef *e) {
546   assert(upb_enumdef_iton(e, e->defaultval));
547   return e->defaultval;
548 }
549 
upb_enumdef_setdefault(upb_enumdef * e,int32_t val,upb_status * s)550 bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) {
551   assert(!upb_enumdef_isfrozen(e));
552   if (!upb_enumdef_iton(e, val)) {
553     upb_status_seterrf(s, "number '%d' is not in the enum.", val);
554     return false;
555   }
556   e->defaultval = val;
557   return true;
558 }
559 
upb_enumdef_numvals(const upb_enumdef * e)560 int upb_enumdef_numvals(const upb_enumdef *e) {
561   return upb_strtable_count(&e->ntoi);
562 }
563 
upb_enum_begin(upb_enum_iter * i,const upb_enumdef * e)564 void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
565   /* We iterate over the ntoi table, to account for duplicate numbers. */
566   upb_strtable_begin(i, &e->ntoi);
567 }
568 
upb_enum_next(upb_enum_iter * iter)569 void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
upb_enum_done(upb_enum_iter * iter)570 bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
571 
upb_enumdef_ntoi(const upb_enumdef * def,const char * name,size_t len,int32_t * num)572 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
573                       size_t len, int32_t *num) {
574   upb_value v;
575   if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
576     return false;
577   }
578   if (num) *num = upb_value_getint32(v);
579   return true;
580 }
581 
upb_enumdef_iton(const upb_enumdef * def,int32_t num)582 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
583   upb_value v;
584   return upb_inttable_lookup32(&def->iton, num, &v) ?
585       upb_value_getcstr(v) : NULL;
586 }
587 
upb_enum_iter_name(upb_enum_iter * iter)588 const char *upb_enum_iter_name(upb_enum_iter *iter) {
589   return upb_strtable_iter_key(iter);
590 }
591 
upb_enum_iter_number(upb_enum_iter * iter)592 int32_t upb_enum_iter_number(upb_enum_iter *iter) {
593   return upb_value_getint32(upb_strtable_iter_value(iter));
594 }
595 
596 
597 /* upb_fielddef ***************************************************************/
598 
599 static void upb_fielddef_init_default(upb_fielddef *f);
600 
upb_fielddef_uninit_default(upb_fielddef * f)601 static void upb_fielddef_uninit_default(upb_fielddef *f) {
602   if (f->type_is_set_ && f->default_is_string && f->defaultval.bytes)
603     freestr(f->defaultval.bytes);
604 }
605 
upb_fielddef_fullname(const upb_fielddef * e)606 const char *upb_fielddef_fullname(const upb_fielddef *e) {
607   return upb_def_fullname(upb_fielddef_upcast(e));
608 }
609 
visitfield(const upb_refcounted * r,upb_refcounted_visit * visit,void * closure)610 static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
611                        void *closure) {
612   const upb_fielddef *f = (const upb_fielddef*)r;
613   if (upb_fielddef_containingtype(f)) {
614     visit(r, upb_msgdef_upcast2(upb_fielddef_containingtype(f)), closure);
615   }
616   if (upb_fielddef_containingoneof(f)) {
617     visit(r, upb_oneofdef_upcast(upb_fielddef_containingoneof(f)), closure);
618   }
619   if (upb_fielddef_subdef(f)) {
620     visit(r, upb_def_upcast(upb_fielddef_subdef(f)), closure);
621   }
622 }
623 
freefield(upb_refcounted * r)624 static void freefield(upb_refcounted *r) {
625   upb_fielddef *f = (upb_fielddef*)r;
626   upb_fielddef_uninit_default(f);
627   if (f->subdef_is_symbolic)
628     upb_gfree(f->sub.name);
629   upb_def_uninit(upb_fielddef_upcast_mutable(f));
630   upb_gfree(f);
631 }
632 
enumdefaultstr(const upb_fielddef * f)633 static const char *enumdefaultstr(const upb_fielddef *f) {
634   const upb_enumdef *e;
635   assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
636   e = upb_fielddef_enumsubdef(f);
637   if (f->default_is_string && f->defaultval.bytes) {
638     /* Default was explicitly set as a string. */
639     str_t *s = f->defaultval.bytes;
640     return s->str;
641   } else if (e) {
642     if (!f->default_is_string) {
643       /* Default was explicitly set as an integer; look it up in enumdef. */
644       const char *name = upb_enumdef_iton(e, f->defaultval.sint);
645       if (name) {
646         return name;
647       }
648     } else {
649       /* Default is completely unset; pull enumdef default. */
650       if (upb_enumdef_numvals(e) > 0) {
651         const char *name = upb_enumdef_iton(e, upb_enumdef_default(e));
652         assert(name);
653         return name;
654       }
655     }
656   }
657   return NULL;
658 }
659 
enumdefaultint32(const upb_fielddef * f,int32_t * val)660 static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
661   const upb_enumdef *e;
662   assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
663   e = upb_fielddef_enumsubdef(f);
664   if (!f->default_is_string) {
665     /* Default was explicitly set as an integer. */
666     *val = f->defaultval.sint;
667     return true;
668   } else if (e) {
669     if (f->defaultval.bytes) {
670       /* Default was explicitly set as a str; try to lookup corresponding int. */
671       str_t *s = f->defaultval.bytes;
672       if (upb_enumdef_ntoiz(e, s->str, val)) {
673         return true;
674       }
675     } else {
676       /* Default is unset; try to pull in enumdef default. */
677       if (upb_enumdef_numvals(e) > 0) {
678         *val = upb_enumdef_default(e);
679         return true;
680       }
681     }
682   }
683   return false;
684 }
685 
686 const struct upb_refcounted_vtbl upb_fielddef_vtbl = {visitfield, freefield};
687 
upb_fielddef_new(const void * o)688 upb_fielddef *upb_fielddef_new(const void *o) {
689   upb_fielddef *f = upb_gmalloc(sizeof(*f));
690   if (!f) return NULL;
691   if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD,
692                     &upb_fielddef_vtbl, o)) {
693     upb_gfree(f);
694     return NULL;
695   }
696   f->msg.def = NULL;
697   f->sub.def = NULL;
698   f->oneof = NULL;
699   f->subdef_is_symbolic = false;
700   f->msg_is_symbolic = false;
701   f->label_ = UPB_LABEL_OPTIONAL;
702   f->type_ = UPB_TYPE_INT32;
703   f->number_ = 0;
704   f->type_is_set_ = false;
705   f->tagdelim = false;
706   f->is_extension_ = false;
707   f->lazy_ = false;
708   f->packed_ = true;
709 
710   /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
711    * with all integer types and is in some since more "default" since the most
712    * normal-looking proto2 types int32/int64/uint32/uint64 use variable.
713    *
714    * Other options to consider:
715    * - there is no default; users must set this manually (like type).
716    * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
717    *   be an optimal default for signed integers. */
718   f->intfmt = UPB_INTFMT_VARIABLE;
719   return f;
720 }
721 
upb_fielddef_dup(const upb_fielddef * f,const void * owner)722 upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
723   const char *srcname;
724   upb_fielddef *newf = upb_fielddef_new(owner);
725   if (!newf) return NULL;
726   upb_fielddef_settype(newf, upb_fielddef_type(f));
727   upb_fielddef_setlabel(newf, upb_fielddef_label(f));
728   upb_fielddef_setnumber(newf, upb_fielddef_number(f), NULL);
729   upb_fielddef_setname(newf, upb_fielddef_name(f), NULL);
730   if (f->default_is_string && f->defaultval.bytes) {
731     str_t *s = f->defaultval.bytes;
732     upb_fielddef_setdefaultstr(newf, s->str, s->len, NULL);
733   } else {
734     newf->default_is_string = f->default_is_string;
735     newf->defaultval = f->defaultval;
736   }
737 
738   if (f->subdef_is_symbolic) {
739     srcname = f->sub.name;  /* Might be NULL. */
740   } else {
741     srcname = f->sub.def ? upb_def_fullname(f->sub.def) : NULL;
742   }
743   if (srcname) {
744     char *newname = upb_gmalloc(strlen(f->sub.def->fullname) + 2);
745     if (!newname) {
746       upb_fielddef_unref(newf, owner);
747       return NULL;
748     }
749     strcpy(newname, ".");
750     strcat(newname, f->sub.def->fullname);
751     upb_fielddef_setsubdefname(newf, newname, NULL);
752     upb_gfree(newname);
753   }
754 
755   return newf;
756 }
757 
upb_fielddef_typeisset(const upb_fielddef * f)758 bool upb_fielddef_typeisset(const upb_fielddef *f) {
759   return f->type_is_set_;
760 }
761 
upb_fielddef_type(const upb_fielddef * f)762 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
763   assert(f->type_is_set_);
764   return f->type_;
765 }
766 
upb_fielddef_index(const upb_fielddef * f)767 uint32_t upb_fielddef_index(const upb_fielddef *f) {
768   return f->index_;
769 }
770 
upb_fielddef_label(const upb_fielddef * f)771 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
772   return f->label_;
773 }
774 
upb_fielddef_intfmt(const upb_fielddef * f)775 upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) {
776   return f->intfmt;
777 }
778 
upb_fielddef_istagdelim(const upb_fielddef * f)779 bool upb_fielddef_istagdelim(const upb_fielddef *f) {
780   return f->tagdelim;
781 }
782 
upb_fielddef_number(const upb_fielddef * f)783 uint32_t upb_fielddef_number(const upb_fielddef *f) {
784   return f->number_;
785 }
786 
upb_fielddef_isextension(const upb_fielddef * f)787 bool upb_fielddef_isextension(const upb_fielddef *f) {
788   return f->is_extension_;
789 }
790 
upb_fielddef_lazy(const upb_fielddef * f)791 bool upb_fielddef_lazy(const upb_fielddef *f) {
792   return f->lazy_;
793 }
794 
upb_fielddef_packed(const upb_fielddef * f)795 bool upb_fielddef_packed(const upb_fielddef *f) {
796   return f->packed_;
797 }
798 
upb_fielddef_name(const upb_fielddef * f)799 const char *upb_fielddef_name(const upb_fielddef *f) {
800   return upb_def_fullname(upb_fielddef_upcast(f));
801 }
802 
upb_fielddef_getjsonname(const upb_fielddef * f,char * buf,size_t len)803 size_t upb_fielddef_getjsonname(const upb_fielddef *f, char *buf, size_t len) {
804   const char *name = upb_fielddef_name(f);
805   size_t src, dst = 0;
806   bool ucase_next = false;
807 
808 #define WRITE(byte) \
809   ++dst; \
810   if (dst < len) buf[dst - 1] = byte; \
811   else if (dst == len) buf[dst - 1] = '\0'
812 
813   if (!name) {
814     WRITE('\0');
815     return 0;
816   }
817 
818   /* Implement the transformation as described in the spec:
819    *   1. upper case all letters after an underscore.
820    *   2. remove all underscores.
821    */
822   for (src = 0; name[src]; src++) {
823     if (name[src] == '_') {
824       ucase_next = true;
825       continue;
826     }
827 
828     if (ucase_next) {
829       WRITE(toupper(name[src]));
830       ucase_next = false;
831     } else {
832       WRITE(name[src]);
833     }
834   }
835 
836   WRITE('\0');
837   return dst;
838 
839 #undef WRITE
840 }
841 
upb_fielddef_containingtype(const upb_fielddef * f)842 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
843   return f->msg_is_symbolic ? NULL : f->msg.def;
844 }
845 
upb_fielddef_containingoneof(const upb_fielddef * f)846 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
847   return f->oneof;
848 }
849 
upb_fielddef_containingtype_mutable(upb_fielddef * f)850 upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
851   return (upb_msgdef*)upb_fielddef_containingtype(f);
852 }
853 
upb_fielddef_containingtypename(upb_fielddef * f)854 const char *upb_fielddef_containingtypename(upb_fielddef *f) {
855   return f->msg_is_symbolic ? f->msg.name : NULL;
856 }
857 
release_containingtype(upb_fielddef * f)858 static void release_containingtype(upb_fielddef *f) {
859   if (f->msg_is_symbolic) upb_gfree(f->msg.name);
860 }
861 
upb_fielddef_setcontainingtypename(upb_fielddef * f,const char * name,upb_status * s)862 bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
863                                         upb_status *s) {
864   char *name_copy;
865   assert(!upb_fielddef_isfrozen(f));
866   if (upb_fielddef_containingtype(f)) {
867     upb_status_seterrmsg(s, "field has already been added to a message.");
868     return false;
869   }
870   /* TODO: validate name (upb_isident() doesn't quite work atm because this name
871    * may have a leading "."). */
872 
873   name_copy = upb_gstrdup(name);
874   if (!name_copy) {
875     upb_upberr_setoom(s);
876     return false;
877   }
878 
879   release_containingtype(f);
880   f->msg.name = name_copy;
881   f->msg_is_symbolic = true;
882   return true;
883 }
884 
upb_fielddef_setname(upb_fielddef * f,const char * name,upb_status * s)885 bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
886   if (upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f)) {
887     upb_status_seterrmsg(s, "Already added to message or oneof");
888     return false;
889   }
890   return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
891 }
892 
chkdefaulttype(const upb_fielddef * f,upb_fieldtype_t type)893 static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
894   UPB_UNUSED(f);
895   UPB_UNUSED(type);
896   assert(f->type_is_set_ && upb_fielddef_type(f) == type);
897 }
898 
upb_fielddef_defaultint64(const upb_fielddef * f)899 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
900   chkdefaulttype(f, UPB_TYPE_INT64);
901   return f->defaultval.sint;
902 }
903 
upb_fielddef_defaultint32(const upb_fielddef * f)904 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
905   if (f->type_is_set_ && upb_fielddef_type(f) == UPB_TYPE_ENUM) {
906     int32_t val;
907     bool ok = enumdefaultint32(f, &val);
908     UPB_ASSERT_VAR(ok, ok);
909     return val;
910   } else {
911     chkdefaulttype(f, UPB_TYPE_INT32);
912     return f->defaultval.sint;
913   }
914 }
915 
upb_fielddef_defaultuint64(const upb_fielddef * f)916 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
917   chkdefaulttype(f, UPB_TYPE_UINT64);
918   return f->defaultval.uint;
919 }
920 
upb_fielddef_defaultuint32(const upb_fielddef * f)921 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
922   chkdefaulttype(f, UPB_TYPE_UINT32);
923   return f->defaultval.uint;
924 }
925 
upb_fielddef_defaultbool(const upb_fielddef * f)926 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
927   chkdefaulttype(f, UPB_TYPE_BOOL);
928   return f->defaultval.uint;
929 }
930 
upb_fielddef_defaultfloat(const upb_fielddef * f)931 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
932   chkdefaulttype(f, UPB_TYPE_FLOAT);
933   return f->defaultval.flt;
934 }
935 
upb_fielddef_defaultdouble(const upb_fielddef * f)936 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
937   chkdefaulttype(f, UPB_TYPE_DOUBLE);
938   return f->defaultval.dbl;
939 }
940 
upb_fielddef_defaultstr(const upb_fielddef * f,size_t * len)941 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
942   assert(f->type_is_set_);
943   assert(upb_fielddef_type(f) == UPB_TYPE_STRING ||
944          upb_fielddef_type(f) == UPB_TYPE_BYTES ||
945          upb_fielddef_type(f) == UPB_TYPE_ENUM);
946 
947   if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
948     const char *ret = enumdefaultstr(f);
949     assert(ret);
950     /* Enum defaults can't have embedded NULLs. */
951     if (len) *len = strlen(ret);
952     return ret;
953   }
954 
955   if (f->default_is_string) {
956     str_t *str = f->defaultval.bytes;
957     if (len) *len = str->len;
958     return str->str;
959   }
960 
961   return NULL;
962 }
963 
upb_fielddef_init_default(upb_fielddef * f)964 static void upb_fielddef_init_default(upb_fielddef *f) {
965   f->default_is_string = false;
966   switch (upb_fielddef_type(f)) {
967     case UPB_TYPE_DOUBLE: f->defaultval.dbl = 0; break;
968     case UPB_TYPE_FLOAT: f->defaultval.flt = 0; break;
969     case UPB_TYPE_INT32:
970     case UPB_TYPE_INT64: f->defaultval.sint = 0; break;
971     case UPB_TYPE_UINT64:
972     case UPB_TYPE_UINT32:
973     case UPB_TYPE_BOOL: f->defaultval.uint = 0; break;
974     case UPB_TYPE_STRING:
975     case UPB_TYPE_BYTES:
976       f->defaultval.bytes = newstr("", 0);
977       f->default_is_string = true;
978       break;
979     case UPB_TYPE_MESSAGE: break;
980     case UPB_TYPE_ENUM:
981       /* This is our special sentinel that indicates "not set" for an enum. */
982       f->default_is_string = true;
983       f->defaultval.bytes = NULL;
984       break;
985   }
986 }
987 
upb_fielddef_subdef(const upb_fielddef * f)988 const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
989   return f->subdef_is_symbolic ? NULL : f->sub.def;
990 }
991 
upb_fielddef_msgsubdef(const upb_fielddef * f)992 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
993   const upb_def *def = upb_fielddef_subdef(f);
994   return def ? upb_dyncast_msgdef(def) : NULL;
995 }
996 
upb_fielddef_enumsubdef(const upb_fielddef * f)997 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
998   const upb_def *def = upb_fielddef_subdef(f);
999   return def ? upb_dyncast_enumdef(def) : NULL;
1000 }
1001 
upb_fielddef_subdef_mutable(upb_fielddef * f)1002 upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
1003   return (upb_def*)upb_fielddef_subdef(f);
1004 }
1005 
upb_fielddef_subdefname(const upb_fielddef * f)1006 const char *upb_fielddef_subdefname(const upb_fielddef *f) {
1007   if (f->subdef_is_symbolic) {
1008     return f->sub.name;
1009   } else if (f->sub.def) {
1010     return upb_def_fullname(f->sub.def);
1011   } else {
1012     return NULL;
1013   }
1014 }
1015 
upb_fielddef_setnumber(upb_fielddef * f,uint32_t number,upb_status * s)1016 bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) {
1017   if (upb_fielddef_containingtype(f)) {
1018     upb_status_seterrmsg(
1019         s, "cannot change field number after adding to a message");
1020     return false;
1021   }
1022   if (number == 0 || number > UPB_MAX_FIELDNUMBER) {
1023     upb_status_seterrf(s, "invalid field number (%u)", number);
1024     return false;
1025   }
1026   f->number_ = number;
1027   return true;
1028 }
1029 
upb_fielddef_settype(upb_fielddef * f,upb_fieldtype_t type)1030 void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
1031   assert(!upb_fielddef_isfrozen(f));
1032   assert(upb_fielddef_checktype(type));
1033   upb_fielddef_uninit_default(f);
1034   f->type_ = type;
1035   f->type_is_set_ = true;
1036   upb_fielddef_init_default(f);
1037 }
1038 
upb_fielddef_setdescriptortype(upb_fielddef * f,int type)1039 void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) {
1040   assert(!upb_fielddef_isfrozen(f));
1041   switch (type) {
1042     case UPB_DESCRIPTOR_TYPE_DOUBLE:
1043       upb_fielddef_settype(f, UPB_TYPE_DOUBLE);
1044       break;
1045     case UPB_DESCRIPTOR_TYPE_FLOAT:
1046       upb_fielddef_settype(f, UPB_TYPE_FLOAT);
1047       break;
1048     case UPB_DESCRIPTOR_TYPE_INT64:
1049     case UPB_DESCRIPTOR_TYPE_SFIXED64:
1050     case UPB_DESCRIPTOR_TYPE_SINT64:
1051       upb_fielddef_settype(f, UPB_TYPE_INT64);
1052       break;
1053     case UPB_DESCRIPTOR_TYPE_UINT64:
1054     case UPB_DESCRIPTOR_TYPE_FIXED64:
1055       upb_fielddef_settype(f, UPB_TYPE_UINT64);
1056       break;
1057     case UPB_DESCRIPTOR_TYPE_INT32:
1058     case UPB_DESCRIPTOR_TYPE_SFIXED32:
1059     case UPB_DESCRIPTOR_TYPE_SINT32:
1060       upb_fielddef_settype(f, UPB_TYPE_INT32);
1061       break;
1062     case UPB_DESCRIPTOR_TYPE_UINT32:
1063     case UPB_DESCRIPTOR_TYPE_FIXED32:
1064       upb_fielddef_settype(f, UPB_TYPE_UINT32);
1065       break;
1066     case UPB_DESCRIPTOR_TYPE_BOOL:
1067       upb_fielddef_settype(f, UPB_TYPE_BOOL);
1068       break;
1069     case UPB_DESCRIPTOR_TYPE_STRING:
1070       upb_fielddef_settype(f, UPB_TYPE_STRING);
1071       break;
1072     case UPB_DESCRIPTOR_TYPE_BYTES:
1073       upb_fielddef_settype(f, UPB_TYPE_BYTES);
1074       break;
1075     case UPB_DESCRIPTOR_TYPE_GROUP:
1076     case UPB_DESCRIPTOR_TYPE_MESSAGE:
1077       upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
1078       break;
1079     case UPB_DESCRIPTOR_TYPE_ENUM:
1080       upb_fielddef_settype(f, UPB_TYPE_ENUM);
1081       break;
1082     default: assert(false);
1083   }
1084 
1085   if (type == UPB_DESCRIPTOR_TYPE_FIXED64 ||
1086       type == UPB_DESCRIPTOR_TYPE_FIXED32 ||
1087       type == UPB_DESCRIPTOR_TYPE_SFIXED64 ||
1088       type == UPB_DESCRIPTOR_TYPE_SFIXED32) {
1089     upb_fielddef_setintfmt(f, UPB_INTFMT_FIXED);
1090   } else if (type == UPB_DESCRIPTOR_TYPE_SINT64 ||
1091              type == UPB_DESCRIPTOR_TYPE_SINT32) {
1092     upb_fielddef_setintfmt(f, UPB_INTFMT_ZIGZAG);
1093   } else {
1094     upb_fielddef_setintfmt(f, UPB_INTFMT_VARIABLE);
1095   }
1096 
1097   upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP);
1098 }
1099 
upb_fielddef_descriptortype(const upb_fielddef * f)1100 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
1101   switch (upb_fielddef_type(f)) {
1102     case UPB_TYPE_FLOAT:  return UPB_DESCRIPTOR_TYPE_FLOAT;
1103     case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE;
1104     case UPB_TYPE_BOOL:   return UPB_DESCRIPTOR_TYPE_BOOL;
1105     case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING;
1106     case UPB_TYPE_BYTES:  return UPB_DESCRIPTOR_TYPE_BYTES;
1107     case UPB_TYPE_ENUM:   return UPB_DESCRIPTOR_TYPE_ENUM;
1108     case UPB_TYPE_INT32:
1109       switch (upb_fielddef_intfmt(f)) {
1110         case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32;
1111         case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_SFIXED32;
1112         case UPB_INTFMT_ZIGZAG:   return UPB_DESCRIPTOR_TYPE_SINT32;
1113       }
1114     case UPB_TYPE_INT64:
1115       switch (upb_fielddef_intfmt(f)) {
1116         case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64;
1117         case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_SFIXED64;
1118         case UPB_INTFMT_ZIGZAG:   return UPB_DESCRIPTOR_TYPE_SINT64;
1119       }
1120     case UPB_TYPE_UINT32:
1121       switch (upb_fielddef_intfmt(f)) {
1122         case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32;
1123         case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_FIXED32;
1124         case UPB_INTFMT_ZIGZAG:   return -1;
1125       }
1126     case UPB_TYPE_UINT64:
1127       switch (upb_fielddef_intfmt(f)) {
1128         case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64;
1129         case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_FIXED64;
1130         case UPB_INTFMT_ZIGZAG:   return -1;
1131       }
1132     case UPB_TYPE_MESSAGE:
1133       return upb_fielddef_istagdelim(f) ?
1134           UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE;
1135   }
1136   return 0;
1137 }
1138 
upb_fielddef_setisextension(upb_fielddef * f,bool is_extension)1139 void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) {
1140   assert(!upb_fielddef_isfrozen(f));
1141   f->is_extension_ = is_extension;
1142 }
1143 
upb_fielddef_setlazy(upb_fielddef * f,bool lazy)1144 void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
1145   assert(!upb_fielddef_isfrozen(f));
1146   f->lazy_ = lazy;
1147 }
1148 
upb_fielddef_setpacked(upb_fielddef * f,bool packed)1149 void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
1150   assert(!upb_fielddef_isfrozen(f));
1151   f->packed_ = packed;
1152 }
1153 
upb_fielddef_setlabel(upb_fielddef * f,upb_label_t label)1154 void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
1155   assert(!upb_fielddef_isfrozen(f));
1156   assert(upb_fielddef_checklabel(label));
1157   f->label_ = label;
1158 }
1159 
upb_fielddef_setintfmt(upb_fielddef * f,upb_intfmt_t fmt)1160 void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) {
1161   assert(!upb_fielddef_isfrozen(f));
1162   assert(upb_fielddef_checkintfmt(fmt));
1163   f->intfmt = fmt;
1164 }
1165 
upb_fielddef_settagdelim(upb_fielddef * f,bool tag_delim)1166 void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) {
1167   assert(!upb_fielddef_isfrozen(f));
1168   f->tagdelim = tag_delim;
1169   f->tagdelim = tag_delim;
1170 }
1171 
checksetdefault(upb_fielddef * f,upb_fieldtype_t type)1172 static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) {
1173   if (!f->type_is_set_ || upb_fielddef_isfrozen(f) ||
1174       upb_fielddef_type(f) != type) {
1175     assert(false);
1176     return false;
1177   }
1178   if (f->default_is_string) {
1179     str_t *s = f->defaultval.bytes;
1180     assert(s || type == UPB_TYPE_ENUM);
1181     if (s) freestr(s);
1182   }
1183   f->default_is_string = false;
1184   return true;
1185 }
1186 
upb_fielddef_setdefaultint64(upb_fielddef * f,int64_t value)1187 void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) {
1188   if (checksetdefault(f, UPB_TYPE_INT64))
1189     f->defaultval.sint = value;
1190 }
1191 
upb_fielddef_setdefaultint32(upb_fielddef * f,int32_t value)1192 void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) {
1193   if ((upb_fielddef_type(f) == UPB_TYPE_ENUM &&
1194        checksetdefault(f, UPB_TYPE_ENUM)) ||
1195       checksetdefault(f, UPB_TYPE_INT32)) {
1196     f->defaultval.sint = value;
1197   }
1198 }
1199 
upb_fielddef_setdefaultuint64(upb_fielddef * f,uint64_t value)1200 void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) {
1201   if (checksetdefault(f, UPB_TYPE_UINT64))
1202     f->defaultval.uint = value;
1203 }
1204 
upb_fielddef_setdefaultuint32(upb_fielddef * f,uint32_t value)1205 void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) {
1206   if (checksetdefault(f, UPB_TYPE_UINT32))
1207     f->defaultval.uint = value;
1208 }
1209 
upb_fielddef_setdefaultbool(upb_fielddef * f,bool value)1210 void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) {
1211   if (checksetdefault(f, UPB_TYPE_BOOL))
1212     f->defaultval.uint = value;
1213 }
1214 
upb_fielddef_setdefaultfloat(upb_fielddef * f,float value)1215 void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) {
1216   if (checksetdefault(f, UPB_TYPE_FLOAT))
1217     f->defaultval.flt = value;
1218 }
1219 
upb_fielddef_setdefaultdouble(upb_fielddef * f,double value)1220 void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
1221   if (checksetdefault(f, UPB_TYPE_DOUBLE))
1222     f->defaultval.dbl = value;
1223 }
1224 
upb_fielddef_setdefaultstr(upb_fielddef * f,const void * str,size_t len,upb_status * s)1225 bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
1226                                 upb_status *s) {
1227   str_t *str2;
1228   assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
1229   if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
1230     return false;
1231 
1232   if (f->default_is_string) {
1233     str_t *s = f->defaultval.bytes;
1234     assert(s || f->type_ == UPB_TYPE_ENUM);
1235     if (s) freestr(s);
1236   } else {
1237     assert(f->type_ == UPB_TYPE_ENUM);
1238   }
1239 
1240   str2 = newstr(str, len);
1241   f->defaultval.bytes = str2;
1242   f->default_is_string = true;
1243   return true;
1244 }
1245 
upb_fielddef_setdefaultcstr(upb_fielddef * f,const char * str,upb_status * s)1246 void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
1247                                  upb_status *s) {
1248   assert(f->type_is_set_);
1249   upb_fielddef_setdefaultstr(f, str, str ? strlen(str) : 0, s);
1250 }
1251 
upb_fielddef_enumhasdefaultint32(const upb_fielddef * f)1252 bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
1253   int32_t val;
1254   assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1255   return enumdefaultint32(f, &val);
1256 }
1257 
upb_fielddef_enumhasdefaultstr(const upb_fielddef * f)1258 bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) {
1259   assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1260   return enumdefaultstr(f) != NULL;
1261 }
1262 
upb_subdef_typecheck(upb_fielddef * f,const upb_def * subdef,upb_status * s)1263 static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef,
1264                                  upb_status *s) {
1265   if (f->type_ == UPB_TYPE_MESSAGE) {
1266     if (upb_dyncast_msgdef(subdef)) return true;
1267     upb_status_seterrmsg(s, "invalid subdef type for this submessage field");
1268     return false;
1269   } else if (f->type_ == UPB_TYPE_ENUM) {
1270     if (upb_dyncast_enumdef(subdef)) return true;
1271     upb_status_seterrmsg(s, "invalid subdef type for this enum field");
1272     return false;
1273   } else {
1274     upb_status_seterrmsg(s, "only message and enum fields can have a subdef");
1275     return false;
1276   }
1277 }
1278 
release_subdef(upb_fielddef * f)1279 static void release_subdef(upb_fielddef *f) {
1280   if (f->subdef_is_symbolic) {
1281     upb_gfree(f->sub.name);
1282   } else if (f->sub.def) {
1283     upb_unref2(f->sub.def, f);
1284   }
1285 }
1286 
upb_fielddef_setsubdef(upb_fielddef * f,const upb_def * subdef,upb_status * s)1287 bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
1288                             upb_status *s) {
1289   assert(!upb_fielddef_isfrozen(f));
1290   assert(upb_fielddef_hassubdef(f));
1291   if (subdef && !upb_subdef_typecheck(f, subdef, s)) return false;
1292   release_subdef(f);
1293   f->sub.def = subdef;
1294   f->subdef_is_symbolic = false;
1295   if (f->sub.def) upb_ref2(f->sub.def, f);
1296   return true;
1297 }
1298 
upb_fielddef_setmsgsubdef(upb_fielddef * f,const upb_msgdef * subdef,upb_status * s)1299 bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
1300                                upb_status *s) {
1301   return upb_fielddef_setsubdef(f, upb_msgdef_upcast(subdef), s);
1302 }
1303 
upb_fielddef_setenumsubdef(upb_fielddef * f,const upb_enumdef * subdef,upb_status * s)1304 bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
1305                                 upb_status *s) {
1306   return upb_fielddef_setsubdef(f, upb_enumdef_upcast(subdef), s);
1307 }
1308 
upb_fielddef_setsubdefname(upb_fielddef * f,const char * name,upb_status * s)1309 bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
1310                                 upb_status *s) {
1311   char *name_copy;
1312   assert(!upb_fielddef_isfrozen(f));
1313   if (!upb_fielddef_hassubdef(f)) {
1314     upb_status_seterrmsg(s, "field type does not accept a subdef");
1315     return false;
1316   }
1317 
1318   name_copy = upb_gstrdup(name);
1319   if (!name_copy) {
1320     upb_upberr_setoom(s);
1321     return false;
1322   }
1323 
1324   /* TODO: validate name (upb_isident() doesn't quite work atm because this name
1325    * may have a leading "."). */
1326   release_subdef(f);
1327   f->sub.name = name_copy;
1328   f->subdef_is_symbolic = true;
1329   return true;
1330 }
1331 
upb_fielddef_issubmsg(const upb_fielddef * f)1332 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
1333   return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
1334 }
1335 
upb_fielddef_isstring(const upb_fielddef * f)1336 bool upb_fielddef_isstring(const upb_fielddef *f) {
1337   return upb_fielddef_type(f) == UPB_TYPE_STRING ||
1338          upb_fielddef_type(f) == UPB_TYPE_BYTES;
1339 }
1340 
upb_fielddef_isseq(const upb_fielddef * f)1341 bool upb_fielddef_isseq(const upb_fielddef *f) {
1342   return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
1343 }
1344 
upb_fielddef_isprimitive(const upb_fielddef * f)1345 bool upb_fielddef_isprimitive(const upb_fielddef *f) {
1346   return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
1347 }
1348 
upb_fielddef_ismap(const upb_fielddef * f)1349 bool upb_fielddef_ismap(const upb_fielddef *f) {
1350   return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
1351          upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
1352 }
1353 
upb_fielddef_haspresence(const upb_fielddef * f)1354 bool upb_fielddef_haspresence(const upb_fielddef *f) {
1355   if (upb_fielddef_isseq(f)) return false;
1356   if (upb_fielddef_issubmsg(f)) return true;
1357 
1358   /* Primitive field: return true unless there is a message that specifies
1359    * presence should not exist. */
1360   if (f->msg_is_symbolic || !f->msg.def) return true;
1361   return f->msg.def->syntax == UPB_SYNTAX_PROTO2;
1362 }
1363 
upb_fielddef_hassubdef(const upb_fielddef * f)1364 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
1365   return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
1366 }
1367 
between(int32_t x,int32_t low,int32_t high)1368 static bool between(int32_t x, int32_t low, int32_t high) {
1369   return x >= low && x <= high;
1370 }
1371 
upb_fielddef_checklabel(int32_t label)1372 bool upb_fielddef_checklabel(int32_t label) { return between(label, 1, 3); }
upb_fielddef_checktype(int32_t type)1373 bool upb_fielddef_checktype(int32_t type) { return between(type, 1, 11); }
upb_fielddef_checkintfmt(int32_t fmt)1374 bool upb_fielddef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); }
1375 
upb_fielddef_checkdescriptortype(int32_t type)1376 bool upb_fielddef_checkdescriptortype(int32_t type) {
1377   return between(type, 1, 18);
1378 }
1379 
1380 /* upb_msgdef *****************************************************************/
1381 
visitmsg(const upb_refcounted * r,upb_refcounted_visit * visit,void * closure)1382 static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
1383                      void *closure) {
1384   upb_msg_oneof_iter o;
1385   const upb_msgdef *m = (const upb_msgdef*)r;
1386   upb_msg_field_iter i;
1387   for(upb_msg_field_begin(&i, m);
1388       !upb_msg_field_done(&i);
1389       upb_msg_field_next(&i)) {
1390     upb_fielddef *f = upb_msg_iter_field(&i);
1391     visit(r, upb_fielddef_upcast2(f), closure);
1392   }
1393   for(upb_msg_oneof_begin(&o, m);
1394       !upb_msg_oneof_done(&o);
1395       upb_msg_oneof_next(&o)) {
1396     upb_oneofdef *f = upb_msg_iter_oneof(&o);
1397     visit(r, upb_oneofdef_upcast(f), closure);
1398   }
1399 }
1400 
freemsg(upb_refcounted * r)1401 static void freemsg(upb_refcounted *r) {
1402   upb_msgdef *m = (upb_msgdef*)r;
1403   upb_strtable_uninit(&m->ntof);
1404   upb_inttable_uninit(&m->itof);
1405   upb_def_uninit(upb_msgdef_upcast_mutable(m));
1406   upb_gfree(m);
1407 }
1408 
1409 const struct upb_refcounted_vtbl upb_msgdef_vtbl = {visitmsg, freemsg};
1410 
upb_msgdef_new(const void * owner)1411 upb_msgdef *upb_msgdef_new(const void *owner) {
1412   upb_msgdef *m = upb_gmalloc(sizeof(*m));
1413   if (!m) return NULL;
1414 
1415   if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &upb_msgdef_vtbl,
1416                     owner)) {
1417     goto err2;
1418   }
1419 
1420   if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err2;
1421   if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err1;
1422   m->map_entry = false;
1423   m->syntax = UPB_SYNTAX_PROTO2;
1424   return m;
1425 
1426 err1:
1427   upb_inttable_uninit(&m->itof);
1428 err2:
1429   upb_gfree(m);
1430   return NULL;
1431 }
1432 
upb_msgdef_dup(const upb_msgdef * m,const void * owner)1433 upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
1434   bool ok;
1435   upb_msg_field_iter i;
1436   upb_msg_oneof_iter o;
1437 
1438   upb_msgdef *newm = upb_msgdef_new(owner);
1439   if (!newm) return NULL;
1440   ok = upb_def_setfullname(upb_msgdef_upcast_mutable(newm),
1441                            upb_def_fullname(upb_msgdef_upcast(m)),
1442                            NULL);
1443   newm->map_entry = m->map_entry;
1444   newm->syntax = m->syntax;
1445   UPB_ASSERT_VAR(ok, ok);
1446   for(upb_msg_field_begin(&i, m);
1447       !upb_msg_field_done(&i);
1448       upb_msg_field_next(&i)) {
1449     upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f);
1450     /* Fields in oneofs are dup'd below. */
1451     if (upb_fielddef_containingoneof(f)) continue;
1452     if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) {
1453       upb_msgdef_unref(newm, owner);
1454       return NULL;
1455     }
1456   }
1457   for(upb_msg_oneof_begin(&o, m);
1458       !upb_msg_oneof_done(&o);
1459       upb_msg_oneof_next(&o)) {
1460     upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f);
1461     if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) {
1462       upb_msgdef_unref(newm, owner);
1463       return NULL;
1464     }
1465   }
1466   return newm;
1467 }
1468 
upb_msgdef_freeze(upb_msgdef * m,upb_status * status)1469 bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
1470   upb_def *d = upb_msgdef_upcast_mutable(m);
1471   return upb_def_freeze(&d, 1, status);
1472 }
1473 
upb_msgdef_fullname(const upb_msgdef * m)1474 const char *upb_msgdef_fullname(const upb_msgdef *m) {
1475   return upb_def_fullname(upb_msgdef_upcast(m));
1476 }
1477 
upb_msgdef_name(const upb_msgdef * m)1478 const char *upb_msgdef_name(const upb_msgdef *m) {
1479   return upb_def_name(upb_msgdef_upcast(m));
1480 }
1481 
upb_msgdef_setfullname(upb_msgdef * m,const char * fullname,upb_status * s)1482 bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
1483                             upb_status *s) {
1484   return upb_def_setfullname(upb_msgdef_upcast_mutable(m), fullname, s);
1485 }
1486 
upb_msgdef_setsyntax(upb_msgdef * m,upb_syntax_t syntax)1487 bool upb_msgdef_setsyntax(upb_msgdef *m, upb_syntax_t syntax) {
1488   if (syntax != UPB_SYNTAX_PROTO2 && syntax != UPB_SYNTAX_PROTO3) {
1489     return false;
1490   }
1491 
1492   m->syntax = syntax;
1493   return true;
1494 }
1495 
upb_msgdef_syntax(const upb_msgdef * m)1496 upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
1497   return m->syntax;
1498 }
1499 
1500 /* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
1501  * on status |s| and return false if not. */
check_field_add(const upb_msgdef * m,const upb_fielddef * f,upb_status * s)1502 static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
1503                             upb_status *s) {
1504   if (upb_fielddef_containingtype(f) != NULL) {
1505     upb_status_seterrmsg(s, "fielddef already belongs to a message");
1506     return false;
1507   } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
1508     upb_status_seterrmsg(s, "field name or number were not set");
1509     return false;
1510   } else if (upb_msgdef_itof(m, upb_fielddef_number(f))) {
1511     upb_status_seterrmsg(s, "duplicate field number");
1512     return false;
1513   } else if (upb_strtable_lookup(&m->ntof, upb_fielddef_name(f), NULL)) {
1514     upb_status_seterrmsg(s, "name conflicts with existing field or oneof");
1515     return false;
1516   }
1517   return true;
1518 }
1519 
add_field(upb_msgdef * m,upb_fielddef * f,const void * ref_donor)1520 static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
1521   release_containingtype(f);
1522   f->msg.def = m;
1523   f->msg_is_symbolic = false;
1524   upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
1525   upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));
1526   upb_ref2(f, m);
1527   upb_ref2(m, f);
1528   if (ref_donor) upb_fielddef_unref(f, ref_donor);
1529 }
1530 
upb_msgdef_addfield(upb_msgdef * m,upb_fielddef * f,const void * ref_donor,upb_status * s)1531 bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
1532                          upb_status *s) {
1533   /* TODO: extensions need to have a separate namespace, because proto2 allows a
1534    * top-level extension (ie. one not in any package) to have the same name as a
1535    * field from the message.
1536    *
1537    * This also implies that there needs to be a separate lookup-by-name method
1538    * for extensions.  It seems desirable for iteration to return both extensions
1539    * and non-extensions though.
1540    *
1541    * We also need to validate that the field number is in an extension range iff
1542    * it is an extension.
1543    *
1544    * This method is idempotent. Check if |f| is already part of this msgdef and
1545    * return immediately if so. */
1546   if (upb_fielddef_containingtype(f) == m) {
1547     return true;
1548   }
1549 
1550   /* Check constraints for all fields before performing any action. */
1551   if (!check_field_add(m, f, s)) {
1552     return false;
1553   } else if (upb_fielddef_containingoneof(f) != NULL) {
1554     /* Fields in a oneof can only be added by adding the oneof to the msgdef. */
1555     upb_status_seterrmsg(s, "fielddef is part of a oneof");
1556     return false;
1557   }
1558 
1559   /* Constraint checks ok, perform the action. */
1560   add_field(m, f, ref_donor);
1561   return true;
1562 }
1563 
upb_msgdef_addoneof(upb_msgdef * m,upb_oneofdef * o,const void * ref_donor,upb_status * s)1564 bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
1565                          upb_status *s) {
1566   upb_oneof_iter it;
1567 
1568   /* Check various conditions that would prevent this oneof from being added. */
1569   if (upb_oneofdef_containingtype(o)) {
1570     upb_status_seterrmsg(s, "oneofdef already belongs to a message");
1571     return false;
1572   } else if (upb_oneofdef_name(o) == NULL) {
1573     upb_status_seterrmsg(s, "oneofdef name was not set");
1574     return false;
1575   } else if (upb_strtable_lookup(&m->ntof, upb_oneofdef_name(o), NULL)) {
1576     upb_status_seterrmsg(s, "name conflicts with existing field or oneof");
1577     return false;
1578   }
1579 
1580   /* Check that all of the oneof's fields do not conflict with names or numbers
1581    * of fields already in the message. */
1582   for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
1583     const upb_fielddef *f = upb_oneof_iter_field(&it);
1584     if (!check_field_add(m, f, s)) {
1585       return false;
1586     }
1587   }
1588 
1589   /* Everything checks out -- commit now. */
1590 
1591   /* Add oneof itself first. */
1592   o->parent = m;
1593   upb_strtable_insert(&m->ntof, upb_oneofdef_name(o), upb_value_ptr(o));
1594   upb_ref2(o, m);
1595   upb_ref2(m, o);
1596 
1597   /* Add each field of the oneof directly to the msgdef. */
1598   for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
1599     upb_fielddef *f = upb_oneof_iter_field(&it);
1600     add_field(m, f, NULL);
1601   }
1602 
1603   if (ref_donor) upb_oneofdef_unref(o, ref_donor);
1604 
1605   return true;
1606 }
1607 
upb_msgdef_itof(const upb_msgdef * m,uint32_t i)1608 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
1609   upb_value val;
1610   return upb_inttable_lookup32(&m->itof, i, &val) ?
1611       upb_value_getptr(val) : NULL;
1612 }
1613 
upb_msgdef_ntof(const upb_msgdef * m,const char * name,size_t len)1614 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
1615                                     size_t len) {
1616   upb_value val;
1617 
1618   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
1619     return NULL;
1620   }
1621 
1622   return upb_trygetfield(upb_value_getptr(val));
1623 }
1624 
upb_msgdef_ntoo(const upb_msgdef * m,const char * name,size_t len)1625 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
1626                                     size_t len) {
1627   upb_value val;
1628 
1629   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
1630     return NULL;
1631   }
1632 
1633   return upb_trygetoneof(upb_value_getptr(val));
1634 }
1635 
upb_msgdef_lookupname(const upb_msgdef * m,const char * name,size_t len,const upb_fielddef ** f,const upb_oneofdef ** o)1636 bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
1637                            const upb_fielddef **f, const upb_oneofdef **o) {
1638   upb_value val;
1639 
1640   if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
1641     return false;
1642   }
1643 
1644   *o = upb_trygetoneof(upb_value_getptr(val));
1645   *f = upb_trygetfield(upb_value_getptr(val));
1646   assert((*o != NULL) ^ (*f != NULL));  /* Exactly one of the two should be set. */
1647   return true;
1648 }
1649 
upb_msgdef_numfields(const upb_msgdef * m)1650 int upb_msgdef_numfields(const upb_msgdef *m) {
1651   /* The number table contains only fields. */
1652   return upb_inttable_count(&m->itof);
1653 }
1654 
upb_msgdef_numoneofs(const upb_msgdef * m)1655 int upb_msgdef_numoneofs(const upb_msgdef *m) {
1656   /* The name table includes oneofs, and the number table does not. */
1657   return upb_strtable_count(&m->ntof) - upb_inttable_count(&m->itof);
1658 }
1659 
upb_msgdef_setmapentry(upb_msgdef * m,bool map_entry)1660 void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
1661   assert(!upb_msgdef_isfrozen(m));
1662   m->map_entry = map_entry;
1663 }
1664 
upb_msgdef_mapentry(const upb_msgdef * m)1665 bool upb_msgdef_mapentry(const upb_msgdef *m) {
1666   return m->map_entry;
1667 }
1668 
upb_msg_field_begin(upb_msg_field_iter * iter,const upb_msgdef * m)1669 void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
1670   upb_inttable_begin(iter, &m->itof);
1671 }
1672 
upb_msg_field_next(upb_msg_field_iter * iter)1673 void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }
1674 
upb_msg_field_done(const upb_msg_field_iter * iter)1675 bool upb_msg_field_done(const upb_msg_field_iter *iter) {
1676   return upb_inttable_done(iter);
1677 }
1678 
upb_msg_iter_field(const upb_msg_field_iter * iter)1679 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
1680   return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1681 }
1682 
upb_msg_field_iter_setdone(upb_msg_field_iter * iter)1683 void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
1684   upb_inttable_iter_setdone(iter);
1685 }
1686 
upb_msg_oneof_begin(upb_msg_oneof_iter * iter,const upb_msgdef * m)1687 void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
1688   upb_strtable_begin(iter, &m->ntof);
1689   /* We need to skip past any initial fields. */
1690   while (!upb_strtable_done(iter) &&
1691          !upb_isoneof(upb_value_getptr(upb_strtable_iter_value(iter)))) {
1692     upb_strtable_next(iter);
1693   }
1694 }
1695 
upb_msg_oneof_next(upb_msg_oneof_iter * iter)1696 void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
1697   /* We need to skip past fields to return only oneofs. */
1698   do {
1699     upb_strtable_next(iter);
1700   } while (!upb_strtable_done(iter) &&
1701            !upb_isoneof(upb_value_getptr(upb_strtable_iter_value(iter))));
1702 }
1703 
upb_msg_oneof_done(const upb_msg_oneof_iter * iter)1704 bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
1705   return upb_strtable_done(iter);
1706 }
1707 
upb_msg_iter_oneof(const upb_msg_oneof_iter * iter)1708 upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
1709   return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
1710 }
1711 
upb_msg_oneof_iter_setdone(upb_msg_oneof_iter * iter)1712 void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
1713   upb_strtable_iter_setdone(iter);
1714 }
1715 
1716 /* upb_oneofdef ***************************************************************/
1717 
visitoneof(const upb_refcounted * r,upb_refcounted_visit * visit,void * closure)1718 static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
1719                        void *closure) {
1720   const upb_oneofdef *o = (const upb_oneofdef*)r;
1721   upb_oneof_iter i;
1722   for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
1723     const upb_fielddef *f = upb_oneof_iter_field(&i);
1724     visit(r, upb_fielddef_upcast2(f), closure);
1725   }
1726   if (o->parent) {
1727     visit(r, upb_msgdef_upcast2(o->parent), closure);
1728   }
1729 }
1730 
freeoneof(upb_refcounted * r)1731 static void freeoneof(upb_refcounted *r) {
1732   upb_oneofdef *o = (upb_oneofdef*)r;
1733   upb_strtable_uninit(&o->ntof);
1734   upb_inttable_uninit(&o->itof);
1735   upb_gfree((void*)o->name);
1736   upb_gfree(o);
1737 }
1738 
1739 const struct upb_refcounted_vtbl upb_oneofdef_vtbl = {visitoneof, freeoneof};
1740 
upb_oneofdef_new(const void * owner)1741 upb_oneofdef *upb_oneofdef_new(const void *owner) {
1742   upb_oneofdef *o = upb_gmalloc(sizeof(*o));
1743 
1744   if (!o) {
1745     return NULL;
1746   }
1747 
1748   o->parent = NULL;
1749   o->name = NULL;
1750 
1751   if (!upb_refcounted_init(upb_oneofdef_upcast_mutable(o), &upb_oneofdef_vtbl,
1752                            owner)) {
1753     goto err2;
1754   }
1755 
1756   if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
1757   if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
1758 
1759   return o;
1760 
1761 err1:
1762   upb_inttable_uninit(&o->itof);
1763 err2:
1764   upb_gfree(o);
1765   return NULL;
1766 }
1767 
upb_oneofdef_dup(const upb_oneofdef * o,const void * owner)1768 upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
1769   bool ok;
1770   upb_oneof_iter i;
1771   upb_oneofdef *newo = upb_oneofdef_new(owner);
1772   if (!newo) return NULL;
1773   ok = upb_oneofdef_setname(newo, upb_oneofdef_name(o), NULL);
1774   UPB_ASSERT_VAR(ok, ok);
1775   for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
1776     upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f);
1777     if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) {
1778       upb_oneofdef_unref(newo, owner);
1779       return NULL;
1780     }
1781   }
1782   return newo;
1783 }
1784 
upb_oneofdef_name(const upb_oneofdef * o)1785 const char *upb_oneofdef_name(const upb_oneofdef *o) { return o->name; }
1786 
upb_oneofdef_setname(upb_oneofdef * o,const char * name,upb_status * s)1787 bool upb_oneofdef_setname(upb_oneofdef *o, const char *name, upb_status *s) {
1788   assert(!upb_oneofdef_isfrozen(o));
1789   if (upb_oneofdef_containingtype(o)) {
1790     upb_status_seterrmsg(s, "oneof already added to a message");
1791     return false;
1792   }
1793 
1794   if (!upb_isident(name, strlen(name), true, s)) {
1795     return false;
1796   }
1797 
1798   name = upb_gstrdup(name);
1799   if (!name) {
1800     upb_status_seterrmsg(s, "One of memory");
1801     return false;
1802   }
1803 
1804   upb_gfree((void*)o->name);
1805   o->name = name;
1806   return true;
1807 }
1808 
upb_oneofdef_containingtype(const upb_oneofdef * o)1809 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
1810   return o->parent;
1811 }
1812 
upb_oneofdef_numfields(const upb_oneofdef * o)1813 int upb_oneofdef_numfields(const upb_oneofdef *o) {
1814   return upb_strtable_count(&o->ntof);
1815 }
1816 
upb_oneofdef_addfield(upb_oneofdef * o,upb_fielddef * f,const void * ref_donor,upb_status * s)1817 bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
1818                            const void *ref_donor,
1819                            upb_status *s) {
1820   assert(!upb_oneofdef_isfrozen(o));
1821   assert(!o->parent || !upb_msgdef_isfrozen(o->parent));
1822 
1823   /* This method is idempotent. Check if |f| is already part of this oneofdef
1824    * and return immediately if so. */
1825   if (upb_fielddef_containingoneof(f) == o) {
1826     return true;
1827   }
1828 
1829   /* The field must have an OPTIONAL label. */
1830   if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
1831     upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
1832     return false;
1833   }
1834 
1835   /* Check that no field with this name or number exists already in the oneof.
1836    * Also check that the field is not already part of a oneof. */
1837   if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
1838     upb_status_seterrmsg(s, "field name or number were not set");
1839     return false;
1840   } else if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
1841              upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
1842     upb_status_seterrmsg(s, "duplicate field name or number");
1843     return false;
1844   } else if (upb_fielddef_containingoneof(f) != NULL) {
1845     upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
1846     return false;
1847   }
1848 
1849   /* We allow adding a field to the oneof either if the field is not part of a
1850    * msgdef, or if it is and we are also part of the same msgdef. */
1851   if (o->parent == NULL) {
1852     /* If we're not in a msgdef, the field cannot be either. Otherwise we would
1853      * need to magically add this oneof to a msgdef to remain consistent, which
1854      * is surprising behavior. */
1855     if (upb_fielddef_containingtype(f) != NULL) {
1856       upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
1857                               "oneof does not");
1858       return false;
1859     }
1860   } else {
1861     /* If we're in a msgdef, the user can add fields that either aren't in any
1862      * msgdef (in which case they're added to our msgdef) or already a part of
1863      * our msgdef. */
1864     if (upb_fielddef_containingtype(f) != NULL &&
1865         upb_fielddef_containingtype(f) != o->parent) {
1866       upb_status_seterrmsg(s, "fielddef belongs to a different message "
1867                               "than oneof");
1868       return false;
1869     }
1870   }
1871 
1872   /* Commit phase. First add the field to our parent msgdef, if any, because
1873    * that may fail; then add the field to our own tables. */
1874 
1875   if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) {
1876     if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
1877       return false;
1878     }
1879   }
1880 
1881   release_containingtype(f);
1882   f->oneof = o;
1883   upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
1884   upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
1885   upb_ref2(f, o);
1886   upb_ref2(o, f);
1887   if (ref_donor) upb_fielddef_unref(f, ref_donor);
1888 
1889   return true;
1890 }
1891 
upb_oneofdef_ntof(const upb_oneofdef * o,const char * name,size_t length)1892 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
1893                                       const char *name, size_t length) {
1894   upb_value val;
1895   return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
1896       upb_value_getptr(val) : NULL;
1897 }
1898 
upb_oneofdef_itof(const upb_oneofdef * o,uint32_t num)1899 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
1900   upb_value val;
1901   return upb_inttable_lookup32(&o->itof, num, &val) ?
1902       upb_value_getptr(val) : NULL;
1903 }
1904 
upb_oneof_begin(upb_oneof_iter * iter,const upb_oneofdef * o)1905 void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
1906   upb_inttable_begin(iter, &o->itof);
1907 }
1908 
upb_oneof_next(upb_oneof_iter * iter)1909 void upb_oneof_next(upb_oneof_iter *iter) {
1910   upb_inttable_next(iter);
1911 }
1912 
upb_oneof_done(upb_oneof_iter * iter)1913 bool upb_oneof_done(upb_oneof_iter *iter) {
1914   return upb_inttable_done(iter);
1915 }
1916 
upb_oneof_iter_field(const upb_oneof_iter * iter)1917 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
1918   return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1919 }
1920 
upb_oneof_iter_setdone(upb_oneof_iter * iter)1921 void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
1922   upb_inttable_iter_setdone(iter);
1923 }
1924 
1925 /* upb_filedef ****************************************************************/
1926 
visitfiledef(const upb_refcounted * r,upb_refcounted_visit * visit,void * closure)1927 static void visitfiledef(const upb_refcounted *r, upb_refcounted_visit *visit,
1928                          void *closure) {
1929   const upb_filedef *f = (const upb_filedef*)r;
1930   size_t i;
1931 
1932   for(i = 0; i < upb_filedef_defcount(f); i++) {
1933     visit(r, upb_def_upcast(upb_filedef_def(f, i)), closure);
1934   }
1935 }
1936 
freefiledef(upb_refcounted * r)1937 static void freefiledef(upb_refcounted *r) {
1938   upb_filedef *f = (upb_filedef*)r;
1939   size_t i;
1940 
1941   for(i = 0; i < upb_filedef_depcount(f); i++) {
1942     upb_filedef_unref(upb_filedef_dep(f, i), f);
1943   }
1944 
1945   upb_inttable_uninit(&f->defs);
1946   upb_inttable_uninit(&f->deps);
1947   upb_gfree((void*)f->name);
1948   upb_gfree((void*)f->package);
1949   upb_gfree(f);
1950 }
1951 
1952 const struct upb_refcounted_vtbl upb_filedef_vtbl = {visitfiledef, freefiledef};
1953 
upb_filedef_new(const void * owner)1954 upb_filedef *upb_filedef_new(const void *owner) {
1955   upb_filedef *f = upb_gmalloc(sizeof(*f));
1956 
1957   if (!f) {
1958     return NULL;
1959   }
1960 
1961   f->package = NULL;
1962   f->name = NULL;
1963   f->syntax = UPB_SYNTAX_PROTO2;
1964 
1965   if (!upb_refcounted_init(upb_filedef_upcast_mutable(f), &upb_filedef_vtbl,
1966                            owner)) {
1967     goto err;
1968   }
1969 
1970   if (!upb_inttable_init(&f->defs, UPB_CTYPE_CONSTPTR)) {
1971     goto err;
1972   }
1973 
1974   if (!upb_inttable_init(&f->deps, UPB_CTYPE_CONSTPTR)) {
1975     goto err2;
1976   }
1977 
1978   return f;
1979 
1980 
1981 err2:
1982   upb_inttable_uninit(&f->defs);
1983 
1984 err:
1985   upb_gfree(f);
1986   return NULL;
1987 }
1988 
upb_filedef_name(const upb_filedef * f)1989 const char *upb_filedef_name(const upb_filedef *f) {
1990   return f->name;
1991 }
1992 
upb_filedef_package(const upb_filedef * f)1993 const char *upb_filedef_package(const upb_filedef *f) {
1994   return f->package;
1995 }
1996 
upb_filedef_syntax(const upb_filedef * f)1997 upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
1998   return f->syntax;
1999 }
2000 
upb_filedef_defcount(const upb_filedef * f)2001 size_t upb_filedef_defcount(const upb_filedef *f) {
2002   return upb_inttable_count(&f->defs);
2003 }
2004 
upb_filedef_depcount(const upb_filedef * f)2005 size_t upb_filedef_depcount(const upb_filedef *f) {
2006   return upb_inttable_count(&f->deps);
2007 }
2008 
upb_filedef_def(const upb_filedef * f,size_t i)2009 const upb_def *upb_filedef_def(const upb_filedef *f, size_t i) {
2010   upb_value v;
2011 
2012   if (upb_inttable_lookup32(&f->defs, i, &v)) {
2013     return upb_value_getconstptr(v);
2014   } else {
2015     return NULL;
2016   }
2017 }
2018 
upb_filedef_dep(const upb_filedef * f,size_t i)2019 const upb_filedef *upb_filedef_dep(const upb_filedef *f, size_t i) {
2020   upb_value v;
2021 
2022   if (upb_inttable_lookup32(&f->deps, i, &v)) {
2023     return upb_value_getconstptr(v);
2024   } else {
2025     return NULL;
2026   }
2027 }
2028 
upb_filedef_setname(upb_filedef * f,const char * name,upb_status * s)2029 bool upb_filedef_setname(upb_filedef *f, const char *name, upb_status *s) {
2030   name = upb_gstrdup(name);
2031   if (!name) {
2032     upb_upberr_setoom(s);
2033     return false;
2034   }
2035   upb_gfree((void*)f->name);
2036   f->name = name;
2037   return true;
2038 }
2039 
upb_filedef_setpackage(upb_filedef * f,const char * package,upb_status * s)2040 bool upb_filedef_setpackage(upb_filedef *f, const char *package,
2041                             upb_status *s) {
2042   if (!upb_isident(package, strlen(package), true, s)) return false;
2043   package = upb_gstrdup(package);
2044   if (!package) {
2045     upb_upberr_setoom(s);
2046     return false;
2047   }
2048   upb_gfree((void*)f->package);
2049   f->package = package;
2050   return true;
2051 }
2052 
upb_filedef_setsyntax(upb_filedef * f,upb_syntax_t syntax,upb_status * s)2053 bool upb_filedef_setsyntax(upb_filedef *f, upb_syntax_t syntax,
2054                            upb_status *s) {
2055   UPB_UNUSED(s);
2056   if (syntax != UPB_SYNTAX_PROTO2 &&
2057       syntax != UPB_SYNTAX_PROTO3) {
2058     upb_status_seterrmsg(s, "Unknown syntax value.");
2059     return false;
2060   }
2061   f->syntax = syntax;
2062 
2063   {
2064     /* Set all messages in this file to match. */
2065     size_t i;
2066     for (i = 0; i < upb_filedef_defcount(f); i++) {
2067       /* Casting const away is safe since all defs in mutable filedef must
2068        * also be mutable. */
2069       upb_def *def = (upb_def*)upb_filedef_def(f, i);
2070 
2071       upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
2072       if (m) {
2073         m->syntax = syntax;
2074       }
2075     }
2076   }
2077 
2078   return true;
2079 }
2080 
upb_filedef_adddef(upb_filedef * f,upb_def * def,const void * ref_donor,upb_status * s)2081 bool upb_filedef_adddef(upb_filedef *f, upb_def *def, const void *ref_donor,
2082                         upb_status *s) {
2083   if (def->file) {
2084     upb_status_seterrmsg(s, "Def is already part of another filedef.");
2085     return false;
2086   }
2087 
2088   if (upb_inttable_push(&f->defs, upb_value_constptr(def))) {
2089     def->file = f;
2090     upb_ref2(def, f);
2091     if (ref_donor) upb_def_unref(def, ref_donor);
2092     if (def->type == UPB_DEF_MSG) {
2093       upb_downcast_msgdef_mutable(def)->syntax = f->syntax;
2094     }
2095     return true;
2096   } else {
2097     upb_upberr_setoom(s);
2098     return false;
2099   }
2100 }
2101 
upb_filedef_adddep(upb_filedef * f,const upb_filedef * dep)2102 bool upb_filedef_adddep(upb_filedef *f, const upb_filedef *dep) {
2103   if (upb_inttable_push(&f->deps, upb_value_constptr(dep))) {
2104     /* Regular ref instead of ref2 because files can't form cycles. */
2105     upb_filedef_ref(dep, f);
2106     return true;
2107   } else {
2108     return false;
2109   }
2110 }
2111 /*
2112 ** TODO(haberman): it's unclear whether a lot of the consistency checks should
2113 ** assert() or return false.
2114 */
2115 
2116 
2117 #include <string.h>
2118 
2119 
upb_calloc(size_t size)2120 static void *upb_calloc(size_t size) {
2121   void *mem = upb_gmalloc(size);
2122   if (mem) {
2123     memset(mem, 0, size);
2124   }
2125   return mem;
2126 }
2127 
2128 /* Defined for the sole purpose of having a unique pointer value for
2129  * UPB_NO_CLOSURE. */
2130 char _upb_noclosure;
2131 
freehandlers(upb_refcounted * r)2132 static void freehandlers(upb_refcounted *r) {
2133   upb_handlers *h = (upb_handlers*)r;
2134 
2135   upb_inttable_iter i;
2136   upb_inttable_begin(&i, &h->cleanup_);
2137   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
2138     void *val = (void*)upb_inttable_iter_key(&i);
2139     upb_value func_val = upb_inttable_iter_value(&i);
2140     upb_handlerfree *func = upb_value_getfptr(func_val);
2141     func(val);
2142   }
2143 
2144   upb_inttable_uninit(&h->cleanup_);
2145   upb_msgdef_unref(h->msg, h);
2146   upb_gfree(h->sub);
2147   upb_gfree(h);
2148 }
2149 
visithandlers(const upb_refcounted * r,upb_refcounted_visit * visit,void * closure)2150 static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
2151                           void *closure) {
2152   const upb_handlers *h = (const upb_handlers*)r;
2153   upb_msg_field_iter i;
2154   for(upb_msg_field_begin(&i, h->msg);
2155       !upb_msg_field_done(&i);
2156       upb_msg_field_next(&i)) {
2157     upb_fielddef *f = upb_msg_iter_field(&i);
2158     const upb_handlers *sub;
2159     if (!upb_fielddef_issubmsg(f)) continue;
2160     sub = upb_handlers_getsubhandlers(h, f);
2161     if (sub) visit(r, upb_handlers_upcast(sub), closure);
2162   }
2163 }
2164 
2165 static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
2166 
2167 typedef struct {
2168   upb_inttable tab;  /* maps upb_msgdef* -> upb_handlers*. */
2169   upb_handlers_callback *callback;
2170   const void *closure;
2171 } dfs_state;
2172 
2173 /* TODO(haberman): discard upb_handlers* objects that do not actually have any
2174  * handlers set and cannot reach any upb_handlers* object that does.  This is
2175  * slightly tricky to do correctly. */
newformsg(const upb_msgdef * m,const void * owner,dfs_state * s)2176 static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
2177                                dfs_state *s) {
2178   upb_msg_field_iter i;
2179   upb_handlers *h = upb_handlers_new(m, owner);
2180   if (!h) return NULL;
2181   if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
2182 
2183   s->callback(s->closure, h);
2184 
2185   /* For each submessage field, get or create a handlers object and set it as
2186    * the subhandlers. */
2187   for(upb_msg_field_begin(&i, m);
2188       !upb_msg_field_done(&i);
2189       upb_msg_field_next(&i)) {
2190     upb_fielddef *f = upb_msg_iter_field(&i);
2191     const upb_msgdef *subdef;
2192     upb_value subm_ent;
2193 
2194     if (!upb_fielddef_issubmsg(f)) continue;
2195 
2196     subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
2197     if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
2198       upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
2199     } else {
2200       upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
2201       if (!sub_mh) goto oom;
2202       upb_handlers_setsubhandlers(h, f, sub_mh);
2203       upb_handlers_unref(sub_mh, &sub_mh);
2204     }
2205   }
2206   return h;
2207 
2208 oom:
2209   upb_handlers_unref(h, owner);
2210   return NULL;
2211 }
2212 
2213 /* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the
2214  * subhandlers for this submessage field. */
2215 #define SUBH(h, selector) (h->sub[selector])
2216 
2217 /* The selector for a submessage field is the field index. */
2218 #define SUBH_F(h, f) SUBH(h, f->index_)
2219 
trygetsel(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)2220 static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
2221                          upb_handlertype_t type) {
2222   upb_selector_t sel;
2223   assert(!upb_handlers_isfrozen(h));
2224   if (upb_handlers_msgdef(h) != upb_fielddef_containingtype(f)) {
2225     upb_status_seterrf(
2226         &h->status_, "type mismatch: field %s does not belong to message %s",
2227         upb_fielddef_name(f), upb_msgdef_fullname(upb_handlers_msgdef(h)));
2228     return -1;
2229   }
2230   if (!upb_handlers_getselector(f, type, &sel)) {
2231     upb_status_seterrf(
2232         &h->status_,
2233         "type mismatch: cannot register handler type %d for field %s",
2234         type, upb_fielddef_name(f));
2235     return -1;
2236   }
2237   return sel;
2238 }
2239 
handlers_getsel(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)2240 static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
2241                              upb_handlertype_t type) {
2242   int32_t sel = trygetsel(h, f, type);
2243   assert(sel >= 0);
2244   return sel;
2245 }
2246 
returntype(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)2247 static const void **returntype(upb_handlers *h, const upb_fielddef *f,
2248                                upb_handlertype_t type) {
2249   return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type_;
2250 }
2251 
doset(upb_handlers * h,int32_t sel,const upb_fielddef * f,upb_handlertype_t type,upb_func * func,upb_handlerattr * attr)2252 static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
2253                   upb_handlertype_t type, upb_func *func,
2254                   upb_handlerattr *attr) {
2255   upb_handlerattr set_attr = UPB_HANDLERATTR_INITIALIZER;
2256   const void *closure_type;
2257   const void **context_closure_type;
2258 
2259   assert(!upb_handlers_isfrozen(h));
2260 
2261   if (sel < 0) {
2262     upb_status_seterrmsg(&h->status_,
2263                          "incorrect handler type for this field.");
2264     return false;
2265   }
2266 
2267   if (h->table[sel].func) {
2268     upb_status_seterrmsg(&h->status_,
2269                          "cannot change handler once it has been set.");
2270     return false;
2271   }
2272 
2273   if (attr) {
2274     set_attr = *attr;
2275   }
2276 
2277   /* Check that the given closure type matches the closure type that has been
2278    * established for this context (if any). */
2279   closure_type = upb_handlerattr_closuretype(&set_attr);
2280 
2281   if (type == UPB_HANDLER_STRING) {
2282     context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR);
2283   } else if (f && upb_fielddef_isseq(f) &&
2284              type != UPB_HANDLER_STARTSEQ &&
2285              type != UPB_HANDLER_ENDSEQ) {
2286     context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ);
2287   } else {
2288     context_closure_type = &h->top_closure_type;
2289   }
2290 
2291   if (closure_type && *context_closure_type &&
2292       closure_type != *context_closure_type) {
2293     /* TODO(haberman): better message for debugging. */
2294     if (f) {
2295       upb_status_seterrf(&h->status_,
2296                          "closure type does not match for field %s",
2297                          upb_fielddef_name(f));
2298     } else {
2299       upb_status_seterrmsg(
2300           &h->status_, "closure type does not match for message-level handler");
2301     }
2302     return false;
2303   }
2304 
2305   if (closure_type)
2306     *context_closure_type = closure_type;
2307 
2308   /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer
2309    * matches any pre-existing expectations about what type is expected. */
2310   if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
2311     const void *return_type = upb_handlerattr_returnclosuretype(&set_attr);
2312     const void *table_return_type =
2313         upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2314     if (return_type && table_return_type && return_type != table_return_type) {
2315       upb_status_seterrmsg(&h->status_, "closure return type does not match");
2316       return false;
2317     }
2318 
2319     if (table_return_type && !return_type)
2320       upb_handlerattr_setreturnclosuretype(&set_attr, table_return_type);
2321   }
2322 
2323   h->table[sel].func = (upb_func*)func;
2324   h->table[sel].attr = set_attr;
2325   return true;
2326 }
2327 
2328 /* Returns the effective closure type for this handler (which will propagate
2329  * from outer frames if this frame has no START* handler).  Not implemented for
2330  * UPB_HANDLER_STRING at the moment since this is not needed.  Returns NULL is
2331  * the effective closure type is unspecified (either no handler was registered
2332  * to specify it or the handler that was registered did not specify the closure
2333  * type). */
effective_closure_type(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)2334 const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
2335                                    upb_handlertype_t type) {
2336   const void *ret;
2337   upb_selector_t sel;
2338 
2339   assert(type != UPB_HANDLER_STRING);
2340   ret = h->top_closure_type;
2341 
2342   if (upb_fielddef_isseq(f) &&
2343       type != UPB_HANDLER_STARTSEQ &&
2344       type != UPB_HANDLER_ENDSEQ &&
2345       h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
2346     ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2347   }
2348 
2349   if (type == UPB_HANDLER_STRING &&
2350       h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
2351     ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2352   }
2353 
2354   /* The effective type of the submessage; not used yet.
2355    * if (type == SUBMESSAGE &&
2356    *     h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
2357    *   ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2358    * } */
2359 
2360   return ret;
2361 }
2362 
2363 /* Checks whether the START* handler specified by f & type is missing even
2364  * though it is required to convert the established type of an outer frame
2365  * ("closure_type") into the established type of an inner frame (represented in
2366  * the return closure type of this handler's attr. */
checkstart(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type,upb_status * status)2367 bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
2368                 upb_status *status) {
2369   const void *closure_type;
2370   const upb_handlerattr *attr;
2371   const void *return_closure_type;
2372 
2373   upb_selector_t sel = handlers_getsel(h, f, type);
2374   if (h->table[sel].func) return true;
2375   closure_type = effective_closure_type(h, f, type);
2376   attr = &h->table[sel].attr;
2377   return_closure_type = upb_handlerattr_returnclosuretype(attr);
2378   if (closure_type && return_closure_type &&
2379       closure_type != return_closure_type) {
2380     upb_status_seterrf(status,
2381                        "expected start handler to return sub type for field %f",
2382                        upb_fielddef_name(f));
2383     return false;
2384   }
2385   return true;
2386 }
2387 
2388 /* Public interface ***********************************************************/
2389 
upb_handlers_new(const upb_msgdef * md,const void * owner)2390 upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
2391   int extra;
2392   upb_handlers *h;
2393 
2394   assert(upb_msgdef_isfrozen(md));
2395 
2396   extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
2397   h = upb_calloc(sizeof(*h) + extra);
2398   if (!h) return NULL;
2399 
2400   h->msg = md;
2401   upb_msgdef_ref(h->msg, h);
2402   upb_status_clear(&h->status_);
2403 
2404   if (md->submsg_field_count > 0) {
2405     h->sub = upb_calloc(md->submsg_field_count * sizeof(*h->sub));
2406     if (!h->sub) goto oom;
2407   } else {
2408     h->sub = 0;
2409   }
2410 
2411   if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
2412     goto oom;
2413   if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
2414 
2415   /* calloc() above initialized all handlers to NULL. */
2416   return h;
2417 
2418 oom:
2419   freehandlers(upb_handlers_upcast_mutable(h));
2420   return NULL;
2421 }
2422 
upb_handlers_newfrozen(const upb_msgdef * m,const void * owner,upb_handlers_callback * callback,const void * closure)2423 const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
2424                                            const void *owner,
2425                                            upb_handlers_callback *callback,
2426                                            const void *closure) {
2427   dfs_state state;
2428   upb_handlers *ret;
2429   bool ok;
2430   upb_refcounted *r;
2431 
2432   state.callback = callback;
2433   state.closure = closure;
2434   if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
2435 
2436   ret = newformsg(m, owner, &state);
2437 
2438   upb_inttable_uninit(&state.tab);
2439   if (!ret) return NULL;
2440 
2441   r = upb_handlers_upcast_mutable(ret);
2442   ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
2443   UPB_ASSERT_VAR(ok, ok);
2444 
2445   return ret;
2446 }
2447 
upb_handlers_status(upb_handlers * h)2448 const upb_status *upb_handlers_status(upb_handlers *h) {
2449   assert(!upb_handlers_isfrozen(h));
2450   return &h->status_;
2451 }
2452 
upb_handlers_clearerr(upb_handlers * h)2453 void upb_handlers_clearerr(upb_handlers *h) {
2454   assert(!upb_handlers_isfrozen(h));
2455   upb_status_clear(&h->status_);
2456 }
2457 
2458 #define SETTER(name, handlerctype, handlertype) \
2459   bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
2460                                 handlerctype func, upb_handlerattr *attr) { \
2461     int32_t sel = trygetsel(h, f, handlertype); \
2462     return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
2463   }
2464 
SETTER(int32,upb_int32_handlerfunc *,UPB_HANDLER_INT32)2465 SETTER(int32,       upb_int32_handlerfunc*,       UPB_HANDLER_INT32)
2466 SETTER(int64,       upb_int64_handlerfunc*,       UPB_HANDLER_INT64)
2467 SETTER(uint32,      upb_uint32_handlerfunc*,      UPB_HANDLER_UINT32)
2468 SETTER(uint64,      upb_uint64_handlerfunc*,      UPB_HANDLER_UINT64)
2469 SETTER(float,       upb_float_handlerfunc*,       UPB_HANDLER_FLOAT)
2470 SETTER(double,      upb_double_handlerfunc*,      UPB_HANDLER_DOUBLE)
2471 SETTER(bool,        upb_bool_handlerfunc*,        UPB_HANDLER_BOOL)
2472 SETTER(startstr,    upb_startstr_handlerfunc*,    UPB_HANDLER_STARTSTR)
2473 SETTER(string,      upb_string_handlerfunc*,      UPB_HANDLER_STRING)
2474 SETTER(endstr,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSTR)
2475 SETTER(startseq,    upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSEQ)
2476 SETTER(startsubmsg, upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSUBMSG)
2477 SETTER(endsubmsg,   upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSUBMSG)
2478 SETTER(endseq,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSEQ)
2479 
2480 #undef SETTER
2481 
2482 bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
2483                               upb_handlerattr *attr) {
2484   return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
2485                (upb_func *)func, attr);
2486 }
2487 
upb_handlers_setendmsg(upb_handlers * h,upb_endmsg_handlerfunc * func,upb_handlerattr * attr)2488 bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
2489                             upb_handlerattr *attr) {
2490   assert(!upb_handlers_isfrozen(h));
2491   return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
2492                (upb_func *)func, attr);
2493 }
2494 
upb_handlers_setsubhandlers(upb_handlers * h,const upb_fielddef * f,const upb_handlers * sub)2495 bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
2496                                  const upb_handlers *sub) {
2497   assert(sub);
2498   assert(!upb_handlers_isfrozen(h));
2499   assert(upb_fielddef_issubmsg(f));
2500   if (SUBH_F(h, f)) return false;  /* Can't reset. */
2501   if (upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
2502     return false;
2503   }
2504   SUBH_F(h, f) = sub;
2505   upb_ref2(sub, h);
2506   return true;
2507 }
2508 
upb_handlers_getsubhandlers(const upb_handlers * h,const upb_fielddef * f)2509 const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
2510                                                 const upb_fielddef *f) {
2511   assert(upb_fielddef_issubmsg(f));
2512   return SUBH_F(h, f);
2513 }
2514 
upb_handlers_getattr(const upb_handlers * h,upb_selector_t sel,upb_handlerattr * attr)2515 bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
2516                           upb_handlerattr *attr) {
2517   if (!upb_handlers_gethandler(h, sel))
2518     return false;
2519   *attr = h->table[sel].attr;
2520   return true;
2521 }
2522 
upb_handlers_getsubhandlers_sel(const upb_handlers * h,upb_selector_t sel)2523 const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
2524                                                     upb_selector_t sel) {
2525   /* STARTSUBMSG selector in sel is the field's selector base. */
2526   return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
2527 }
2528 
upb_handlers_msgdef(const upb_handlers * h)2529 const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
2530 
upb_handlers_addcleanup(upb_handlers * h,void * p,upb_handlerfree * func)2531 bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
2532   bool ok;
2533   if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) {
2534     return false;
2535   }
2536   ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
2537   UPB_ASSERT_VAR(ok, ok);
2538   return true;
2539 }
2540 
2541 
2542 /* "Static" methods ***********************************************************/
2543 
upb_handlers_freeze(upb_handlers * const * handlers,int n,upb_status * s)2544 bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
2545   /* TODO: verify we have a transitive closure. */
2546   int i;
2547   for (i = 0; i < n; i++) {
2548     upb_msg_field_iter j;
2549     upb_handlers *h = handlers[i];
2550 
2551     if (!upb_ok(&h->status_)) {
2552       upb_status_seterrf(s, "handlers for message %s had error status: %s",
2553                          upb_msgdef_fullname(upb_handlers_msgdef(h)),
2554                          upb_status_errmsg(&h->status_));
2555       return false;
2556     }
2557 
2558     /* Check that there are no closure mismatches due to missing Start* handlers
2559      * or subhandlers with different type-level types. */
2560     for(upb_msg_field_begin(&j, h->msg);
2561         !upb_msg_field_done(&j);
2562         upb_msg_field_next(&j)) {
2563 
2564       const upb_fielddef *f = upb_msg_iter_field(&j);
2565       if (upb_fielddef_isseq(f)) {
2566         if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s))
2567           return false;
2568       }
2569 
2570       if (upb_fielddef_isstring(f)) {
2571         if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s))
2572           return false;
2573       }
2574 
2575       if (upb_fielddef_issubmsg(f)) {
2576         bool hashandler = false;
2577         if (upb_handlers_gethandler(
2578                 h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) ||
2579             upb_handlers_gethandler(
2580                 h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) {
2581           hashandler = true;
2582         }
2583 
2584         if (upb_fielddef_isseq(f) &&
2585             (upb_handlers_gethandler(
2586                  h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) ||
2587              upb_handlers_gethandler(
2588                  h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) {
2589           hashandler = true;
2590         }
2591 
2592         if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
2593           /* For now we add an empty subhandlers in this case.  It makes the
2594            * decoder code generator simpler, because it only has to handle two
2595            * cases (submessage has handlers or not) as opposed to three
2596            * (submessage has handlers in enclosing message but no subhandlers).
2597            *
2598            * This makes parsing less efficient in the case that we want to
2599            * notice a submessage but skip its contents (like if we're testing
2600            * for submessage presence or counting the number of repeated
2601            * submessages).  In this case we will end up parsing the submessage
2602            * field by field and throwing away the results for each, instead of
2603            * skipping the whole delimited thing at once.  If this is an issue we
2604            * can revisit it, but do remember that this only arises when you have
2605            * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
2606            * submessage but no subhandlers.  The uses cases for this are
2607            * limited. */
2608           upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
2609           upb_handlers_setsubhandlers(h, f, sub);
2610           upb_handlers_unref(sub, &sub);
2611         }
2612 
2613         /* TODO(haberman): check type of submessage.
2614          * This is slightly tricky; also consider whether we should check that
2615          * they match at setsubhandlers time. */
2616       }
2617     }
2618   }
2619 
2620   if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s,
2621                              UPB_MAX_HANDLER_DEPTH)) {
2622     return false;
2623   }
2624 
2625   return true;
2626 }
2627 
upb_handlers_getprimitivehandlertype(const upb_fielddef * f)2628 upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
2629   switch (upb_fielddef_type(f)) {
2630     case UPB_TYPE_INT32:
2631     case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
2632     case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
2633     case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
2634     case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
2635     case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
2636     case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
2637     case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
2638     default: assert(false); return -1;  /* Invalid input. */
2639   }
2640 }
2641 
upb_handlers_getselector(const upb_fielddef * f,upb_handlertype_t type,upb_selector_t * s)2642 bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
2643                               upb_selector_t *s) {
2644   switch (type) {
2645     case UPB_HANDLER_INT32:
2646     case UPB_HANDLER_INT64:
2647     case UPB_HANDLER_UINT32:
2648     case UPB_HANDLER_UINT64:
2649     case UPB_HANDLER_FLOAT:
2650     case UPB_HANDLER_DOUBLE:
2651     case UPB_HANDLER_BOOL:
2652       if (!upb_fielddef_isprimitive(f) ||
2653           upb_handlers_getprimitivehandlertype(f) != type)
2654         return false;
2655       *s = f->selector_base;
2656       break;
2657     case UPB_HANDLER_STRING:
2658       if (upb_fielddef_isstring(f)) {
2659         *s = f->selector_base;
2660       } else if (upb_fielddef_lazy(f)) {
2661         *s = f->selector_base + 3;
2662       } else {
2663         return false;
2664       }
2665       break;
2666     case UPB_HANDLER_STARTSTR:
2667       if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
2668         *s = f->selector_base + 1;
2669       } else {
2670         return false;
2671       }
2672       break;
2673     case UPB_HANDLER_ENDSTR:
2674       if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
2675         *s = f->selector_base + 2;
2676       } else {
2677         return false;
2678       }
2679       break;
2680     case UPB_HANDLER_STARTSEQ:
2681       if (!upb_fielddef_isseq(f)) return false;
2682       *s = f->selector_base - 2;
2683       break;
2684     case UPB_HANDLER_ENDSEQ:
2685       if (!upb_fielddef_isseq(f)) return false;
2686       *s = f->selector_base - 1;
2687       break;
2688     case UPB_HANDLER_STARTSUBMSG:
2689       if (!upb_fielddef_issubmsg(f)) return false;
2690       /* Selectors for STARTSUBMSG are at the beginning of the table so that the
2691        * selector can also be used as an index into the "sub" array of
2692        * subhandlers.  The indexes for the two into these two tables are the
2693        * same, except that in the handler table the static selectors come first. */
2694       *s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
2695       break;
2696     case UPB_HANDLER_ENDSUBMSG:
2697       if (!upb_fielddef_issubmsg(f)) return false;
2698       *s = f->selector_base;
2699       break;
2700   }
2701   assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
2702   return true;
2703 }
2704 
upb_handlers_selectorbaseoffset(const upb_fielddef * f)2705 uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
2706   return upb_fielddef_isseq(f) ? 2 : 0;
2707 }
2708 
upb_handlers_selectorcount(const upb_fielddef * f)2709 uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
2710   uint32_t ret = 1;
2711   if (upb_fielddef_isseq(f)) ret += 2;    /* STARTSEQ/ENDSEQ */
2712   if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */
2713   if (upb_fielddef_issubmsg(f)) {
2714     /* ENDSUBMSG (STARTSUBMSG is at table beginning) */
2715     ret += 0;
2716     if (upb_fielddef_lazy(f)) {
2717       /* STARTSTR/ENDSTR/STRING (for lazy) */
2718       ret += 3;
2719     }
2720   }
2721   return ret;
2722 }
2723 
2724 
2725 /* upb_handlerattr ************************************************************/
2726 
upb_handlerattr_init(upb_handlerattr * attr)2727 void upb_handlerattr_init(upb_handlerattr *attr) {
2728   upb_handlerattr from = UPB_HANDLERATTR_INITIALIZER;
2729   memcpy(attr, &from, sizeof(*attr));
2730 }
2731 
upb_handlerattr_uninit(upb_handlerattr * attr)2732 void upb_handlerattr_uninit(upb_handlerattr *attr) {
2733   UPB_UNUSED(attr);
2734 }
2735 
upb_handlerattr_sethandlerdata(upb_handlerattr * attr,const void * hd)2736 bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) {
2737   attr->handler_data_ = hd;
2738   return true;
2739 }
2740 
upb_handlerattr_setclosuretype(upb_handlerattr * attr,const void * type)2741 bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) {
2742   attr->closure_type_ = type;
2743   return true;
2744 }
2745 
upb_handlerattr_closuretype(const upb_handlerattr * attr)2746 const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) {
2747   return attr->closure_type_;
2748 }
2749 
upb_handlerattr_setreturnclosuretype(upb_handlerattr * attr,const void * type)2750 bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
2751                                           const void *type) {
2752   attr->return_closure_type_ = type;
2753   return true;
2754 }
2755 
upb_handlerattr_returnclosuretype(const upb_handlerattr * attr)2756 const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) {
2757   return attr->return_closure_type_;
2758 }
2759 
upb_handlerattr_setalwaysok(upb_handlerattr * attr,bool alwaysok)2760 bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) {
2761   attr->alwaysok_ = alwaysok;
2762   return true;
2763 }
2764 
upb_handlerattr_alwaysok(const upb_handlerattr * attr)2765 bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) {
2766   return attr->alwaysok_;
2767 }
2768 
2769 /* upb_bufhandle **************************************************************/
2770 
upb_bufhandle_objofs(const upb_bufhandle * h)2771 size_t upb_bufhandle_objofs(const upb_bufhandle *h) {
2772   return h->objofs_;
2773 }
2774 
2775 /* upb_byteshandler ***********************************************************/
2776 
upb_byteshandler_init(upb_byteshandler * h)2777 void upb_byteshandler_init(upb_byteshandler* h) {
2778   memset(h, 0, sizeof(*h));
2779 }
2780 
2781 /* For when we support handlerfree callbacks. */
upb_byteshandler_uninit(upb_byteshandler * h)2782 void upb_byteshandler_uninit(upb_byteshandler* h) {
2783   UPB_UNUSED(h);
2784 }
2785 
upb_byteshandler_setstartstr(upb_byteshandler * h,upb_startstr_handlerfunc * func,void * d)2786 bool upb_byteshandler_setstartstr(upb_byteshandler *h,
2787                                   upb_startstr_handlerfunc *func, void *d) {
2788   h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;
2789   h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d;
2790   return true;
2791 }
2792 
upb_byteshandler_setstring(upb_byteshandler * h,upb_string_handlerfunc * func,void * d)2793 bool upb_byteshandler_setstring(upb_byteshandler *h,
2794                                 upb_string_handlerfunc *func, void *d) {
2795   h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;
2796   h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d;
2797   return true;
2798 }
2799 
upb_byteshandler_setendstr(upb_byteshandler * h,upb_endfield_handlerfunc * func,void * d)2800 bool upb_byteshandler_setendstr(upb_byteshandler *h,
2801                                 upb_endfield_handlerfunc *func, void *d) {
2802   h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;
2803   h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
2804   return true;
2805 }
2806 /*
2807 ** upb::RefCounted Implementation
2808 **
2809 ** Our key invariants are:
2810 ** 1. reference cycles never span groups
2811 ** 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
2812 **
2813 ** The previous two are how we avoid leaking cycles.  Other important
2814 ** invariants are:
2815 ** 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
2816 **    this implies group(from) == group(to).  (In practice, what we implement
2817 **    is even stronger; "from" and "to" will share a group if there has *ever*
2818 **    been a ref2(to, from), but all that is necessary for correctness is the
2819 **    weaker one).
2820 ** 4. mutable and immutable objects are never in the same group.
2821 */
2822 
2823 
2824 #include <setjmp.h>
2825 
2826 static void freeobj(upb_refcounted *o);
2827 
2828 const char untracked_val;
2829 const void *UPB_UNTRACKED_REF = &untracked_val;
2830 
2831 /* arch-specific atomic primitives  *******************************************/
2832 
2833 #ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/
2834 
atomic_inc(uint32_t * a)2835 static void atomic_inc(uint32_t *a) { (*a)++; }
atomic_dec(uint32_t * a)2836 static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }
2837 
2838 #elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/
2839 
atomic_inc(uint32_t * a)2840 static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
atomic_dec(uint32_t * a)2841 static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }
2842 
2843 #elif defined(WIN32) /*-------------------------------------------------------*/
2844 
2845 #include <Windows.h>
2846 
atomic_inc(upb_atomic_t * a)2847 static void atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); }
atomic_dec(upb_atomic_t * a)2848 static bool atomic_dec(upb_atomic_t *a) {
2849   return InterlockedDecrement(&a->val) == 0;
2850 }
2851 
2852 #else
2853 #error Atomic primitives not defined for your platform/CPU.  \
2854        Implement them or compile with UPB_THREAD_UNSAFE.
2855 #endif
2856 
2857 /* All static objects point to this refcount.
2858  * It is special-cased in ref/unref below.  */
2859 uint32_t static_refcount = -1;
2860 
2861 /* We can avoid atomic ops for statically-declared objects.
2862  * This is a minor optimization but nice since we can avoid degrading under
2863  * contention in this case. */
2864 
refgroup(uint32_t * group)2865 static void refgroup(uint32_t *group) {
2866   if (group != &static_refcount)
2867     atomic_inc(group);
2868 }
2869 
unrefgroup(uint32_t * group)2870 static bool unrefgroup(uint32_t *group) {
2871   if (group == &static_refcount) {
2872     return false;
2873   } else {
2874     return atomic_dec(group);
2875   }
2876 }
2877 
2878 
2879 /* Reference tracking (debug only) ********************************************/
2880 
2881 #ifdef UPB_DEBUG_REFS
2882 
2883 #ifdef UPB_THREAD_UNSAFE
2884 
upb_lock()2885 static void upb_lock() {}
upb_unlock()2886 static void upb_unlock() {}
2887 
2888 #else
2889 
2890 /* User must define functions that lock/unlock a global mutex and link this
2891  * file against them. */
2892 void upb_lock();
2893 void upb_unlock();
2894 
2895 #endif
2896 
2897 /* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
2898  * code-paths that can normally never fail, like upb_refcounted_ref().  Since
2899  * we have no way to propagage out-of-memory errors back to the user, and since
2900  * these errors can only occur in UPB_DEBUG_REFS mode, we use an allocator that
2901  * immediately aborts on failure (avoiding the global allocator, which might
2902  * inject failures). */
2903 
2904 #include <stdlib.h>
2905 
upb_debugrefs_allocfunc(upb_alloc * alloc,void * ptr,size_t oldsize,size_t size)2906 static void *upb_debugrefs_allocfunc(upb_alloc *alloc, void *ptr,
2907                                      size_t oldsize, size_t size) {
2908   UPB_UNUSED(alloc);
2909   UPB_UNUSED(oldsize);
2910   if (size == 0) {
2911     free(ptr);
2912     return NULL;
2913   } else {
2914     void *ret = realloc(ptr, size);
2915 
2916     if (!ret) {
2917       abort();
2918     }
2919 
2920     return ret;
2921   }
2922 }
2923 
2924 upb_alloc upb_alloc_debugrefs = {&upb_debugrefs_allocfunc};
2925 
2926 typedef struct {
2927   int count;  /* How many refs there are (duplicates only allowed for ref2). */
2928   bool is_ref2;
2929 } trackedref;
2930 
trackedref_new(bool is_ref2)2931 static trackedref *trackedref_new(bool is_ref2) {
2932   trackedref *ret = upb_malloc(&upb_alloc_debugrefs, sizeof(*ret));
2933   ret->count = 1;
2934   ret->is_ref2 = is_ref2;
2935   return ret;
2936 }
2937 
track(const upb_refcounted * r,const void * owner,bool ref2)2938 static void track(const upb_refcounted *r, const void *owner, bool ref2) {
2939   upb_value v;
2940 
2941   assert(owner);
2942   if (owner == UPB_UNTRACKED_REF) return;
2943 
2944   upb_lock();
2945   if (upb_inttable_lookupptr(r->refs, owner, &v)) {
2946     trackedref *ref = upb_value_getptr(v);
2947     /* Since we allow multiple ref2's for the same to/from pair without
2948      * allocating separate memory for each one, we lose the fine-grained
2949      * tracking behavior we get with regular refs.  Since ref2s only happen
2950      * inside upb, we'll accept this limitation until/unless there is a really
2951      * difficult upb-internal bug that can't be figured out without it. */
2952     assert(ref2);
2953     assert(ref->is_ref2);
2954     ref->count++;
2955   } else {
2956     trackedref *ref = trackedref_new(ref2);
2957     upb_inttable_insertptr2(r->refs, owner, upb_value_ptr(ref),
2958                             &upb_alloc_debugrefs);
2959     if (ref2) {
2960       /* We know this cast is safe when it is a ref2, because it's coming from
2961        * another refcounted object. */
2962       const upb_refcounted *from = owner;
2963       assert(!upb_inttable_lookupptr(from->ref2s, r, NULL));
2964       upb_inttable_insertptr2(from->ref2s, r, upb_value_ptr(NULL),
2965                               &upb_alloc_debugrefs);
2966     }
2967   }
2968   upb_unlock();
2969 }
2970 
untrack(const upb_refcounted * r,const void * owner,bool ref2)2971 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
2972   upb_value v;
2973   bool found;
2974   trackedref *ref;
2975 
2976   assert(owner);
2977   if (owner == UPB_UNTRACKED_REF) return;
2978 
2979   upb_lock();
2980   found = upb_inttable_lookupptr(r->refs, owner, &v);
2981   /* This assert will fail if an owner attempts to release a ref it didn't have. */
2982   UPB_ASSERT_VAR(found, found);
2983   ref = upb_value_getptr(v);
2984   assert(ref->is_ref2 == ref2);
2985   if (--ref->count == 0) {
2986     free(ref);
2987     upb_inttable_removeptr(r->refs, owner, NULL);
2988     if (ref2) {
2989       /* We know this cast is safe when it is a ref2, because it's coming from
2990        * another refcounted object. */
2991       const upb_refcounted *from = owner;
2992       bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
2993       assert(removed);
2994     }
2995   }
2996   upb_unlock();
2997 }
2998 
checkref(const upb_refcounted * r,const void * owner,bool ref2)2999 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
3000   upb_value v;
3001   bool found;
3002   trackedref *ref;
3003 
3004   upb_lock();
3005   found = upb_inttable_lookupptr(r->refs, owner, &v);
3006   UPB_ASSERT_VAR(found, found);
3007   ref = upb_value_getptr(v);
3008   assert(ref->is_ref2 == ref2);
3009   upb_unlock();
3010 }
3011 
3012 /* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
3013  * originate from the given owner. */
getref2s(const upb_refcounted * owner,upb_inttable * tab)3014 static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
3015   upb_inttable_iter i;
3016 
3017   upb_lock();
3018   upb_inttable_begin(&i, owner->ref2s);
3019   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
3020     upb_value v;
3021     upb_value count;
3022     trackedref *ref;
3023     bool found;
3024 
3025     upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i);
3026 
3027     /* To get the count we need to look in the target's table. */
3028     found = upb_inttable_lookupptr(to->refs, owner, &v);
3029     assert(found);
3030     ref = upb_value_getptr(v);
3031     count = upb_value_int32(ref->count);
3032 
3033     upb_inttable_insertptr2(tab, to, count, &upb_alloc_debugrefs);
3034   }
3035   upb_unlock();
3036 }
3037 
3038 typedef struct {
3039   upb_inttable ref2;
3040   const upb_refcounted *obj;
3041 } check_state;
3042 
visit_check(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)3043 static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
3044                         void *closure) {
3045   check_state *s = closure;
3046   upb_inttable *ref2 = &s->ref2;
3047   upb_value v;
3048   bool removed;
3049   int32_t newcount;
3050 
3051   assert(obj == s->obj);
3052   assert(subobj);
3053   removed = upb_inttable_removeptr(ref2, subobj, &v);
3054   /* The following assertion will fail if the visit() function visits a subobj
3055    * that it did not have a ref2 on, or visits the same subobj too many times. */
3056   assert(removed);
3057   newcount = upb_value_getint32(v) - 1;
3058   if (newcount > 0) {
3059     upb_inttable_insert2(ref2, (uintptr_t)subobj, upb_value_int32(newcount),
3060                          &upb_alloc_debugrefs);
3061   }
3062 }
3063 
visit(const upb_refcounted * r,upb_refcounted_visit * v,void * closure)3064 static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
3065                   void *closure) {
3066   /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
3067    * exactly the set of nodes that visit() should visit.  So we verify visit()'s
3068    * correctness here. */
3069   check_state state;
3070   state.obj = r;
3071   upb_inttable_init2(&state.ref2, UPB_CTYPE_INT32, &upb_alloc_debugrefs);
3072   getref2s(r, &state.ref2);
3073 
3074   /* This should visit any children in the ref2 table. */
3075   if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state);
3076 
3077   /* This assertion will fail if the visit() function missed any children. */
3078   assert(upb_inttable_count(&state.ref2) == 0);
3079   upb_inttable_uninit2(&state.ref2, &upb_alloc_debugrefs);
3080   if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
3081 }
3082 
trackinit(upb_refcounted * r)3083 static void trackinit(upb_refcounted *r) {
3084   r->refs = upb_malloc(&upb_alloc_debugrefs, sizeof(*r->refs));
3085   r->ref2s = upb_malloc(&upb_alloc_debugrefs, sizeof(*r->ref2s));
3086   upb_inttable_init2(r->refs, UPB_CTYPE_PTR, &upb_alloc_debugrefs);
3087   upb_inttable_init2(r->ref2s, UPB_CTYPE_PTR, &upb_alloc_debugrefs);
3088 }
3089 
trackfree(const upb_refcounted * r)3090 static void trackfree(const upb_refcounted *r) {
3091   upb_inttable_uninit2(r->refs, &upb_alloc_debugrefs);
3092   upb_inttable_uninit2(r->ref2s, &upb_alloc_debugrefs);
3093   upb_free(&upb_alloc_debugrefs, r->refs);
3094   upb_free(&upb_alloc_debugrefs, r->ref2s);
3095 }
3096 
3097 #else
3098 
track(const upb_refcounted * r,const void * owner,bool ref2)3099 static void track(const upb_refcounted *r, const void *owner, bool ref2) {
3100   UPB_UNUSED(r);
3101   UPB_UNUSED(owner);
3102   UPB_UNUSED(ref2);
3103 }
3104 
untrack(const upb_refcounted * r,const void * owner,bool ref2)3105 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
3106   UPB_UNUSED(r);
3107   UPB_UNUSED(owner);
3108   UPB_UNUSED(ref2);
3109 }
3110 
checkref(const upb_refcounted * r,const void * owner,bool ref2)3111 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
3112   UPB_UNUSED(r);
3113   UPB_UNUSED(owner);
3114   UPB_UNUSED(ref2);
3115 }
3116 
trackinit(upb_refcounted * r)3117 static void trackinit(upb_refcounted *r) {
3118   UPB_UNUSED(r);
3119 }
3120 
trackfree(const upb_refcounted * r)3121 static void trackfree(const upb_refcounted *r) {
3122   UPB_UNUSED(r);
3123 }
3124 
visit(const upb_refcounted * r,upb_refcounted_visit * v,void * closure)3125 static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
3126                   void *closure) {
3127   if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
3128 }
3129 
3130 #endif  /* UPB_DEBUG_REFS */
3131 
3132 
3133 /* freeze() *******************************************************************/
3134 
3135 /* The freeze() operation is by far the most complicated part of this scheme.
3136  * We compute strongly-connected components and then mutate the graph such that
3137  * we preserve the invariants documented at the top of this file.  And we must
3138  * handle out-of-memory errors gracefully (without leaving the graph
3139  * inconsistent), which adds to the fun. */
3140 
3141 /* The state used by the freeze operation (shared across many functions). */
3142 typedef struct {
3143   int depth;
3144   int maxdepth;
3145   uint64_t index;
3146   /* Maps upb_refcounted* -> attributes (color, etc).  attr layout varies by
3147    * color. */
3148   upb_inttable objattr;
3149   upb_inttable stack;   /* stack of upb_refcounted* for Tarjan's algorithm. */
3150   upb_inttable groups;  /* array of uint32_t*, malloc'd refcounts for new groups */
3151   upb_status *status;
3152   jmp_buf err;
3153 } tarjan;
3154 
3155 static void release_ref2(const upb_refcounted *obj,
3156                          const upb_refcounted *subobj,
3157                          void *closure);
3158 
3159 /* Node attributes -----------------------------------------------------------*/
3160 
3161 /* After our analysis phase all nodes will be either GRAY or WHITE. */
3162 
3163 typedef enum {
3164   BLACK = 0,  /* Object has not been seen. */
3165   GRAY,   /* Object has been found via a refgroup but may not be reachable. */
3166   GREEN,  /* Object is reachable and is currently on the Tarjan stack. */
3167   WHITE   /* Object is reachable and has been assigned a group (SCC). */
3168 } color_t;
3169 
err(tarjan * t)3170 UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
oom(tarjan * t)3171 UPB_NORETURN static void oom(tarjan *t) {
3172   upb_status_seterrmsg(t->status, "out of memory");
3173   err(t);
3174 }
3175 
trygetattr(const tarjan * t,const upb_refcounted * r)3176 static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
3177   upb_value v;
3178   return upb_inttable_lookupptr(&t->objattr, r, &v) ?
3179       upb_value_getuint64(v) : 0;
3180 }
3181 
getattr(const tarjan * t,const upb_refcounted * r)3182 static uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
3183   upb_value v;
3184   bool found = upb_inttable_lookupptr(&t->objattr, r, &v);
3185   UPB_ASSERT_VAR(found, found);
3186   return upb_value_getuint64(v);
3187 }
3188 
setattr(tarjan * t,const upb_refcounted * r,uint64_t attr)3189 static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
3190   upb_inttable_removeptr(&t->objattr, r, NULL);
3191   upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr));
3192 }
3193 
color(tarjan * t,const upb_refcounted * r)3194 static color_t color(tarjan *t, const upb_refcounted *r) {
3195   return trygetattr(t, r) & 0x3;  /* Color is always stored in the low 2 bits. */
3196 }
3197 
set_gray(tarjan * t,const upb_refcounted * r)3198 static void set_gray(tarjan *t, const upb_refcounted *r) {
3199   assert(color(t, r) == BLACK);
3200   setattr(t, r, GRAY);
3201 }
3202 
3203 /* Pushes an obj onto the Tarjan stack and sets it to GREEN. */
push(tarjan * t,const upb_refcounted * r)3204 static void push(tarjan *t, const upb_refcounted *r) {
3205   assert(color(t, r) == BLACK || color(t, r) == GRAY);
3206   /* This defines the attr layout for the GREEN state.  "index" and "lowlink"
3207    * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
3208   setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
3209   if (++t->index == 0x80000000) {
3210     upb_status_seterrmsg(t->status, "too many objects to freeze");
3211     err(t);
3212   }
3213   upb_inttable_push(&t->stack, upb_value_ptr((void*)r));
3214 }
3215 
3216 /* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
3217  * SCC group. */
pop(tarjan * t)3218 static upb_refcounted *pop(tarjan *t) {
3219   upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack));
3220   assert(color(t, r) == GREEN);
3221   /* This defines the attr layout for nodes in the WHITE state.
3222    * Top of group stack is [group, NULL]; we point at group. */
3223   setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8);
3224   return r;
3225 }
3226 
tarjan_newgroup(tarjan * t)3227 static void tarjan_newgroup(tarjan *t) {
3228   uint32_t *group = upb_gmalloc(sizeof(*group));
3229   if (!group) oom(t);
3230   /* Push group and empty group leader (we'll fill in leader later). */
3231   if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) ||
3232       !upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
3233     upb_gfree(group);
3234     oom(t);
3235   }
3236   *group = 0;
3237 }
3238 
idx(tarjan * t,const upb_refcounted * r)3239 static uint32_t idx(tarjan *t, const upb_refcounted *r) {
3240   assert(color(t, r) == GREEN);
3241   return (getattr(t, r) >> 2) & 0x7FFFFFFF;
3242 }
3243 
lowlink(tarjan * t,const upb_refcounted * r)3244 static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
3245   if (color(t, r) == GREEN) {
3246     return getattr(t, r) >> 33;
3247   } else {
3248     return UINT32_MAX;
3249   }
3250 }
3251 
set_lowlink(tarjan * t,const upb_refcounted * r,uint32_t lowlink)3252 static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
3253   assert(color(t, r) == GREEN);
3254   setattr(t, r, ((uint64_t)lowlink << 33) | (getattr(t, r) & 0x1FFFFFFFF));
3255 }
3256 
group(tarjan * t,upb_refcounted * r)3257 static uint32_t *group(tarjan *t, upb_refcounted *r) {
3258   uint64_t groupnum;
3259   upb_value v;
3260   bool found;
3261 
3262   assert(color(t, r) == WHITE);
3263   groupnum = getattr(t, r) >> 8;
3264   found = upb_inttable_lookup(&t->groups, groupnum, &v);
3265   UPB_ASSERT_VAR(found, found);
3266   return upb_value_getptr(v);
3267 }
3268 
3269 /* If the group leader for this object's group has not previously been set,
3270  * the given object is assigned to be its leader. */
groupleader(tarjan * t,upb_refcounted * r)3271 static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
3272   uint64_t leader_slot;
3273   upb_value v;
3274   bool found;
3275 
3276   assert(color(t, r) == WHITE);
3277   leader_slot = (getattr(t, r) >> 8) + 1;
3278   found = upb_inttable_lookup(&t->groups, leader_slot, &v);
3279   UPB_ASSERT_VAR(found, found);
3280   if (upb_value_getptr(v)) {
3281     return upb_value_getptr(v);
3282   } else {
3283     upb_inttable_remove(&t->groups, leader_slot, NULL);
3284     upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r));
3285     return r;
3286   }
3287 }
3288 
3289 
3290 /* Tarjan's algorithm --------------------------------------------------------*/
3291 
3292 /* See:
3293  *   http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
3294 static void do_tarjan(const upb_refcounted *obj, tarjan *t);
3295 
tarjan_visit(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)3296 static void tarjan_visit(const upb_refcounted *obj,
3297                          const upb_refcounted *subobj,
3298                          void *closure) {
3299   tarjan *t = closure;
3300   if (++t->depth > t->maxdepth) {
3301     upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
3302     err(t);
3303   } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
3304     /* Do nothing: we don't want to visit or color already-frozen nodes,
3305      * and WHITE nodes have already been assigned a SCC. */
3306   } else if (color(t, subobj) < GREEN) {
3307     /* Subdef has not yet been visited; recurse on it. */
3308     do_tarjan(subobj, t);
3309     set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
3310   } else if (color(t, subobj) == GREEN) {
3311     /* Subdef is in the stack and hence in the current SCC. */
3312     set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
3313   }
3314   --t->depth;
3315 }
3316 
do_tarjan(const upb_refcounted * obj,tarjan * t)3317 static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
3318   if (color(t, obj) == BLACK) {
3319     /* We haven't seen this object's group; mark the whole group GRAY. */
3320     const upb_refcounted *o = obj;
3321     do { set_gray(t, o); } while ((o = o->next) != obj);
3322   }
3323 
3324   push(t, obj);
3325   visit(obj, tarjan_visit, t);
3326   if (lowlink(t, obj) == idx(t, obj)) {
3327     tarjan_newgroup(t);
3328     while (pop(t) != obj)
3329       ;
3330   }
3331 }
3332 
3333 
3334 /* freeze() ------------------------------------------------------------------*/
3335 
crossref(const upb_refcounted * r,const upb_refcounted * subobj,void * _t)3336 static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
3337                      void *_t) {
3338   tarjan *t = _t;
3339   assert(color(t, r) > BLACK);
3340   if (color(t, subobj) > BLACK && r->group != subobj->group) {
3341     /* Previously this ref was not reflected in subobj->group because they
3342      * were in the same group; now that they are split a ref must be taken. */
3343     refgroup(subobj->group);
3344   }
3345 }
3346 
freeze(upb_refcounted * const * roots,int n,upb_status * s,int maxdepth)3347 static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
3348                    int maxdepth) {
3349   volatile bool ret = false;
3350   int i;
3351   upb_inttable_iter iter;
3352 
3353   /* We run in two passes so that we can allocate all memory before performing
3354    * any mutation of the input -- this allows us to leave the input unchanged
3355    * in the case of memory allocation failure. */
3356   tarjan t;
3357   t.index = 0;
3358   t.depth = 0;
3359   t.maxdepth = maxdepth;
3360   t.status = s;
3361   if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
3362   if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
3363   if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
3364   if (setjmp(t.err) != 0) goto err4;
3365 
3366 
3367   for (i = 0; i < n; i++) {
3368     if (color(&t, roots[i]) < GREEN) {
3369       do_tarjan(roots[i], &t);
3370     }
3371   }
3372 
3373   /* If we've made it this far, no further errors are possible so it's safe to
3374    * mutate the objects without risk of leaving them in an inconsistent state. */
3375   ret = true;
3376 
3377   /* The transformation that follows requires care.  The preconditions are:
3378    * - all objects in attr map are WHITE or GRAY, and are in mutable groups
3379    *   (groups of all mutable objs)
3380    * - no ref2(to, from) refs have incremented count(to) if both "to" and
3381    *   "from" are in our attr map (this follows from invariants (2) and (3)) */
3382 
3383   /* Pass 1: we remove WHITE objects from their mutable groups, and add them to
3384    * new groups  according to the SCC's we computed.  These new groups will
3385    * consist of only frozen objects.  None will be immediately collectible,
3386    * because WHITE objects are by definition reachable from one of "roots",
3387    * which the caller must own refs on. */
3388   upb_inttable_begin(&iter, &t.objattr);
3389   for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
3390     upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
3391     /* Since removal from a singly-linked list requires access to the object's
3392      * predecessor, we consider obj->next instead of obj for moving.  With the
3393      * while() loop we guarantee that we will visit every node's predecessor.
3394      * Proof:
3395      *  1. every node's predecessor is in our attr map.
3396      *  2. though the loop body may change a node's predecessor, it will only
3397      *     change it to be the node we are currently operating on, so with a
3398      *     while() loop we guarantee ourselves the chance to remove each node. */
3399     while (color(&t, obj->next) == WHITE &&
3400            group(&t, obj->next) != obj->next->group) {
3401       upb_refcounted *leader;
3402 
3403       /* Remove from old group. */
3404       upb_refcounted *move = obj->next;
3405       if (obj == move) {
3406         /* Removing the last object from a group. */
3407         assert(*obj->group == obj->individual_count);
3408         upb_gfree(obj->group);
3409       } else {
3410         obj->next = move->next;
3411         /* This may decrease to zero; we'll collect GRAY objects (if any) that
3412          * remain in the group in the third pass. */
3413         assert(*move->group >= move->individual_count);
3414         *move->group -= move->individual_count;
3415       }
3416 
3417       /* Add to new group. */
3418       leader = groupleader(&t, move);
3419       if (move == leader) {
3420         /* First object added to new group is its leader. */
3421         move->group = group(&t, move);
3422         move->next = move;
3423         *move->group = move->individual_count;
3424       } else {
3425         /* Group already has at least one object in it. */
3426         assert(leader->group == group(&t, move));
3427         move->group = group(&t, move);
3428         move->next = leader->next;
3429         leader->next = move;
3430         *move->group += move->individual_count;
3431       }
3432 
3433       move->is_frozen = true;
3434     }
3435   }
3436 
3437   /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
3438    * increment count(to) if group(obj) != group(to) (which could now be the
3439    * case if "to" was just frozen). */
3440   upb_inttable_begin(&iter, &t.objattr);
3441   for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
3442     upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
3443     visit(obj, crossref, &t);
3444   }
3445 
3446   /* Pass 3: GRAY objects are collected if their group's refcount dropped to
3447    * zero when we removed its white nodes.  This can happen if they had only
3448    * been kept alive by virtue of sharing a group with an object that was just
3449    * frozen.
3450    *
3451    * It is important that we do this last, since the GRAY object's free()
3452    * function could call unref2() on just-frozen objects, which will decrement
3453    * refs that were added in pass 2. */
3454   upb_inttable_begin(&iter, &t.objattr);
3455   for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
3456     upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
3457     if (obj->group == NULL || *obj->group == 0) {
3458       if (obj->group) {
3459         upb_refcounted *o;
3460 
3461         /* We eagerly free() the group's count (since we can't easily determine
3462          * the group's remaining size it's the easiest way to ensure it gets
3463          * done). */
3464         upb_gfree(obj->group);
3465 
3466         /* Visit to release ref2's (done in a separate pass since release_ref2
3467          * depends on o->group being unmodified so it can test merged()). */
3468         o = obj;
3469         do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);
3470 
3471         /* Mark "group" fields as NULL so we know to free the objects later in
3472          * this loop, but also don't try to delete the group twice. */
3473         o = obj;
3474         do { o->group = NULL; } while ((o = o->next) != obj);
3475       }
3476       freeobj(obj);
3477     }
3478   }
3479 
3480 err4:
3481   if (!ret) {
3482     upb_inttable_begin(&iter, &t.groups);
3483     for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
3484       upb_gfree(upb_value_getptr(upb_inttable_iter_value(&iter)));
3485   }
3486   upb_inttable_uninit(&t.groups);
3487 err3:
3488   upb_inttable_uninit(&t.stack);
3489 err2:
3490   upb_inttable_uninit(&t.objattr);
3491 err1:
3492   return ret;
3493 }
3494 
3495 
3496 /* Misc internal functions  ***************************************************/
3497 
merged(const upb_refcounted * r,const upb_refcounted * r2)3498 static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
3499   return r->group == r2->group;
3500 }
3501 
merge(upb_refcounted * r,upb_refcounted * from)3502 static void merge(upb_refcounted *r, upb_refcounted *from) {
3503   upb_refcounted *base;
3504   upb_refcounted *tmp;
3505 
3506   if (merged(r, from)) return;
3507   *r->group += *from->group;
3508   upb_gfree(from->group);
3509   base = from;
3510 
3511   /* Set all refcount pointers in the "from" chain to the merged refcount.
3512    *
3513    * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
3514    * if the user continuously extends a group by one object.  Prevent this by
3515    * using one of the techniques in this paper:
3516    *     ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf */
3517   do { from->group = r->group; } while ((from = from->next) != base);
3518 
3519   /* Merge the two circularly linked lists by swapping their next pointers. */
3520   tmp = r->next;
3521   r->next = base->next;
3522   base->next = tmp;
3523 }
3524 
3525 static void unref(const upb_refcounted *r);
3526 
release_ref2(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)3527 static void release_ref2(const upb_refcounted *obj,
3528                          const upb_refcounted *subobj,
3529                          void *closure) {
3530   UPB_UNUSED(closure);
3531   untrack(subobj, obj, true);
3532   if (!merged(obj, subobj)) {
3533     assert(subobj->is_frozen);
3534     unref(subobj);
3535   }
3536 }
3537 
unref(const upb_refcounted * r)3538 static void unref(const upb_refcounted *r) {
3539   if (unrefgroup(r->group)) {
3540     const upb_refcounted *o;
3541 
3542     upb_gfree(r->group);
3543 
3544     /* In two passes, since release_ref2 needs a guarantee that any subobjs
3545      * are alive. */
3546     o = r;
3547     do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
3548 
3549     o = r;
3550     do {
3551       const upb_refcounted *next = o->next;
3552       assert(o->is_frozen || o->individual_count == 0);
3553       freeobj((upb_refcounted*)o);
3554       o = next;
3555     } while(o != r);
3556   }
3557 }
3558 
freeobj(upb_refcounted * o)3559 static void freeobj(upb_refcounted *o) {
3560   trackfree(o);
3561   o->vtbl->free((upb_refcounted*)o);
3562 }
3563 
3564 
3565 /* Public interface ***********************************************************/
3566 
upb_refcounted_init(upb_refcounted * r,const struct upb_refcounted_vtbl * vtbl,const void * owner)3567 bool upb_refcounted_init(upb_refcounted *r,
3568                          const struct upb_refcounted_vtbl *vtbl,
3569                          const void *owner) {
3570 #ifndef NDEBUG
3571   /* Endianness check.  This is unrelated to upb_refcounted, it's just a
3572    * convenient place to put the check that we can be assured will run for
3573    * basically every program using upb. */
3574   const int x = 1;
3575 #ifdef UPB_BIG_ENDIAN
3576   assert(*(char*)&x != 1);
3577 #else
3578   assert(*(char*)&x == 1);
3579 #endif
3580 #endif
3581 
3582   r->next = r;
3583   r->vtbl = vtbl;
3584   r->individual_count = 0;
3585   r->is_frozen = false;
3586   r->group = upb_gmalloc(sizeof(*r->group));
3587   if (!r->group) return false;
3588   *r->group = 0;
3589   trackinit(r);
3590   upb_refcounted_ref(r, owner);
3591   return true;
3592 }
3593 
upb_refcounted_isfrozen(const upb_refcounted * r)3594 bool upb_refcounted_isfrozen(const upb_refcounted *r) {
3595   return r->is_frozen;
3596 }
3597 
upb_refcounted_ref(const upb_refcounted * r,const void * owner)3598 void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
3599   track(r, owner, false);
3600   if (!r->is_frozen)
3601     ((upb_refcounted*)r)->individual_count++;
3602   refgroup(r->group);
3603 }
3604 
upb_refcounted_unref(const upb_refcounted * r,const void * owner)3605 void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
3606   untrack(r, owner, false);
3607   if (!r->is_frozen)
3608     ((upb_refcounted*)r)->individual_count--;
3609   unref(r);
3610 }
3611 
upb_refcounted_ref2(const upb_refcounted * r,upb_refcounted * from)3612 void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
3613   assert(!from->is_frozen);  /* Non-const pointer implies this. */
3614   track(r, from, true);
3615   if (r->is_frozen) {
3616     refgroup(r->group);
3617   } else {
3618     merge((upb_refcounted*)r, from);
3619   }
3620 }
3621 
upb_refcounted_unref2(const upb_refcounted * r,upb_refcounted * from)3622 void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
3623   assert(!from->is_frozen);  /* Non-const pointer implies this. */
3624   untrack(r, from, true);
3625   if (r->is_frozen) {
3626     unref(r);
3627   } else {
3628     assert(merged(r, from));
3629   }
3630 }
3631 
upb_refcounted_donateref(const upb_refcounted * r,const void * from,const void * to)3632 void upb_refcounted_donateref(
3633     const upb_refcounted *r, const void *from, const void *to) {
3634   assert(from != to);
3635   if (to != NULL)
3636     upb_refcounted_ref(r, to);
3637   if (from != NULL)
3638     upb_refcounted_unref(r, from);
3639 }
3640 
upb_refcounted_checkref(const upb_refcounted * r,const void * owner)3641 void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
3642   checkref(r, owner, false);
3643 }
3644 
upb_refcounted_freeze(upb_refcounted * const * roots,int n,upb_status * s,int maxdepth)3645 bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
3646                            int maxdepth) {
3647   int i;
3648   bool ret;
3649   for (i = 0; i < n; i++) {
3650     assert(!roots[i]->is_frozen);
3651   }
3652   ret = freeze(roots, n, s, maxdepth);
3653   assert(!s || ret == upb_ok(s));
3654   return ret;
3655 }
3656 
3657 
3658 /* Fallback implementation if the shim is not specialized by the JIT. */
3659 #define SHIM_WRITER(type, ctype)                                              \
3660   bool upb_shim_set ## type (void *c, const void *hd, ctype val) {            \
3661     uint8_t *m = c;                                                           \
3662     const upb_shim_data *d = hd;                                              \
3663     if (d->hasbit > 0)                                                        \
3664       *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8);                   \
3665     *(ctype*)&m[d->offset] = val;                                             \
3666     return true;                                                              \
3667   }                                                                           \
3668 
SHIM_WRITER(double,double)3669 SHIM_WRITER(double, double)
3670 SHIM_WRITER(float,  float)
3671 SHIM_WRITER(int32,  int32_t)
3672 SHIM_WRITER(int64,  int64_t)
3673 SHIM_WRITER(uint32, uint32_t)
3674 SHIM_WRITER(uint64, uint64_t)
3675 SHIM_WRITER(bool,   bool)
3676 #undef SHIM_WRITER
3677 
3678 bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
3679                   int32_t hasbit) {
3680   upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
3681   bool ok;
3682 
3683   upb_shim_data *d = upb_gmalloc(sizeof(*d));
3684   if (!d) return false;
3685   d->offset = offset;
3686   d->hasbit = hasbit;
3687 
3688   upb_handlerattr_sethandlerdata(&attr, d);
3689   upb_handlerattr_setalwaysok(&attr, true);
3690   upb_handlers_addcleanup(h, d, upb_gfree);
3691 
3692 #define TYPE(u, l) \
3693   case UPB_TYPE_##u: \
3694     ok = upb_handlers_set##l(h, f, upb_shim_set##l, &attr); break;
3695 
3696   ok = false;
3697 
3698   switch (upb_fielddef_type(f)) {
3699     TYPE(INT64,  int64);
3700     TYPE(INT32,  int32);
3701     TYPE(ENUM,   int32);
3702     TYPE(UINT64, uint64);
3703     TYPE(UINT32, uint32);
3704     TYPE(DOUBLE, double);
3705     TYPE(FLOAT,  float);
3706     TYPE(BOOL,   bool);
3707     default: assert(false); break;
3708   }
3709 #undef TYPE
3710 
3711   upb_handlerattr_uninit(&attr);
3712   return ok;
3713 }
3714 
upb_shim_getdata(const upb_handlers * h,upb_selector_t s,upb_fieldtype_t * type)3715 const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
3716                                       upb_fieldtype_t *type) {
3717   upb_func *f = upb_handlers_gethandler(h, s);
3718 
3719   if ((upb_int64_handlerfunc*)f == upb_shim_setint64) {
3720     *type = UPB_TYPE_INT64;
3721   } else if ((upb_int32_handlerfunc*)f == upb_shim_setint32) {
3722     *type = UPB_TYPE_INT32;
3723   } else if ((upb_uint64_handlerfunc*)f == upb_shim_setuint64) {
3724     *type = UPB_TYPE_UINT64;
3725   } else if ((upb_uint32_handlerfunc*)f == upb_shim_setuint32) {
3726     *type = UPB_TYPE_UINT32;
3727   } else if ((upb_double_handlerfunc*)f == upb_shim_setdouble) {
3728     *type = UPB_TYPE_DOUBLE;
3729   } else if ((upb_float_handlerfunc*)f == upb_shim_setfloat) {
3730     *type = UPB_TYPE_FLOAT;
3731   } else if ((upb_bool_handlerfunc*)f == upb_shim_setbool) {
3732     *type = UPB_TYPE_BOOL;
3733   } else {
3734     return NULL;
3735   }
3736 
3737   return (const upb_shim_data*)upb_handlers_gethandlerdata(h, s);
3738 }
3739 
3740 
3741 #include <string.h>
3742 
upb_symtab_free(upb_refcounted * r)3743 static void upb_symtab_free(upb_refcounted *r) {
3744   upb_symtab *s = (upb_symtab*)r;
3745   upb_strtable_iter i;
3746   upb_strtable_begin(&i, &s->symtab);
3747   for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
3748     const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
3749     upb_def_unref(def, s);
3750   }
3751   upb_strtable_uninit(&s->symtab);
3752   upb_gfree(s);
3753 }
3754 
upb_symtab_new(const void * owner)3755 upb_symtab *upb_symtab_new(const void *owner) {
3756   static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free};
3757 
3758   upb_symtab *s = upb_gmalloc(sizeof(*s));
3759   if (!s) {
3760     return NULL;
3761   }
3762 
3763   upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner);
3764   upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
3765   return s;
3766 }
3767 
upb_symtab_freeze(upb_symtab * s)3768 void upb_symtab_freeze(upb_symtab *s) {
3769   upb_refcounted *r;
3770   bool ok;
3771 
3772   assert(!upb_symtab_isfrozen(s));
3773   r = upb_symtab_upcast_mutable(s);
3774   /* The symtab does not take ref2's (see refcounted.h) on the defs, because
3775    * defs cannot refer back to the table and therefore cannot create cycles.  So
3776    * 0 will suffice for maxdepth here. */
3777   ok = upb_refcounted_freeze(&r, 1, NULL, 0);
3778   UPB_ASSERT_VAR(ok, ok);
3779 }
3780 
upb_symtab_lookup(const upb_symtab * s,const char * sym)3781 const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
3782   upb_value v;
3783   upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ?
3784       upb_value_getptr(v) : NULL;
3785   return ret;
3786 }
3787 
upb_symtab_lookupmsg(const upb_symtab * s,const char * sym)3788 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
3789   upb_value v;
3790   upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3791       upb_value_getptr(v) : NULL;
3792   return def ? upb_dyncast_msgdef(def) : NULL;
3793 }
3794 
upb_symtab_lookupenum(const upb_symtab * s,const char * sym)3795 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
3796   upb_value v;
3797   upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3798       upb_value_getptr(v) : NULL;
3799   return def ? upb_dyncast_enumdef(def) : NULL;
3800 }
3801 
3802 /* Given a symbol and the base symbol inside which it is defined, find the
3803  * symbol's definition in t. */
upb_resolvename(const upb_strtable * t,const char * base,const char * sym)3804 static upb_def *upb_resolvename(const upb_strtable *t,
3805                                 const char *base, const char *sym) {
3806   if(strlen(sym) == 0) return NULL;
3807   if(sym[0] == '.') {
3808     /* Symbols starting with '.' are absolute, so we do a single lookup.
3809      * Slice to omit the leading '.' */
3810     upb_value v;
3811     return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
3812   } else {
3813     /* Remove components from base until we find an entry or run out.
3814      * TODO: This branch is totally broken, but currently not used. */
3815     (void)base;
3816     assert(false);
3817     return NULL;
3818   }
3819 }
3820 
upb_symtab_resolve(const upb_symtab * s,const char * base,const char * sym)3821 const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
3822                                   const char *sym) {
3823   upb_def *ret = upb_resolvename(&s->symtab, base, sym);
3824   return ret;
3825 }
3826 
3827 /* Starts a depth-first traversal at "def", recursing into any subdefs
3828  * (ie. submessage types).  Adds duplicates of existing defs to addtab
3829  * wherever necessary, so that the resulting symtab will be consistent once
3830  * addtab is added.
3831  *
3832  * More specifically, if any def D is found in the DFS that:
3833  *
3834  *   1. can reach a def that is being replaced by something in addtab, AND
3835  *
3836  *   2. is not itself being replaced already (ie. this name doesn't already
3837  *      exist in addtab)
3838  *
3839  * ...then a duplicate (new copy) of D will be added to addtab.
3840  *
3841  * Returns true if this happened for any def reachable from "def."
3842  *
3843  * It is slightly tricky to do this correctly in the presence of cycles.  If we
3844  * detect that our DFS has hit a cycle, we might not yet know if any SCCs on
3845  * our stack can reach a def in addtab or not.  Once we figure this out, that
3846  * answer needs to apply to *all* defs in these SCCs, even if we visited them
3847  * already.  So a straight up one-pass cycle-detecting DFS won't work.
3848  *
3849  * To work around this problem, we traverse each SCC (which we already
3850  * computed, since these defs are frozen) as a single node.  We first compute
3851  * whether the SCC as a whole can reach any def in addtab, then we dup (or not)
3852  * the entire SCC.  This requires breaking the encapsulation of upb_refcounted,
3853  * since that is where we get the data about what SCC we are in. */
upb_resolve_dfs(const upb_def * def,upb_strtable * addtab,const void * new_owner,upb_inttable * seen,upb_status * s)3854 static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
3855                             const void *new_owner, upb_inttable *seen,
3856                             upb_status *s) {
3857   upb_value v;
3858   bool need_dup;
3859   const upb_def *base;
3860   const void* memoize_key;
3861 
3862   /* Memoize results of this function for efficiency (since we're traversing a
3863    * DAG this is not needed to limit the depth of the search).
3864    *
3865    * We memoize by SCC instead of by individual def. */
3866   memoize_key = def->base.group;
3867 
3868   if (upb_inttable_lookupptr(seen, memoize_key, &v))
3869     return upb_value_getbool(v);
3870 
3871   /* Visit submessages for all messages in the SCC. */
3872   need_dup = false;
3873   base = def;
3874   do {
3875     upb_value v;
3876     const upb_msgdef *m;
3877 
3878     assert(upb_def_isfrozen(def));
3879     if (def->type == UPB_DEF_FIELD) continue;
3880     if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) {
3881       need_dup = true;
3882     }
3883 
3884     /* For messages, continue the recursion by visiting all subdefs, but only
3885      * ones in different SCCs. */
3886     m = upb_dyncast_msgdef(def);
3887     if (m) {
3888       upb_msg_field_iter i;
3889       for(upb_msg_field_begin(&i, m);
3890           !upb_msg_field_done(&i);
3891           upb_msg_field_next(&i)) {
3892         upb_fielddef *f = upb_msg_iter_field(&i);
3893         const upb_def *subdef;
3894 
3895         if (!upb_fielddef_hassubdef(f)) continue;
3896         subdef = upb_fielddef_subdef(f);
3897 
3898         /* Skip subdefs in this SCC. */
3899         if (def->base.group == subdef->base.group) continue;
3900 
3901         /* |= to avoid short-circuit; we need its side-effects. */
3902         need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s);
3903         if (!upb_ok(s)) return false;
3904       }
3905     }
3906   } while ((def = (upb_def*)def->base.next) != base);
3907 
3908   if (need_dup) {
3909     /* Dup all defs in this SCC that don't already have entries in addtab. */
3910     def = base;
3911     do {
3912       const char *name;
3913 
3914       if (def->type == UPB_DEF_FIELD) continue;
3915       name = upb_def_fullname(def);
3916       if (!upb_strtable_lookup(addtab, name, NULL)) {
3917         upb_def *newdef = upb_def_dup(def, new_owner);
3918         if (!newdef) goto oom;
3919         newdef->came_from_user = false;
3920         if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef)))
3921           goto oom;
3922       }
3923     } while ((def = (upb_def*)def->base.next) != base);
3924   }
3925 
3926   upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup));
3927   return need_dup;
3928 
3929 oom:
3930   upb_status_seterrmsg(s, "out of memory");
3931   return false;
3932 }
3933 
3934 /* TODO(haberman): we need a lot more testing of error conditions.
3935  * The came_from_user stuff in particular is not tested. */
symtab_add(upb_symtab * s,upb_def * const * defs,size_t n,void * ref_donor,upb_refcounted * freeze_also,upb_status * status)3936 static bool symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
3937                        void *ref_donor, upb_refcounted *freeze_also,
3938                        upb_status *status) {
3939   size_t i;
3940   size_t add_n;
3941   size_t freeze_n;
3942   upb_strtable_iter iter;
3943   upb_refcounted **add_objs = NULL;
3944   upb_def **add_defs = NULL;
3945   size_t add_objs_size;
3946   upb_strtable addtab;
3947   upb_inttable seen;
3948 
3949   if (n == 0 && !freeze_also) {
3950     return true;
3951   }
3952 
3953   assert(!upb_symtab_isfrozen(s));
3954   if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
3955     upb_status_seterrmsg(status, "out of memory");
3956     return false;
3957   }
3958 
3959   /* Add new defs to our "add" set. */
3960   for (i = 0; i < n; i++) {
3961     upb_def *def = defs[i];
3962     const char *fullname;
3963     upb_fielddef *f;
3964 
3965     if (upb_def_isfrozen(def)) {
3966       upb_status_seterrmsg(status, "added defs must be mutable");
3967       goto err;
3968     }
3969     assert(!upb_def_isfrozen(def));
3970     fullname = upb_def_fullname(def);
3971     if (!fullname) {
3972       upb_status_seterrmsg(
3973           status, "Anonymous defs cannot be added to a symtab");
3974       goto err;
3975     }
3976 
3977     f = upb_dyncast_fielddef_mutable(def);
3978 
3979     if (f) {
3980       if (!upb_fielddef_containingtypename(f)) {
3981         upb_status_seterrmsg(status,
3982                              "Standalone fielddefs must have a containing type "
3983                              "(extendee) name set");
3984         goto err;
3985       }
3986     } else {
3987       if (upb_strtable_lookup(&addtab, fullname, NULL)) {
3988         upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
3989         goto err;
3990       }
3991       /* We need this to back out properly, because if there is a failure we
3992        * need to donate the ref back to the caller. */
3993       def->came_from_user = true;
3994       upb_def_donateref(def, ref_donor, s);
3995       if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
3996         goto oom_err;
3997     }
3998   }
3999 
4000   /* Add standalone fielddefs (ie. extensions) to the appropriate messages.
4001    * If the appropriate message only exists in the existing symtab, duplicate
4002    * it so we have a mutable copy we can add the fields to. */
4003   for (i = 0; i < n; i++) {
4004     upb_def *def = defs[i];
4005     upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
4006     const char *msgname;
4007     upb_value v;
4008     upb_msgdef *m;
4009 
4010     if (!f) continue;
4011     msgname = upb_fielddef_containingtypename(f);
4012     /* We validated this earlier in this function. */
4013     assert(msgname);
4014 
4015     /* If the extendee name is absolutely qualified, move past the initial ".".
4016      * TODO(haberman): it is not obvious what it would mean if this was not
4017      * absolutely qualified. */
4018     if (msgname[0] == '.') {
4019       msgname++;
4020     }
4021 
4022     if (upb_strtable_lookup(&addtab, msgname, &v)) {
4023       /* Extendee is in the set of defs the user asked us to add. */
4024       m = upb_value_getptr(v);
4025     } else {
4026       /* Need to find and dup the extendee from the existing symtab. */
4027       const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname);
4028       if (!frozen_m) {
4029         upb_status_seterrf(status,
4030                            "Tried to extend message %s that does not exist "
4031                            "in this SymbolTable.",
4032                            msgname);
4033         goto err;
4034       }
4035       m = upb_msgdef_dup(frozen_m, s);
4036       if (!m) goto oom_err;
4037       if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) {
4038         upb_msgdef_unref(m, s);
4039         goto oom_err;
4040       }
4041     }
4042 
4043     if (!upb_msgdef_addfield(m, f, ref_donor, status)) {
4044       goto err;
4045     }
4046   }
4047 
4048   /* Add dups of any existing def that can reach a def with the same name as
4049    * anything in our "add" set. */
4050   if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err;
4051   upb_strtable_begin(&iter, &s->symtab);
4052   for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
4053     upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
4054     upb_resolve_dfs(def, &addtab, s, &seen, status);
4055     if (!upb_ok(status)) goto err;
4056   }
4057   upb_inttable_uninit(&seen);
4058 
4059   /* Now using the table, resolve symbolic references for subdefs. */
4060   upb_strtable_begin(&iter, &addtab);
4061   for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
4062     const char *base;
4063     upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
4064     upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
4065     upb_msg_field_iter j;
4066 
4067     if (!m) continue;
4068     /* Type names are resolved relative to the message in which they appear. */
4069     base = upb_msgdef_fullname(m);
4070 
4071     for(upb_msg_field_begin(&j, m);
4072         !upb_msg_field_done(&j);
4073         upb_msg_field_next(&j)) {
4074       upb_fielddef *f = upb_msg_iter_field(&j);
4075       const char *name = upb_fielddef_subdefname(f);
4076       if (name && !upb_fielddef_subdef(f)) {
4077         /* Try the lookup in the current set of to-be-added defs first. If not
4078          * there, try existing defs. */
4079         upb_def *subdef = upb_resolvename(&addtab, base, name);
4080         if (subdef == NULL) {
4081           subdef = upb_resolvename(&s->symtab, base, name);
4082         }
4083         if (subdef == NULL) {
4084           upb_status_seterrf(
4085               status, "couldn't resolve name '%s' in message '%s'", name, base);
4086           goto err;
4087         } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
4088           goto err;
4089         }
4090       }
4091     }
4092   }
4093 
4094   /* We need an array of the defs in addtab, for passing to
4095    * upb_refcounted_freeze(). */
4096   add_objs_size = upb_strtable_count(&addtab);
4097   if (freeze_also) {
4098     add_objs_size++;
4099   }
4100 
4101   add_defs = upb_gmalloc(sizeof(void*) * add_objs_size);
4102   if (add_defs == NULL) goto oom_err;
4103   upb_strtable_begin(&iter, &addtab);
4104   for (add_n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
4105     add_defs[add_n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
4106   }
4107 
4108   /* Validate defs. */
4109   if (!_upb_def_validate(add_defs, add_n, status)) {
4110     goto err;
4111   }
4112 
4113   /* Cheat a little and give the array a new type.
4114    * This is probably undefined behavior, but this code will be deleted soon. */
4115   add_objs = (upb_refcounted**)add_defs;
4116 
4117   freeze_n = add_n;
4118   if (freeze_also) {
4119     add_objs[freeze_n++] = freeze_also;
4120   }
4121 
4122   if (!upb_refcounted_freeze(add_objs, freeze_n, status,
4123                              UPB_MAX_MESSAGE_DEPTH * 2)) {
4124     goto err;
4125   }
4126 
4127   /* This must be delayed until all errors have been detected, since error
4128    * recovery code uses this table to cleanup defs. */
4129   upb_strtable_uninit(&addtab);
4130 
4131   /* TODO(haberman) we don't properly handle errors after this point (like
4132    * OOM in upb_strtable_insert() below). */
4133   for (i = 0; i < add_n; i++) {
4134     upb_def *def = (upb_def*)add_objs[i];
4135     const char *name = upb_def_fullname(def);
4136     upb_value v;
4137     bool success;
4138 
4139     if (upb_strtable_remove(&s->symtab, name, &v)) {
4140       const upb_def *def = upb_value_getptr(v);
4141       upb_def_unref(def, s);
4142     }
4143     success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
4144     UPB_ASSERT_VAR(success, success == true);
4145   }
4146   upb_gfree(add_defs);
4147   return true;
4148 
4149 oom_err:
4150   upb_status_seterrmsg(status, "out of memory");
4151 err: {
4152     /* For defs the user passed in, we need to donate the refs back.  For defs
4153      * we dup'd, we need to just unref them. */
4154     upb_strtable_begin(&iter, &addtab);
4155     for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
4156       upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
4157       bool came_from_user = def->came_from_user;
4158       def->came_from_user = false;
4159       if (came_from_user) {
4160         upb_def_donateref(def, s, ref_donor);
4161       } else {
4162         upb_def_unref(def, s);
4163       }
4164     }
4165   }
4166   upb_strtable_uninit(&addtab);
4167   upb_gfree(add_defs);
4168   assert(!upb_ok(status));
4169   return false;
4170 }
4171 
upb_symtab_add(upb_symtab * s,upb_def * const * defs,size_t n,void * ref_donor,upb_status * status)4172 bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, size_t n,
4173                     void *ref_donor, upb_status *status) {
4174   return symtab_add(s, defs, n, ref_donor, NULL, status);
4175 }
4176 
upb_symtab_addfile(upb_symtab * s,upb_filedef * file,upb_status * status)4177 bool upb_symtab_addfile(upb_symtab *s, upb_filedef *file, upb_status *status) {
4178   size_t n;
4179   size_t i;
4180   upb_def **defs;
4181   bool ret;
4182 
4183   n = upb_filedef_defcount(file);
4184   defs = upb_gmalloc(sizeof(*defs) * n);
4185 
4186   if (defs == NULL) {
4187     upb_status_seterrmsg(status, "Out of memory");
4188     return false;
4189   }
4190 
4191   for (i = 0; i < n; i++) {
4192     defs[i] = upb_filedef_mutabledef(file, i);
4193   }
4194 
4195   ret = symtab_add(s, defs, n, NULL, upb_filedef_upcast_mutable(file), status);
4196 
4197   upb_gfree(defs);
4198   return ret;
4199 }
4200 
4201 /* Iteration. */
4202 
advance_to_matching(upb_symtab_iter * iter)4203 static void advance_to_matching(upb_symtab_iter *iter) {
4204   if (iter->type == UPB_DEF_ANY)
4205     return;
4206 
4207   while (!upb_strtable_done(&iter->iter) &&
4208          iter->type != upb_symtab_iter_def(iter)->type) {
4209     upb_strtable_next(&iter->iter);
4210   }
4211 }
4212 
upb_symtab_begin(upb_symtab_iter * iter,const upb_symtab * s,upb_deftype_t type)4213 void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
4214                       upb_deftype_t type) {
4215   upb_strtable_begin(&iter->iter, &s->symtab);
4216   iter->type = type;
4217   advance_to_matching(iter);
4218 }
4219 
upb_symtab_next(upb_symtab_iter * iter)4220 void upb_symtab_next(upb_symtab_iter *iter) {
4221   upb_strtable_next(&iter->iter);
4222   advance_to_matching(iter);
4223 }
4224 
upb_symtab_done(const upb_symtab_iter * iter)4225 bool upb_symtab_done(const upb_symtab_iter *iter) {
4226   return upb_strtable_done(&iter->iter);
4227 }
4228 
upb_symtab_iter_def(const upb_symtab_iter * iter)4229 const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
4230   return upb_value_getptr(upb_strtable_iter_value(&iter->iter));
4231 }
4232 /*
4233 ** upb_table Implementation
4234 **
4235 ** Implementation is heavily inspired by Lua's ltable.c.
4236 */
4237 
4238 
4239 #include <string.h>
4240 
4241 #define UPB_MAXARRSIZE 16  /* 64k. */
4242 
4243 /* From Chromium. */
4244 #define ARRAY_SIZE(x) \
4245     ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
4246 
4247 #ifdef NDEBUG
upb_check_alloc(upb_table * t,upb_alloc * a)4248 static void upb_check_alloc(upb_table *t, upb_alloc *a) {
4249   UPB_UNUSED(t);
4250   UPB_UNUSED(a);
4251 }
4252 #else
upb_check_alloc(upb_table * t,upb_alloc * a)4253 static void upb_check_alloc(upb_table *t, upb_alloc *a) {
4254   assert(t->alloc == a);
4255 }
4256 #endif
4257 
4258 static const double MAX_LOAD = 0.85;
4259 
4260 /* The minimum utilization of the array part of a mixed hash/array table.  This
4261  * is a speed/memory-usage tradeoff (though it's not straightforward because of
4262  * cache effects).  The lower this is, the more memory we'll use. */
4263 static const double MIN_DENSITY = 0.1;
4264 
is_pow2(uint64_t v)4265 bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; }
4266 
log2ceil(uint64_t v)4267 int log2ceil(uint64_t v) {
4268   int ret = 0;
4269   bool pow2 = is_pow2(v);
4270   while (v >>= 1) ret++;
4271   ret = pow2 ? ret : ret + 1;  /* Ceiling. */
4272   return UPB_MIN(UPB_MAXARRSIZE, ret);
4273 }
4274 
upb_strdup(const char * s,upb_alloc * a)4275 char *upb_strdup(const char *s, upb_alloc *a) {
4276   return upb_strdup2(s, strlen(s), a);
4277 }
4278 
upb_strdup2(const char * s,size_t len,upb_alloc * a)4279 char *upb_strdup2(const char *s, size_t len, upb_alloc *a) {
4280   size_t n;
4281   char *p;
4282 
4283   /* Prevent overflow errors. */
4284   if (len == SIZE_MAX) return NULL;
4285   /* Always null-terminate, even if binary data; but don't rely on the input to
4286    * have a null-terminating byte since it may be a raw binary buffer. */
4287   n = len + 1;
4288   p = upb_malloc(a, n);
4289   if (p) {
4290     memcpy(p, s, len);
4291     p[len] = 0;
4292   }
4293   return p;
4294 }
4295 
4296 /* A type to represent the lookup key of either a strtable or an inttable. */
4297 typedef union {
4298   uintptr_t num;
4299   struct {
4300     const char *str;
4301     size_t len;
4302   } str;
4303 } lookupkey_t;
4304 
strkey2(const char * str,size_t len)4305 static lookupkey_t strkey2(const char *str, size_t len) {
4306   lookupkey_t k;
4307   k.str.str = str;
4308   k.str.len = len;
4309   return k;
4310 }
4311 
intkey(uintptr_t key)4312 static lookupkey_t intkey(uintptr_t key) {
4313   lookupkey_t k;
4314   k.num = key;
4315   return k;
4316 }
4317 
4318 typedef uint32_t hashfunc_t(upb_tabkey key);
4319 typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
4320 
4321 /* Base table (shared code) ***************************************************/
4322 
4323 /* For when we need to cast away const. */
mutable_entries(upb_table * t)4324 static upb_tabent *mutable_entries(upb_table *t) {
4325   return (upb_tabent*)t->entries;
4326 }
4327 
isfull(upb_table * t)4328 static bool isfull(upb_table *t) {
4329   if (upb_table_size(t) == 0) {
4330     return true;
4331   } else {
4332     return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD;
4333   }
4334 }
4335 
init(upb_table * t,upb_ctype_t ctype,uint8_t size_lg2,upb_alloc * a)4336 static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2,
4337                  upb_alloc *a) {
4338   size_t bytes;
4339 
4340   t->count = 0;
4341   t->ctype = ctype;
4342   t->size_lg2 = size_lg2;
4343   t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
4344 #ifndef NDEBUG
4345   t->alloc = a;
4346 #endif
4347   bytes = upb_table_size(t) * sizeof(upb_tabent);
4348   if (bytes > 0) {
4349     t->entries = upb_malloc(a, bytes);
4350     if (!t->entries) return false;
4351     memset(mutable_entries(t), 0, bytes);
4352   } else {
4353     t->entries = NULL;
4354   }
4355   return true;
4356 }
4357 
uninit(upb_table * t,upb_alloc * a)4358 static void uninit(upb_table *t, upb_alloc *a) {
4359   upb_check_alloc(t, a);
4360   upb_free(a, mutable_entries(t));
4361 }
4362 
emptyent(upb_table * t)4363 static upb_tabent *emptyent(upb_table *t) {
4364   upb_tabent *e = mutable_entries(t) + upb_table_size(t);
4365   while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
4366 }
4367 
getentry_mutable(upb_table * t,uint32_t hash)4368 static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
4369   return (upb_tabent*)upb_getentry(t, hash);
4370 }
4371 
findentry(const upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)4372 static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
4373                                    uint32_t hash, eqlfunc_t *eql) {
4374   const upb_tabent *e;
4375 
4376   if (t->size_lg2 == 0) return NULL;
4377   e = upb_getentry(t, hash);
4378   if (upb_tabent_isempty(e)) return NULL;
4379   while (1) {
4380     if (eql(e->key, key)) return e;
4381     if ((e = e->next) == NULL) return NULL;
4382   }
4383 }
4384 
findentry_mutable(upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)4385 static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
4386                                      uint32_t hash, eqlfunc_t *eql) {
4387   return (upb_tabent*)findentry(t, key, hash, eql);
4388 }
4389 
lookup(const upb_table * t,lookupkey_t key,upb_value * v,uint32_t hash,eqlfunc_t * eql)4390 static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
4391                    uint32_t hash, eqlfunc_t *eql) {
4392   const upb_tabent *e = findentry(t, key, hash, eql);
4393   if (e) {
4394     if (v) {
4395       _upb_value_setval(v, e->val.val, t->ctype);
4396     }
4397     return true;
4398   } else {
4399     return false;
4400   }
4401 }
4402 
4403 /* The given key must not already exist in the table. */
insert(upb_table * t,lookupkey_t key,upb_tabkey tabkey,upb_value val,uint32_t hash,hashfunc_t * hashfunc,eqlfunc_t * eql)4404 static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
4405                    upb_value val, uint32_t hash,
4406                    hashfunc_t *hashfunc, eqlfunc_t *eql) {
4407   upb_tabent *mainpos_e;
4408   upb_tabent *our_e;
4409 
4410   UPB_UNUSED(eql);
4411   UPB_UNUSED(key);
4412   assert(findentry(t, key, hash, eql) == NULL);
4413   assert(val.ctype == t->ctype);
4414 
4415   t->count++;
4416   mainpos_e = getentry_mutable(t, hash);
4417   our_e = mainpos_e;
4418 
4419   if (upb_tabent_isempty(mainpos_e)) {
4420     /* Our main position is empty; use it. */
4421     our_e->next = NULL;
4422   } else {
4423     /* Collision. */
4424     upb_tabent *new_e = emptyent(t);
4425     /* Head of collider's chain. */
4426     upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
4427     if (chain == mainpos_e) {
4428       /* Existing ent is in its main posisiton (it has the same hash as us, and
4429        * is the head of our chain).  Insert to new ent and append to this chain. */
4430       new_e->next = mainpos_e->next;
4431       mainpos_e->next = new_e;
4432       our_e = new_e;
4433     } else {
4434       /* Existing ent is not in its main position (it is a node in some other
4435        * chain).  This implies that no existing ent in the table has our hash.
4436        * Evict it (updating its chain) and use its ent for head of our chain. */
4437       *new_e = *mainpos_e;  /* copies next. */
4438       while (chain->next != mainpos_e) {
4439         chain = (upb_tabent*)chain->next;
4440         assert(chain);
4441       }
4442       chain->next = new_e;
4443       our_e = mainpos_e;
4444       our_e->next = NULL;
4445     }
4446   }
4447   our_e->key = tabkey;
4448   our_e->val.val = val.val;
4449   assert(findentry(t, key, hash, eql) == our_e);
4450 }
4451 
rm(upb_table * t,lookupkey_t key,upb_value * val,upb_tabkey * removed,uint32_t hash,eqlfunc_t * eql)4452 static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
4453                upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
4454   upb_tabent *chain = getentry_mutable(t, hash);
4455   if (upb_tabent_isempty(chain)) return false;
4456   if (eql(chain->key, key)) {
4457     /* Element to remove is at the head of its chain. */
4458     t->count--;
4459     if (val) {
4460       _upb_value_setval(val, chain->val.val, t->ctype);
4461     }
4462     if (chain->next) {
4463       upb_tabent *move = (upb_tabent*)chain->next;
4464       *chain = *move;
4465       if (removed) *removed = move->key;
4466       move->key = 0;  /* Make the slot empty. */
4467     } else {
4468       if (removed) *removed = chain->key;
4469       chain->key = 0;  /* Make the slot empty. */
4470     }
4471     return true;
4472   } else {
4473     /* Element to remove is either in a non-head position or not in the
4474      * table. */
4475     while (chain->next && !eql(chain->next->key, key))
4476       chain = (upb_tabent*)chain->next;
4477     if (chain->next) {
4478       /* Found element to remove. */
4479       upb_tabent *rm;
4480 
4481       if (val) {
4482         _upb_value_setval(val, chain->next->val.val, t->ctype);
4483       }
4484       rm = (upb_tabent*)chain->next;
4485       if (removed) *removed = rm->key;
4486       rm->key = 0;
4487       chain->next = rm->next;
4488       t->count--;
4489       return true;
4490     } else {
4491       return false;
4492     }
4493   }
4494 }
4495 
next(const upb_table * t,size_t i)4496 static size_t next(const upb_table *t, size_t i) {
4497   do {
4498     if (++i >= upb_table_size(t))
4499       return SIZE_MAX;
4500   } while(upb_tabent_isempty(&t->entries[i]));
4501 
4502   return i;
4503 }
4504 
begin(const upb_table * t)4505 static size_t begin(const upb_table *t) {
4506   return next(t, -1);
4507 }
4508 
4509 
4510 /* upb_strtable ***************************************************************/
4511 
4512 /* A simple "subclass" of upb_table that only adds a hash function for strings. */
4513 
strcopy(lookupkey_t k2,upb_alloc * a)4514 static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
4515   char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1);
4516   if (str == NULL) return 0;
4517   memcpy(str, &k2.str.len, sizeof(uint32_t));
4518   memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
4519   return (uintptr_t)str;
4520 }
4521 
strhash(upb_tabkey key)4522 static uint32_t strhash(upb_tabkey key) {
4523   uint32_t len;
4524   char *str = upb_tabstr(key, &len);
4525   return MurmurHash2(str, len, 0);
4526 }
4527 
streql(upb_tabkey k1,lookupkey_t k2)4528 static bool streql(upb_tabkey k1, lookupkey_t k2) {
4529   uint32_t len;
4530   char *str = upb_tabstr(k1, &len);
4531   return len == k2.str.len && memcmp(str, k2.str.str, len) == 0;
4532 }
4533 
upb_strtable_init2(upb_strtable * t,upb_ctype_t ctype,upb_alloc * a)4534 bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
4535   return init(&t->t, ctype, 2, a);
4536 }
4537 
upb_strtable_uninit2(upb_strtable * t,upb_alloc * a)4538 void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) {
4539   size_t i;
4540   for (i = 0; i < upb_table_size(&t->t); i++)
4541     upb_free(a, (void*)t->t.entries[i].key);
4542   uninit(&t->t, a);
4543 }
4544 
upb_strtable_resize(upb_strtable * t,size_t size_lg2,upb_alloc * a)4545 bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) {
4546   upb_strtable new_table;
4547   upb_strtable_iter i;
4548 
4549   upb_check_alloc(&t->t, a);
4550 
4551   if (!init(&new_table.t, t->t.ctype, size_lg2, a))
4552     return false;
4553   upb_strtable_begin(&i, t);
4554   for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
4555     upb_strtable_insert3(
4556         &new_table,
4557         upb_strtable_iter_key(&i),
4558         upb_strtable_iter_keylength(&i),
4559         upb_strtable_iter_value(&i),
4560         a);
4561   }
4562   upb_strtable_uninit2(t, a);
4563   *t = new_table;
4564   return true;
4565 }
4566 
upb_strtable_insert3(upb_strtable * t,const char * k,size_t len,upb_value v,upb_alloc * a)4567 bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
4568                           upb_value v, upb_alloc *a) {
4569   lookupkey_t key;
4570   upb_tabkey tabkey;
4571   uint32_t hash;
4572 
4573   upb_check_alloc(&t->t, a);
4574 
4575   if (isfull(&t->t)) {
4576     /* Need to resize.  New table of double the size, add old elements to it. */
4577     if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
4578       return false;
4579     }
4580   }
4581 
4582   key = strkey2(k, len);
4583   tabkey = strcopy(key, a);
4584   if (tabkey == 0) return false;
4585 
4586   hash = MurmurHash2(key.str.str, key.str.len, 0);
4587   insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
4588   return true;
4589 }
4590 
upb_strtable_lookup2(const upb_strtable * t,const char * key,size_t len,upb_value * v)4591 bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
4592                           upb_value *v) {
4593   uint32_t hash = MurmurHash2(key, len, 0);
4594   return lookup(&t->t, strkey2(key, len), v, hash, &streql);
4595 }
4596 
upb_strtable_remove3(upb_strtable * t,const char * key,size_t len,upb_value * val,upb_alloc * alloc)4597 bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
4598                          upb_value *val, upb_alloc *alloc) {
4599   uint32_t hash = MurmurHash2(key, strlen(key), 0);
4600   upb_tabkey tabkey;
4601   if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
4602     upb_free(alloc, (void*)tabkey);
4603     return true;
4604   } else {
4605     return false;
4606   }
4607 }
4608 
4609 /* Iteration */
4610 
str_tabent(const upb_strtable_iter * i)4611 static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
4612   return &i->t->t.entries[i->index];
4613 }
4614 
upb_strtable_begin(upb_strtable_iter * i,const upb_strtable * t)4615 void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
4616   i->t = t;
4617   i->index = begin(&t->t);
4618 }
4619 
upb_strtable_next(upb_strtable_iter * i)4620 void upb_strtable_next(upb_strtable_iter *i) {
4621   i->index = next(&i->t->t, i->index);
4622 }
4623 
upb_strtable_done(const upb_strtable_iter * i)4624 bool upb_strtable_done(const upb_strtable_iter *i) {
4625   return i->index >= upb_table_size(&i->t->t) ||
4626          upb_tabent_isempty(str_tabent(i));
4627 }
4628 
upb_strtable_iter_key(const upb_strtable_iter * i)4629 const char *upb_strtable_iter_key(const upb_strtable_iter *i) {
4630   assert(!upb_strtable_done(i));
4631   return upb_tabstr(str_tabent(i)->key, NULL);
4632 }
4633 
upb_strtable_iter_keylength(const upb_strtable_iter * i)4634 size_t upb_strtable_iter_keylength(const upb_strtable_iter *i) {
4635   uint32_t len;
4636   assert(!upb_strtable_done(i));
4637   upb_tabstr(str_tabent(i)->key, &len);
4638   return len;
4639 }
4640 
upb_strtable_iter_value(const upb_strtable_iter * i)4641 upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
4642   assert(!upb_strtable_done(i));
4643   return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
4644 }
4645 
upb_strtable_iter_setdone(upb_strtable_iter * i)4646 void upb_strtable_iter_setdone(upb_strtable_iter *i) {
4647   i->index = SIZE_MAX;
4648 }
4649 
upb_strtable_iter_isequal(const upb_strtable_iter * i1,const upb_strtable_iter * i2)4650 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
4651                                const upb_strtable_iter *i2) {
4652   if (upb_strtable_done(i1) && upb_strtable_done(i2))
4653     return true;
4654   return i1->t == i2->t && i1->index == i2->index;
4655 }
4656 
4657 
4658 /* upb_inttable ***************************************************************/
4659 
4660 /* For inttables we use a hybrid structure where small keys are kept in an
4661  * array and large keys are put in the hash table. */
4662 
inthash(upb_tabkey key)4663 static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
4664 
inteql(upb_tabkey k1,lookupkey_t k2)4665 static bool inteql(upb_tabkey k1, lookupkey_t k2) {
4666   return k1 == k2.num;
4667 }
4668 
mutable_array(upb_inttable * t)4669 static upb_tabval *mutable_array(upb_inttable *t) {
4670   return (upb_tabval*)t->array;
4671 }
4672 
inttable_val(upb_inttable * t,uintptr_t key)4673 static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
4674   if (key < t->array_size) {
4675     return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
4676   } else {
4677     upb_tabent *e =
4678         findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
4679     return e ? &e->val : NULL;
4680   }
4681 }
4682 
inttable_val_const(const upb_inttable * t,uintptr_t key)4683 static const upb_tabval *inttable_val_const(const upb_inttable *t,
4684                                             uintptr_t key) {
4685   return inttable_val((upb_inttable*)t, key);
4686 }
4687 
upb_inttable_count(const upb_inttable * t)4688 size_t upb_inttable_count(const upb_inttable *t) {
4689   return t->t.count + t->array_count;
4690 }
4691 
check(upb_inttable * t)4692 static void check(upb_inttable *t) {
4693   UPB_UNUSED(t);
4694 #if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
4695   {
4696     /* This check is very expensive (makes inserts/deletes O(N)). */
4697     size_t count = 0;
4698     upb_inttable_iter i;
4699     upb_inttable_begin(&i, t);
4700     for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
4701       assert(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL));
4702     }
4703     assert(count == upb_inttable_count(t));
4704   }
4705 #endif
4706 }
4707 
upb_inttable_sizedinit(upb_inttable * t,upb_ctype_t ctype,size_t asize,int hsize_lg2,upb_alloc * a)4708 bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
4709                             size_t asize, int hsize_lg2, upb_alloc *a) {
4710   size_t array_bytes;
4711 
4712   if (!init(&t->t, ctype, hsize_lg2, a)) return false;
4713   /* Always make the array part at least 1 long, so that we know key 0
4714    * won't be in the hash part, which simplifies things. */
4715   t->array_size = UPB_MAX(1, asize);
4716   t->array_count = 0;
4717   array_bytes = t->array_size * sizeof(upb_value);
4718   t->array = upb_malloc(a, array_bytes);
4719   if (!t->array) {
4720     uninit(&t->t, a);
4721     return false;
4722   }
4723   memset(mutable_array(t), 0xff, array_bytes);
4724   check(t);
4725   return true;
4726 }
4727 
upb_inttable_init2(upb_inttable * t,upb_ctype_t ctype,upb_alloc * a)4728 bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) {
4729   return upb_inttable_sizedinit(t, ctype, 0, 4, a);
4730 }
4731 
upb_inttable_uninit2(upb_inttable * t,upb_alloc * a)4732 void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) {
4733   uninit(&t->t, a);
4734   upb_free(a, mutable_array(t));
4735 }
4736 
upb_inttable_insert2(upb_inttable * t,uintptr_t key,upb_value val,upb_alloc * a)4737 bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
4738                           upb_alloc *a) {
4739   upb_tabval tabval;
4740   tabval.val = val.val;
4741   UPB_UNUSED(tabval);
4742   assert(upb_arrhas(tabval));  /* This will reject (uint64_t)-1.  Fix this. */
4743 
4744   upb_check_alloc(&t->t, a);
4745 
4746   if (key < t->array_size) {
4747     assert(!upb_arrhas(t->array[key]));
4748     t->array_count++;
4749     mutable_array(t)[key].val = val.val;
4750   } else {
4751     if (isfull(&t->t)) {
4752       /* Need to resize the hash part, but we re-use the array part. */
4753       size_t i;
4754       upb_table new_table;
4755 
4756       if (!init(&new_table, t->t.ctype, t->t.size_lg2 + 1, a)) {
4757         return false;
4758       }
4759 
4760       for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
4761         const upb_tabent *e = &t->t.entries[i];
4762         uint32_t hash;
4763         upb_value v;
4764 
4765         _upb_value_setval(&v, e->val.val, t->t.ctype);
4766         hash = upb_inthash(e->key);
4767         insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
4768       }
4769 
4770       assert(t->t.count == new_table.count);
4771 
4772       uninit(&t->t, a);
4773       t->t = new_table;
4774     }
4775     insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
4776   }
4777   check(t);
4778   return true;
4779 }
4780 
upb_inttable_lookup(const upb_inttable * t,uintptr_t key,upb_value * v)4781 bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
4782   const upb_tabval *table_v = inttable_val_const(t, key);
4783   if (!table_v) return false;
4784   if (v) _upb_value_setval(v, table_v->val, t->t.ctype);
4785   return true;
4786 }
4787 
upb_inttable_replace(upb_inttable * t,uintptr_t key,upb_value val)4788 bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
4789   upb_tabval *table_v = inttable_val(t, key);
4790   if (!table_v) return false;
4791   table_v->val = val.val;
4792   return true;
4793 }
4794 
upb_inttable_remove(upb_inttable * t,uintptr_t key,upb_value * val)4795 bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
4796   bool success;
4797   if (key < t->array_size) {
4798     if (upb_arrhas(t->array[key])) {
4799       upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
4800       t->array_count--;
4801       if (val) {
4802         _upb_value_setval(val, t->array[key].val, t->t.ctype);
4803       }
4804       mutable_array(t)[key] = empty;
4805       success = true;
4806     } else {
4807       success = false;
4808     }
4809   } else {
4810     upb_tabkey removed;
4811     uint32_t hash = upb_inthash(key);
4812     success = rm(&t->t, intkey(key), val, &removed, hash, &inteql);
4813   }
4814   check(t);
4815   return success;
4816 }
4817 
upb_inttable_push2(upb_inttable * t,upb_value val,upb_alloc * a)4818 bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) {
4819   upb_check_alloc(&t->t, a);
4820   return upb_inttable_insert2(t, upb_inttable_count(t), val, a);
4821 }
4822 
upb_inttable_pop(upb_inttable * t)4823 upb_value upb_inttable_pop(upb_inttable *t) {
4824   upb_value val;
4825   bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
4826   UPB_ASSERT_VAR(ok, ok);
4827   return val;
4828 }
4829 
upb_inttable_insertptr2(upb_inttable * t,const void * key,upb_value val,upb_alloc * a)4830 bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
4831                              upb_alloc *a) {
4832   upb_check_alloc(&t->t, a);
4833   return upb_inttable_insert2(t, (uintptr_t)key, val, a);
4834 }
4835 
upb_inttable_lookupptr(const upb_inttable * t,const void * key,upb_value * v)4836 bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
4837                             upb_value *v) {
4838   return upb_inttable_lookup(t, (uintptr_t)key, v);
4839 }
4840 
upb_inttable_removeptr(upb_inttable * t,const void * key,upb_value * val)4841 bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
4842   return upb_inttable_remove(t, (uintptr_t)key, val);
4843 }
4844 
upb_inttable_compact2(upb_inttable * t,upb_alloc * a)4845 void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) {
4846   /* A power-of-two histogram of the table keys. */
4847   size_t counts[UPB_MAXARRSIZE + 1] = {0};
4848 
4849   /* The max key in each bucket. */
4850   uintptr_t max[UPB_MAXARRSIZE + 1] = {0};
4851 
4852   upb_inttable_iter i;
4853   size_t arr_count;
4854   int size_lg2;
4855   upb_inttable new_t;
4856 
4857   upb_check_alloc(&t->t, a);
4858 
4859   upb_inttable_begin(&i, t);
4860   for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
4861     uintptr_t key = upb_inttable_iter_key(&i);
4862     int bucket = log2ceil(key);
4863     max[bucket] = UPB_MAX(max[bucket], key);
4864     counts[bucket]++;
4865   }
4866 
4867   /* Find the largest power of two that satisfies the MIN_DENSITY
4868    * definition (while actually having some keys). */
4869   arr_count = upb_inttable_count(t);
4870 
4871   for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
4872     if (counts[size_lg2] == 0) {
4873       /* We can halve again without losing any entries. */
4874       continue;
4875     } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
4876       break;
4877     }
4878 
4879     arr_count -= counts[size_lg2];
4880   }
4881 
4882   assert(arr_count <= upb_inttable_count(t));
4883 
4884   {
4885     /* Insert all elements into new, perfectly-sized table. */
4886     size_t arr_size = max[size_lg2] + 1;  /* +1 so arr[max] will fit. */
4887     size_t hash_count = upb_inttable_count(t) - arr_count;
4888     size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
4889     size_t hashsize_lg2 = log2ceil(hash_size);
4890 
4891     upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2, a);
4892     upb_inttable_begin(&i, t);
4893     for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
4894       uintptr_t k = upb_inttable_iter_key(&i);
4895       upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a);
4896     }
4897     assert(new_t.array_size == arr_size);
4898     assert(new_t.t.size_lg2 == hashsize_lg2);
4899   }
4900   upb_inttable_uninit2(t, a);
4901   *t = new_t;
4902 }
4903 
4904 /* Iteration. */
4905 
int_tabent(const upb_inttable_iter * i)4906 static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
4907   assert(!i->array_part);
4908   return &i->t->t.entries[i->index];
4909 }
4910 
int_arrent(const upb_inttable_iter * i)4911 static upb_tabval int_arrent(const upb_inttable_iter *i) {
4912   assert(i->array_part);
4913   return i->t->array[i->index];
4914 }
4915 
upb_inttable_begin(upb_inttable_iter * i,const upb_inttable * t)4916 void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
4917   i->t = t;
4918   i->index = -1;
4919   i->array_part = true;
4920   upb_inttable_next(i);
4921 }
4922 
upb_inttable_next(upb_inttable_iter * iter)4923 void upb_inttable_next(upb_inttable_iter *iter) {
4924   const upb_inttable *t = iter->t;
4925   if (iter->array_part) {
4926     while (++iter->index < t->array_size) {
4927       if (upb_arrhas(int_arrent(iter))) {
4928         return;
4929       }
4930     }
4931     iter->array_part = false;
4932     iter->index = begin(&t->t);
4933   } else {
4934     iter->index = next(&t->t, iter->index);
4935   }
4936 }
4937 
upb_inttable_done(const upb_inttable_iter * i)4938 bool upb_inttable_done(const upb_inttable_iter *i) {
4939   if (i->array_part) {
4940     return i->index >= i->t->array_size ||
4941            !upb_arrhas(int_arrent(i));
4942   } else {
4943     return i->index >= upb_table_size(&i->t->t) ||
4944            upb_tabent_isempty(int_tabent(i));
4945   }
4946 }
4947 
upb_inttable_iter_key(const upb_inttable_iter * i)4948 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
4949   assert(!upb_inttable_done(i));
4950   return i->array_part ? i->index : int_tabent(i)->key;
4951 }
4952 
upb_inttable_iter_value(const upb_inttable_iter * i)4953 upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
4954   assert(!upb_inttable_done(i));
4955   return _upb_value_val(
4956       i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
4957       i->t->t.ctype);
4958 }
4959 
upb_inttable_iter_setdone(upb_inttable_iter * i)4960 void upb_inttable_iter_setdone(upb_inttable_iter *i) {
4961   i->index = SIZE_MAX;
4962   i->array_part = false;
4963 }
4964 
upb_inttable_iter_isequal(const upb_inttable_iter * i1,const upb_inttable_iter * i2)4965 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
4966                                           const upb_inttable_iter *i2) {
4967   if (upb_inttable_done(i1) && upb_inttable_done(i2))
4968     return true;
4969   return i1->t == i2->t && i1->index == i2->index &&
4970          i1->array_part == i2->array_part;
4971 }
4972 
4973 #ifdef UPB_UNALIGNED_READS_OK
4974 /* -----------------------------------------------------------------------------
4975  * MurmurHash2, by Austin Appleby (released as public domain).
4976  * Reformatted and C99-ified by Joshua Haberman.
4977  * Note - This code makes a few assumptions about how your machine behaves -
4978  *   1. We can read a 4-byte value from any address without crashing
4979  *   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t
4980  * And it has a few limitations -
4981  *   1. It will not work incrementally.
4982  *   2. It will not produce the same results on little-endian and big-endian
4983  *      machines. */
MurmurHash2(const void * key,size_t len,uint32_t seed)4984 uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
4985   /* 'm' and 'r' are mixing constants generated offline.
4986    * They're not really 'magic', they just happen to work well. */
4987   const uint32_t m = 0x5bd1e995;
4988   const int32_t r = 24;
4989 
4990   /* Initialize the hash to a 'random' value */
4991   uint32_t h = seed ^ len;
4992 
4993   /* Mix 4 bytes at a time into the hash */
4994   const uint8_t * data = (const uint8_t *)key;
4995   while(len >= 4) {
4996     uint32_t k = *(uint32_t *)data;
4997 
4998     k *= m;
4999     k ^= k >> r;
5000     k *= m;
5001 
5002     h *= m;
5003     h ^= k;
5004 
5005     data += 4;
5006     len -= 4;
5007   }
5008 
5009   /* Handle the last few bytes of the input array */
5010   switch(len) {
5011     case 3: h ^= data[2] << 16;
5012     case 2: h ^= data[1] << 8;
5013     case 1: h ^= data[0]; h *= m;
5014   };
5015 
5016   /* Do a few final mixes of the hash to ensure the last few
5017    * bytes are well-incorporated. */
5018   h ^= h >> 13;
5019   h *= m;
5020   h ^= h >> 15;
5021 
5022   return h;
5023 }
5024 
5025 #else /* !UPB_UNALIGNED_READS_OK */
5026 
5027 /* -----------------------------------------------------------------------------
5028  * MurmurHashAligned2, by Austin Appleby
5029  * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
5030  * on certain platforms.
5031  * Performance will be lower than MurmurHash2 */
5032 
5033 #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
5034 
MurmurHash2(const void * key,size_t len,uint32_t seed)5035 uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
5036   const uint32_t m = 0x5bd1e995;
5037   const int32_t r = 24;
5038   const uint8_t * data = (const uint8_t *)key;
5039   uint32_t h = seed ^ len;
5040   uint8_t align = (uintptr_t)data & 3;
5041 
5042   if(align && (len >= 4)) {
5043     /* Pre-load the temp registers */
5044     uint32_t t = 0, d = 0;
5045     int32_t sl;
5046     int32_t sr;
5047 
5048     switch(align) {
5049       case 1: t |= data[2] << 16;
5050       case 2: t |= data[1] << 8;
5051       case 3: t |= data[0];
5052     }
5053 
5054     t <<= (8 * align);
5055 
5056     data += 4-align;
5057     len -= 4-align;
5058 
5059     sl = 8 * (4-align);
5060     sr = 8 * align;
5061 
5062     /* Mix */
5063 
5064     while(len >= 4) {
5065       uint32_t k;
5066 
5067       d = *(uint32_t *)data;
5068       t = (t >> sr) | (d << sl);
5069 
5070       k = t;
5071 
5072       MIX(h,k,m);
5073 
5074       t = d;
5075 
5076       data += 4;
5077       len -= 4;
5078     }
5079 
5080     /* Handle leftover data in temp registers */
5081 
5082     d = 0;
5083 
5084     if(len >= align) {
5085       uint32_t k;
5086 
5087       switch(align) {
5088         case 3: d |= data[2] << 16;
5089         case 2: d |= data[1] << 8;
5090         case 1: d |= data[0];
5091       }
5092 
5093       k = (t >> sr) | (d << sl);
5094       MIX(h,k,m);
5095 
5096       data += align;
5097       len -= align;
5098 
5099       /* ----------
5100        * Handle tail bytes */
5101 
5102       switch(len) {
5103         case 3: h ^= data[2] << 16;
5104         case 2: h ^= data[1] << 8;
5105         case 1: h ^= data[0]; h *= m;
5106       };
5107     } else {
5108       switch(len) {
5109         case 3: d |= data[2] << 16;
5110         case 2: d |= data[1] << 8;
5111         case 1: d |= data[0];
5112         case 0: h ^= (t >> sr) | (d << sl); h *= m;
5113       }
5114     }
5115 
5116     h ^= h >> 13;
5117     h *= m;
5118     h ^= h >> 15;
5119 
5120     return h;
5121   } else {
5122     while(len >= 4) {
5123       uint32_t k = *(uint32_t *)data;
5124 
5125       MIX(h,k,m);
5126 
5127       data += 4;
5128       len -= 4;
5129     }
5130 
5131     /* ----------
5132      * Handle tail bytes */
5133 
5134     switch(len) {
5135       case 3: h ^= data[2] << 16;
5136       case 2: h ^= data[1] << 8;
5137       case 1: h ^= data[0]; h *= m;
5138     };
5139 
5140     h ^= h >> 13;
5141     h *= m;
5142     h ^= h >> 15;
5143 
5144     return h;
5145   }
5146 }
5147 #undef MIX
5148 
5149 #endif /* UPB_UNALIGNED_READS_OK */
5150 
5151 #include <errno.h>
5152 #include <stdarg.h>
5153 #include <stddef.h>
5154 #include <stdint.h>
5155 #include <stdio.h>
5156 #include <stdlib.h>
5157 #include <string.h>
5158 
upb_dumptostderr(void * closure,const upb_status * status)5159 bool upb_dumptostderr(void *closure, const upb_status* status) {
5160   UPB_UNUSED(closure);
5161   fprintf(stderr, "%s\n", upb_status_errmsg(status));
5162   return false;
5163 }
5164 
5165 /* Guarantee null-termination and provide ellipsis truncation.
5166  * It may be tempting to "optimize" this by initializing these final
5167  * four bytes up-front and then being careful never to overwrite them,
5168  * this is safer and simpler. */
nullz(upb_status * status)5169 static void nullz(upb_status *status) {
5170   const char *ellipsis = "...";
5171   size_t len = strlen(ellipsis);
5172   assert(sizeof(status->msg) > len);
5173   memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
5174 }
5175 
5176 
5177 /* upb_upberr *****************************************************************/
5178 
5179 upb_errorspace upb_upberr = {"upb error"};
5180 
upb_upberr_setoom(upb_status * status)5181 void upb_upberr_setoom(upb_status *status) {
5182   status->error_space_ = &upb_upberr;
5183   upb_status_seterrmsg(status, "Out of memory");
5184 }
5185 
5186 
5187 /* upb_status *****************************************************************/
5188 
upb_status_clear(upb_status * status)5189 void upb_status_clear(upb_status *status) {
5190   if (!status) return;
5191   status->ok_ = true;
5192   status->code_ = 0;
5193   status->msg[0] = '\0';
5194 }
5195 
upb_ok(const upb_status * status)5196 bool upb_ok(const upb_status *status) { return status->ok_; }
5197 
upb_status_errspace(const upb_status * status)5198 upb_errorspace *upb_status_errspace(const upb_status *status) {
5199   return status->error_space_;
5200 }
5201 
upb_status_errcode(const upb_status * status)5202 int upb_status_errcode(const upb_status *status) { return status->code_; }
5203 
upb_status_errmsg(const upb_status * status)5204 const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
5205 
upb_status_seterrmsg(upb_status * status,const char * msg)5206 void upb_status_seterrmsg(upb_status *status, const char *msg) {
5207   if (!status) return;
5208   status->ok_ = false;
5209   strncpy(status->msg, msg, sizeof(status->msg));
5210   nullz(status);
5211 }
5212 
upb_status_seterrf(upb_status * status,const char * fmt,...)5213 void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
5214   va_list args;
5215   va_start(args, fmt);
5216   upb_status_vseterrf(status, fmt, args);
5217   va_end(args);
5218 }
5219 
upb_status_vseterrf(upb_status * status,const char * fmt,va_list args)5220 void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
5221   if (!status) return;
5222   status->ok_ = false;
5223   _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
5224   nullz(status);
5225 }
5226 
upb_status_copy(upb_status * to,const upb_status * from)5227 void upb_status_copy(upb_status *to, const upb_status *from) {
5228   if (!to) return;
5229   *to = *from;
5230 }
5231 
5232 
5233 /* upb_alloc ******************************************************************/
5234 
upb_global_allocfunc(upb_alloc * alloc,void * ptr,size_t oldsize,size_t size)5235 static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize,
5236                                   size_t size) {
5237   UPB_UNUSED(alloc);
5238   UPB_UNUSED(oldsize);
5239   if (size == 0) {
5240     free(ptr);
5241     return NULL;
5242   } else {
5243     return realloc(ptr, size);
5244   }
5245 }
5246 
5247 upb_alloc upb_alloc_global = {&upb_global_allocfunc};
5248 
5249 
5250 /* upb_arena ******************************************************************/
5251 
5252 /* Be conservative and choose 16 in case anyone is using SSE. */
5253 static const size_t maxalign = 16;
5254 
align_up(size_t size)5255 static size_t align_up(size_t size) {
5256   return ((size + maxalign - 1) / maxalign) * maxalign;
5257 }
5258 
5259 typedef struct mem_block {
5260   struct mem_block *next;
5261   size_t size;
5262   size_t used;
5263   bool owned;
5264   /* Data follows. */
5265 } mem_block;
5266 
5267 typedef struct cleanup_ent {
5268   struct cleanup_ent *next;
5269   upb_cleanup_func *cleanup;
5270   void *ud;
5271 } cleanup_ent;
5272 
upb_arena_addblock(upb_arena * a,void * ptr,size_t size,bool owned)5273 static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size,
5274                                bool owned) {
5275   mem_block *block = ptr;
5276 
5277   block->next = a->block_head;
5278   block->size = size;
5279   block->used = align_up(sizeof(mem_block));
5280   block->owned = owned;
5281 
5282   a->block_head = block;
5283 
5284   /* TODO(haberman): ASAN poison. */
5285 }
5286 
5287 
upb_arena_allocblock(upb_arena * a,size_t size)5288 static mem_block *upb_arena_allocblock(upb_arena *a, size_t size) {
5289   size_t block_size = UPB_MAX(size, a->next_block_size) + sizeof(mem_block);
5290   mem_block *block = upb_malloc(a->block_alloc, block_size);
5291 
5292   if (!block) {
5293     return NULL;
5294   }
5295 
5296   upb_arena_addblock(a, block, block_size, true);
5297   a->next_block_size = UPB_MIN(block_size * 2, a->max_block_size);
5298 
5299   return block;
5300 }
5301 
upb_arena_doalloc(upb_alloc * alloc,void * ptr,size_t oldsize,size_t size)5302 static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize,
5303                                size_t size) {
5304   upb_arena *a = (upb_arena*)alloc;  /* upb_alloc is initial member. */
5305   mem_block *block = a->block_head;
5306   void *ret;
5307 
5308   if (size == 0) {
5309     return NULL;  /* We are an arena, don't need individual frees. */
5310   }
5311 
5312   size = align_up(size);
5313 
5314   /* TODO(haberman): special-case if this is a realloc of the last alloc? */
5315 
5316   if (!block || block->size - block->used < size) {
5317     /* Slow path: have to allocate a new block. */
5318     block = upb_arena_allocblock(a, size);
5319 
5320     if (!block) {
5321       return NULL;  /* Out of memory. */
5322     }
5323   }
5324 
5325   ret = (char*)block + block->used;
5326   block->used += size;
5327 
5328   if (oldsize > 0) {
5329     memcpy(ret, ptr, oldsize);  /* Preserve existing data. */
5330   }
5331 
5332   /* TODO(haberman): ASAN unpoison. */
5333 
5334   a->bytes_allocated += size;
5335   return ret;
5336 }
5337 
5338 /* Public Arena API ***********************************************************/
5339 
upb_arena_init(upb_arena * a)5340 void upb_arena_init(upb_arena *a) {
5341   a->alloc.func = &upb_arena_doalloc;
5342   a->block_alloc = &upb_alloc_global;
5343   a->bytes_allocated = 0;
5344   a->next_block_size = 256;
5345   a->max_block_size = 16384;
5346   a->cleanup_head = NULL;
5347   a->block_head = NULL;
5348 }
5349 
upb_arena_init2(upb_arena * a,void * mem,size_t size,upb_alloc * alloc)5350 void upb_arena_init2(upb_arena *a, void *mem, size_t size, upb_alloc *alloc) {
5351   upb_arena_init(a);
5352 
5353   if (size > sizeof(mem_block)) {
5354     upb_arena_addblock(a, mem, size, false);
5355   }
5356 
5357   if (alloc) {
5358     a->block_alloc = alloc;
5359   }
5360 }
5361 
upb_arena_uninit(upb_arena * a)5362 void upb_arena_uninit(upb_arena *a) {
5363   cleanup_ent *ent = a->cleanup_head;
5364   mem_block *block = a->block_head;
5365 
5366   while (ent) {
5367     ent->cleanup(ent->ud);
5368     ent = ent->next;
5369   }
5370 
5371   /* Must do this after running cleanup functions, because this will delete
5372    * the memory we store our cleanup entries in! */
5373   while (block) {
5374     mem_block *next = block->next;
5375 
5376     if (block->owned) {
5377       upb_free(a->block_alloc, block);
5378     }
5379 
5380     block = next;
5381   }
5382 }
5383 
upb_arena_addcleanup(upb_arena * a,upb_cleanup_func * func,void * ud)5384 bool upb_arena_addcleanup(upb_arena *a, upb_cleanup_func *func, void *ud) {
5385   cleanup_ent *ent = upb_malloc(&a->alloc, sizeof(cleanup_ent));
5386   if (!ent) {
5387     return false;  /* Out of memory. */
5388   }
5389 
5390   ent->cleanup = func;
5391   ent->ud = ud;
5392   ent->next = a->cleanup_head;
5393   a->cleanup_head = ent;
5394 
5395   return true;
5396 }
5397 
upb_arena_bytesallocated(const upb_arena * a)5398 size_t upb_arena_bytesallocated(const upb_arena *a) {
5399   return a->bytes_allocated;
5400 }
5401 
5402 
5403 /* Standard error functions ***************************************************/
5404 
default_err(void * ud,const upb_status * status)5405 static bool default_err(void *ud, const upb_status *status) {
5406   UPB_UNUSED(ud);
5407   UPB_UNUSED(status);
5408   return false;
5409 }
5410 
write_err_to(void * ud,const upb_status * status)5411 static bool write_err_to(void *ud, const upb_status *status) {
5412   upb_status *copy_to = ud;
5413   upb_status_copy(copy_to, status);
5414   return false;
5415 }
5416 
5417 
5418 /* upb_env ********************************************************************/
5419 
upb_env_initonly(upb_env * e)5420 void upb_env_initonly(upb_env *e) {
5421   e->ok_ = true;
5422   e->error_func_ = &default_err;
5423   e->error_ud_ = NULL;
5424 }
5425 
upb_env_init(upb_env * e)5426 void upb_env_init(upb_env *e) {
5427   upb_arena_init(&e->arena_);
5428   upb_env_initonly(e);
5429 }
5430 
upb_env_init2(upb_env * e,void * mem,size_t n,upb_alloc * alloc)5431 void upb_env_init2(upb_env *e, void *mem, size_t n, upb_alloc *alloc) {
5432   upb_arena_init2(&e->arena_, mem, n, alloc);
5433   upb_env_initonly(e);
5434 }
5435 
upb_env_uninit(upb_env * e)5436 void upb_env_uninit(upb_env *e) {
5437   upb_arena_uninit(&e->arena_);
5438 }
5439 
upb_env_seterrorfunc(upb_env * e,upb_error_func * func,void * ud)5440 void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, void *ud) {
5441   e->error_func_ = func;
5442   e->error_ud_ = ud;
5443 }
5444 
upb_env_reporterrorsto(upb_env * e,upb_status * s)5445 void upb_env_reporterrorsto(upb_env *e, upb_status *s) {
5446   e->error_func_ = &write_err_to;
5447   e->error_ud_ = s;
5448 }
5449 
upb_env_reporterror(upb_env * e,const upb_status * status)5450 bool upb_env_reporterror(upb_env *e, const upb_status *status) {
5451   e->ok_ = false;
5452   return e->error_func_(e->error_ud_, status);
5453 }
5454 
upb_env_malloc(upb_env * e,size_t size)5455 void *upb_env_malloc(upb_env *e, size_t size) {
5456   return upb_malloc(&e->arena_.alloc, size);
5457 }
5458 
upb_env_realloc(upb_env * e,void * ptr,size_t oldsize,size_t size)5459 void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
5460   return upb_realloc(&e->arena_.alloc, ptr, oldsize, size);
5461 }
5462 
upb_env_free(upb_env * e,void * ptr)5463 void upb_env_free(upb_env *e, void *ptr) {
5464   upb_free(&e->arena_.alloc, ptr);
5465 }
5466 
upb_env_addcleanup(upb_env * e,upb_cleanup_func * func,void * ud)5467 bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
5468   return upb_arena_addcleanup(&e->arena_, func, ud);
5469 }
5470 
upb_env_bytesallocated(const upb_env * e)5471 size_t upb_env_bytesallocated(const upb_env *e) {
5472   return upb_arena_bytesallocated(&e->arena_);
5473 }
5474 /* This file was generated by upbc (the upb compiler) from the input
5475  * file:
5476  *
5477  *     upb/descriptor/descriptor.proto
5478  *
5479  * Do not edit -- your changes will be discarded when the file is
5480  * regenerated. */
5481 
5482 #include <assert.h>
5483 
5484 
5485 static const upb_msgdef msgs[22];
5486 static const upb_fielddef fields[105];
5487 static const upb_enumdef enums[5];
5488 static const upb_tabent strentries[236];
5489 static const upb_tabent intentries[18];
5490 static const upb_tabval arrays[184];
5491 
5492 #ifdef UPB_DEBUG_REFS
5493 static upb_inttable reftables[264];
5494 #endif
5495 
5496 static const upb_msgdef msgs[22] = {
5497   UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 40, 8, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 11, 10), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[0]), false, UPB_SYNTAX_PROTO2, &reftables[0], &reftables[1]),
5498   UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 4, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[16]), false, UPB_SYNTAX_PROTO2, &reftables[2], &reftables[3]),
5499   UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ReservedRange", 4, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[14], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[20]), false, UPB_SYNTAX_PROTO2, &reftables[4], &reftables[5]),
5500   UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[17], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[24]), false, UPB_SYNTAX_PROTO2, &reftables[6], &reftables[7]),
5501   UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 8, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[0], &arrays[21], 4, 2), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]), false, UPB_SYNTAX_PROTO2, &reftables[8], &reftables[9]),
5502   UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 8, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[25], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[32]), false, UPB_SYNTAX_PROTO2, &reftables[10], &reftables[11]),
5503   UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 7, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[29], 2, 1), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[36]), false, UPB_SYNTAX_PROTO2, &reftables[12], &reftables[13]),
5504   UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 23, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 11, 10), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[40]), false, UPB_SYNTAX_PROTO2, &reftables[14], &reftables[15]),
5505   UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 12, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[42], 11, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[56]), false, UPB_SYNTAX_PROTO2, &reftables[16], &reftables[17]),
5506   UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 42, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[53], 13, 12), UPB_STRTABLE_INIT(12, 15, UPB_CTYPE_PTR, 4, &strentries[72]), false, UPB_SYNTAX_PROTO2, &reftables[18], &reftables[19]),
5507   UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[66], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[88]), false, UPB_SYNTAX_PROTO2, &reftables[20], &reftables[21]),
5508   UPB_MSGDEF_INIT("google.protobuf.FileOptions", 31, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[68], 39, 15), UPB_STRTABLE_INIT(16, 31, UPB_CTYPE_PTR, 5, &strentries[92]), false, UPB_SYNTAX_PROTO2, &reftables[22], &reftables[23]),
5509   UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 10, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[8], &arrays[107], 8, 4), UPB_STRTABLE_INIT(5, 7, UPB_CTYPE_PTR, 3, &strentries[124]), false, UPB_SYNTAX_PROTO2, &reftables[24], &reftables[25]),
5510   UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 15, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[115], 7, 6), UPB_STRTABLE_INIT(6, 7, UPB_CTYPE_PTR, 3, &strentries[132]), false, UPB_SYNTAX_PROTO2, &reftables[26], &reftables[27]),
5511   UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 7, 1, UPB_INTTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &intentries[10], &arrays[122], 1, 0), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[140]), false, UPB_SYNTAX_PROTO2, &reftables[28], &reftables[29]),
5512   UPB_MSGDEF_INIT("google.protobuf.OneofDescriptorProto", 5, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[123], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[144]), false, UPB_SYNTAX_PROTO2, &reftables[30], &reftables[31]),
5513   UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[125], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[148]), false, UPB_SYNTAX_PROTO2, &reftables[32], &reftables[33]),
5514   UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 7, 1, UPB_INTTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &intentries[14], &arrays[129], 1, 0), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[152]), false, UPB_SYNTAX_PROTO2, &reftables[34], &reftables[35]),
5515   UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[130], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[156]), false, UPB_SYNTAX_PROTO2, &reftables[36], &reftables[37]),
5516   UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 19, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[132], 7, 5), UPB_STRTABLE_INIT(5, 7, UPB_CTYPE_PTR, 3, &strentries[160]), false, UPB_SYNTAX_PROTO2, &reftables[38], &reftables[39]),
5517   UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 18, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[139], 9, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[168]), false, UPB_SYNTAX_PROTO2, &reftables[40], &reftables[41]),
5518   UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 6, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[148], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[184]), false, UPB_SYNTAX_PROTO2, &reftables[42], &reftables[43]),
5519 };
5520 
5521 static const upb_fielddef fields[105] = {
5522   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[20], NULL, 15, 6, {0},&reftables[44], &reftables[45]),
5523   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[4], NULL, 6, 1, {0},&reftables[46], &reftables[47]),
5524   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_enable_arenas", 31, &msgs[11], NULL, 23, 12, {0},&reftables[48], &reftables[49]),
5525   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[11], NULL, 17, 6, {0},&reftables[50], &reftables[51]),
5526   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "client_streaming", 5, &msgs[13], NULL, 13, 4, {0},&reftables[52], &reftables[53]),
5527   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "csharp_namespace", 37, &msgs[11], NULL, 27, 14, {0},&reftables[54], &reftables[55]),
5528   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[8], (const upb_def*)(&enums[2]), 6, 1, {0},&reftables[56], &reftables[57]),
5529   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[7], NULL, 16, 7, {0},&reftables[58], &reftables[59]),
5530   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[9], NULL, 30, 8, {0},&reftables[60], &reftables[61]),
5531   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[12], NULL, 8, 3, {0},&reftables[62], &reftables[63]),
5532   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[8], NULL, 8, 3, {0},&reftables[64], &reftables[65]),
5533   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 33, &msgs[14], NULL, 6, 1, {0},&reftables[66], &reftables[67]),
5534   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 23, &msgs[11], NULL, 21, 10, {0},&reftables[68], &reftables[69]),
5535   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[4], NULL, 7, 2, {0},&reftables[70], &reftables[71]),
5536   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 33, &msgs[17], NULL, 6, 1, {0},&reftables[72], &reftables[73]),
5537   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 1, &msgs[6], NULL, 6, 1, {0},&reftables[74], &reftables[75]),
5538   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[20], NULL, 11, 4, {0},&reftables[76], &reftables[77]),
5539   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[2], NULL, 3, 1, {0},&reftables[78], &reftables[79]),
5540   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[80], &reftables[81]),
5541   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[3]), 18, 2, {0},&reftables[82], &reftables[83]),
5542   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[9], (const upb_def*)(&msgs[3]), 13, 1, {0},&reftables[84], &reftables[85]),
5543   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[7], NULL, 7, 2, {0},&reftables[86], &reftables[87]),
5544   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[7]), 24, 4, {0},&reftables[88], &reftables[89]),
5545   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[9], (const upb_def*)(&msgs[7]), 19, 3, {0},&reftables[90], &reftables[91]),
5546   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 21, 3, {0},&reftables[92], &reftables[93]),
5547   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], (const upb_def*)(&msgs[7]), 12, 0, {0},&reftables[94], &reftables[95]),
5548   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[10], (const upb_def*)(&msgs[9]), 5, 0, {0},&reftables[96], &reftables[97]),
5549   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[11], NULL, 14, 5, {0},&reftables[98], &reftables[99]),
5550   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[20], NULL, 6, 1, {0},&reftables[100], &reftables[101]),
5551   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[13], NULL, 7, 2, {0},&reftables[102], &reftables[103]),
5552   UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[21], NULL, 5, 1, {0},&reftables[104], &reftables[105]),
5553   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[11], NULL, 20, 9, {0},&reftables[106], &reftables[107]),
5554   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[11], NULL, 18, 7, {0},&reftables[108], &reftables[109]),
5555   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[11], NULL, 13, 4, {0},&reftables[110], &reftables[111]),
5556   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[11], NULL, 9, 2, {0},&reftables[112], &reftables[113]),
5557   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[11], NULL, 6, 1, {0},&reftables[114], &reftables[115]),
5558   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_string_check_utf8", 27, &msgs[11], NULL, 22, 11, {0},&reftables[116], &reftables[117]),
5559   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "javanano_use_deprecated_package", 38, &msgs[11], NULL, 30, 15, {0},&reftables[118], &reftables[119]),
5560   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "json_name", 10, &msgs[7], NULL, 20, 9, {0},&reftables[120], &reftables[121]),
5561   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "jstype", 6, &msgs[8], (const upb_def*)(&enums[3]), 10, 5, {0},&reftables[122], &reftables[123]),
5562   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[7], (const upb_def*)(&enums[0]), 11, 4, {0},&reftables[124], &reftables[125]),
5563   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[8], NULL, 9, 4, {0},&reftables[126], &reftables[127]),
5564   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[19], NULL, 8, 2, {0},&reftables[128], &reftables[129]),
5565   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "leading_detached_comments", 6, &msgs[19], NULL, 16, 4, {0},&reftables[130], &reftables[131]),
5566   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[18], (const upb_def*)(&msgs[19]), 5, 0, {0},&reftables[132], &reftables[133]),
5567   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "map_entry", 7, &msgs[12], NULL, 9, 4, {0},&reftables[134], &reftables[135]),
5568   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[12], NULL, 6, 1, {0},&reftables[136], &reftables[137]),
5569   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[9], (const upb_def*)(&msgs[0]), 10, 0, {0},&reftables[138], &reftables[139]),
5570   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[16], (const upb_def*)(&msgs[13]), 6, 0, {0},&reftables[140], &reftables[141]),
5571   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[3], NULL, 8, 2, {0},&reftables[142], &reftables[143]),
5572   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[15], NULL, 2, 0, {0},&reftables[144], &reftables[145]),
5573   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[20], (const upb_def*)(&msgs[21]), 5, 0, {0},&reftables[146], &reftables[147]),
5574   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 32, 8, {0},&reftables[148], &reftables[149]),
5575   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[5], NULL, 4, 1, {0},&reftables[150], &reftables[151]),
5576   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[9], NULL, 22, 6, {0},&reftables[152], &reftables[153]),
5577   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[7], NULL, 4, 1, {0},&reftables[154], &reftables[155]),
5578   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[13], NULL, 4, 1, {0},&reftables[156], &reftables[157]),
5579   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[16], NULL, 8, 2, {0},&reftables[158], &reftables[159]),
5580   UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[21], NULL, 2, 0, {0},&reftables[160], &reftables[161]),
5581   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[20], NULL, 10, 3, {0},&reftables[162], &reftables[163]),
5582   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 15, 1, {0},&reftables[164], &reftables[165]),
5583   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[12], NULL, 7, 2, {0},&reftables[166], &reftables[167]),
5584   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[5], NULL, 7, 2, {0},&reftables[168], &reftables[169]),
5585   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[7], NULL, 10, 3, {0},&reftables[170], &reftables[171]),
5586   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "objc_class_prefix", 36, &msgs[11], NULL, 24, 13, {0},&reftables[172], &reftables[173]),
5587   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "oneof_decl", 8, &msgs[0], (const upb_def*)(&msgs[15]), 28, 6, {0},&reftables[174], &reftables[175]),
5588   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "oneof_index", 9, &msgs[7], NULL, 19, 8, {0},&reftables[176], &reftables[177]),
5589   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[11], (const upb_def*)(&enums[4]), 12, 3, {0},&reftables[178], &reftables[179]),
5590   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], (const upb_def*)(&msgs[12]), 25, 5, {0},&reftables[180], &reftables[181]),
5591   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[9], (const upb_def*)(&msgs[11]), 20, 4, {0},&reftables[182], &reftables[183]),
5592   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[13], (const upb_def*)(&msgs[14]), 3, 0, {0},&reftables[184], &reftables[185]),
5593   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[7], (const upb_def*)(&msgs[8]), 3, 0, {0},&reftables[186], &reftables[187]),
5594   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[16], (const upb_def*)(&msgs[17]), 7, 1, {0},&reftables[188], &reftables[189]),
5595   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[5], (const upb_def*)(&msgs[6]), 3, 0, {0},&reftables[190], &reftables[191]),
5596   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[3], (const upb_def*)(&msgs[4]), 7, 1, {0},&reftables[192], &reftables[193]),
5597   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[13], NULL, 10, 3, {0},&reftables[194], &reftables[195]),
5598   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[9], NULL, 25, 7, {0},&reftables[196], &reftables[197]),
5599   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[8], NULL, 7, 2, {0},&reftables[198], &reftables[199]),
5600   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[19], NULL, 4, 0, {0},&reftables[200], &reftables[201]),
5601   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[20], NULL, 9, 2, {0},&reftables[202], &reftables[203]),
5602   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[9], NULL, 35, 9, {0},&reftables[204], &reftables[205]),
5603   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[11], NULL, 19, 8, {0},&reftables[206], &reftables[207]),
5604   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "reserved_name", 10, &msgs[0], NULL, 37, 9, {0},&reftables[208], &reftables[209]),
5605   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "reserved_range", 9, &msgs[0], (const upb_def*)(&msgs[2]), 31, 7, {0},&reftables[210], &reftables[211]),
5606   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "server_streaming", 6, &msgs[13], NULL, 14, 5, {0},&reftables[212], &reftables[213]),
5607   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[9], (const upb_def*)(&msgs[16]), 16, 2, {0},&reftables[214], &reftables[215]),
5608   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[9], (const upb_def*)(&msgs[18]), 21, 5, {0},&reftables[216], &reftables[217]),
5609   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[19], NULL, 7, 1, {0},&reftables[218], &reftables[219]),
5610   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[2], NULL, 2, 0, {0},&reftables[220], &reftables[221]),
5611   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[222], &reftables[223]),
5612   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[20], NULL, 12, 5, {0},&reftables[224], &reftables[225]),
5613   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "syntax", 12, &msgs[9], NULL, 39, 11, {0},&reftables[226], &reftables[227]),
5614   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[19], NULL, 11, 3, {0},&reftables[228], &reftables[229]),
5615   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[7], (const upb_def*)(&enums[1]), 12, 5, {0},&reftables[230], &reftables[231]),
5616   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[7], NULL, 13, 6, {0},&reftables[232], &reftables[233]),
5617   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[234], &reftables[235]),
5618   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[12], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[236], &reftables[237]),
5619   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[6], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[238], &reftables[239]),
5620   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[4], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[240], &reftables[241]),
5621   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[8], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[242], &reftables[243]),
5622   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[14], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[244], &reftables[245]),
5623   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[17], (const upb_def*)(&msgs[20]), 5, 0, {0},&reftables[246], &reftables[247]),
5624   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[3], (const upb_def*)(&msgs[5]), 6, 0, {0},&reftables[248], &reftables[249]),
5625   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[8], NULL, 11, 6, {0},&reftables[250], &reftables[251]),
5626   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[9], NULL, 38, 10, {0},&reftables[252], &reftables[253]),
5627 };
5628 
5629 static const upb_enumdef enums[5] = {
5630   UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[188]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[151], 4, 3), 0, &reftables[254], &reftables[255]),
5631   UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_INT32, 5, &strentries[192]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[155], 19, 18), 0, &reftables[256], &reftables[257]),
5632   UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[224]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[174], 3, 3), 0, &reftables[258], &reftables[259]),
5633   UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.JSType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[228]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[177], 3, 3), 0, &reftables[260], &reftables[261]),
5634   UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[232]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[180], 4, 3), 0, &reftables[262], &reftables[263]),
5635 };
5636 
5637 static const upb_tabent strentries[236] = {
5638   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[22]), NULL},
5639   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5640   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "reserved_name"), UPB_TABVALUE_PTR_INIT(&fields[82]), NULL},
5641   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[52]), NULL},
5642   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5643   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5644   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5645   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INIT(&fields[25]), &strentries[12]},
5646   {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVALUE_PTR_INIT(&fields[24]), &strentries[14]},
5647   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5648   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_PTR_INIT(&fields[60]), NULL},
5649   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5650   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "reserved_range"), UPB_TABVALUE_PTR_INIT(&fields[83]), NULL},
5651   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[68]), NULL},
5652   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "oneof_decl"), UPB_TABVALUE_PTR_INIT(&fields[65]), NULL},
5653   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[19]), &strentries[13]},
5654   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[89]), NULL},
5655   {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[18]), NULL},
5656   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5657   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5658   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[88]), NULL},
5659   {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[17]), NULL},
5660   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5661   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5662   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5663   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INIT(&fields[102]), NULL},
5664   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
5665   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[49]), &strentries[26]},
5666   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[98]), NULL},
5667   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL},
5668   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_PTR_INIT(&fields[1]), NULL},
5669   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5670   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[62]), NULL},
5671   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5672   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
5673   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[53]), &strentries[34]},
5674   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[97]), NULL},
5675   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
5676   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5677   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5678   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "oneof_index"), UPB_TABVALUE_PTR_INIT(&fields[66]), NULL},
5679   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INIT(&fields[40]), NULL},
5680   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5681   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[55]), NULL},
5682   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5683   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5684   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5685   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5686   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[63]), &strentries[53]},
5687   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5688   {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_INIT(&fields[21]), NULL},
5689   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR_INIT(&fields[94]), NULL},
5690   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "json_name"), UPB_TABVALUE_PTR_INIT(&fields[38]), NULL},
5691   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT(&fields[93]), &strentries[50]},
5692   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "default_value"), UPB_TABVALUE_PTR_INIT(&fields[7]), NULL},
5693   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
5694   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[99]), NULL},
5695   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5696   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT(&fields[103]), NULL},
5697   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5698   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5699   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5700   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5701   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
5702   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT(&fields[41]), NULL},
5703   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5704   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INIT(&fields[6]), NULL},
5705   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5706   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "jstype"), UPB_TABVALUE_PTR_INIT(&fields[39]), NULL},
5707   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
5708   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5709   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5710   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[23]), NULL},
5711   {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVALUE_PTR_INIT(&fields[104]), NULL},
5712   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5713   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[54]), NULL},
5714   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_INIT(&fields[85]), NULL},
5715   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5716   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVALUE_PTR_INIT(&fields[86]), NULL},
5717   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5718   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5719   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "syntax"), UPB_TABVALUE_PTR_INIT(&fields[91]), NULL},
5720   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PTR_INIT(&fields[8]), NULL},
5721   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_PTR_INIT(&fields[47]), NULL},
5722   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
5723   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[69]), &strentries[86]},
5724   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[20]), NULL},
5725   {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABVALUE_PTR_INIT(&fields[80]), &strentries[85]},
5726   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5727   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT(&fields[26]), NULL},
5728   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5729   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5730   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5731   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5732   {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[3]), NULL},
5733   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "csharp_namespace"), UPB_TABVALUE_PTR_INIT(&fields[5]), NULL},
5734   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5735   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5736   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5737   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5738   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5739   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5740   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5741   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PTR_INIT(&fields[27]), NULL},
5742   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_PTR_INIT(&fields[35]), &strentries[120]},
5743   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5744   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5745   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_TABVALUE_PTR_INIT(&fields[34]), NULL},
5746   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[95]), NULL},
5747   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5748   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5749   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5750   {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TABVALUE_PTR_INIT(&fields[33]), &strentries[117]},
5751   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5752   {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[32]), &strentries[118]},
5753   {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash"), UPB_TABVALUE_PTR_INIT(&fields[31]), NULL},
5754   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5755   {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "javanano_use_deprecated_package"), UPB_TABVALUE_PTR_INIT(&fields[37]), &strentries[123]},
5756   {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[81]), NULL},
5757   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_PTR_INIT(&fields[67]), NULL},
5758   {UPB_TABKEY_STR("\026", "\000", "\000", "\000", "java_string_check_utf8"), UPB_TABVALUE_PTR_INIT(&fields[36]), NULL},
5759   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[12]), &strentries[119]},
5760   {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "objc_class_prefix"), UPB_TABVALUE_PTR_INIT(&fields[64]), NULL},
5761   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "cc_enable_arenas"), UPB_TABVALUE_PTR_INIT(&fields[2]), NULL},
5762   {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UPB_TABVALUE_PTR_INIT(&fields[46]), &strentries[128]},
5763   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5764   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5765   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5766   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[96]), NULL},
5767   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[9]), NULL},
5768   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "map_entry"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL},
5769   {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_accessor"), UPB_TABVALUE_PTR_INIT(&fields[61]), NULL},
5770   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5771   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "client_streaming"), UPB_TABVALUE_PTR_INIT(&fields[4]), NULL},
5772   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "server_streaming"), UPB_TABVALUE_PTR_INIT(&fields[84]), NULL},
5773   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[56]), NULL},
5774   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PTR_INIT(&fields[29]), NULL},
5775   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5776   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
5777   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[70]), NULL},
5778   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[100]), NULL},
5779   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[11]), NULL},
5780   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5781   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5782   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5783   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5784   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5785   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[50]), NULL},
5786   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5787   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[72]), &strentries[150]},
5788   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_INIT(&fields[48]), NULL},
5789   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[57]), &strentries[149]},
5790   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[101]), NULL},
5791   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},
5792   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5793   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5794   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5795   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5796   {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_INIT(&fields[44]), NULL},
5797   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5798   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5799   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5800   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5801   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT(&fields[87]), &strentries[167]},
5802   {UPB_TABKEY_STR("\031", "\000", "\000", "\000", "leading_detached_comments"), UPB_TABVALUE_PTR_INIT(&fields[43]), &strentries[165]},
5803   {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABVALUE_PTR_INIT(&fields[92]), NULL},
5804   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVALUE_PTR_INIT(&fields[42]), &strentries[164]},
5805   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT(&fields[78]), NULL},
5806   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_PTR_INIT(&fields[16]), NULL},
5807   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5808   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5809   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[51]), NULL},
5810   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5811   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5812   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5813   {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TABVALUE_PTR_INIT(&fields[59]), NULL},
5814   {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVALUE_PTR_INIT(&fields[0]), NULL},
5815   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5816   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5817   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5818   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5819   {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TABVALUE_PTR_INIT(&fields[79]), NULL},
5820   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVALUE_PTR_INIT(&fields[28]), NULL},
5821   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_PTR_INIT(&fields[90]), &strentries[182]},
5822   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5823   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5824   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_PTR_INIT(&fields[30]), NULL},
5825   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR_INIT(&fields[58]), NULL},
5826   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALUE_INT_INIT(2), &strentries[190]},
5827   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5828   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALUE_INT_INIT(3), NULL},
5829   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALUE_INT_INIT(1), NULL},
5830   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_INT_INIT(6), NULL},
5831   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5832   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5833   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5834   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5835   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_INT_INIT(9), NULL},
5836   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_INT_INIT(2), &strentries[221]},
5837   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_INT_INIT(1), NULL},
5838   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5839   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_INT_INIT(5), NULL},
5840   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE_INT_INIT(15), NULL},
5841   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_INT_INIT(7), NULL},
5842   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5843   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_INT_INIT(11), &strentries[222]},
5844   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5845   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5846   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_INT_INIT(3), &strentries[219]},
5847   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5848   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5849   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5850   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5851   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT_INIT(14), NULL},
5852   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_INT_INIT(13), NULL},
5853   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5854   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_INT_INIT(4), &strentries[218]},
5855   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5856   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), UPB_TABVALUE_INT_INIT(16), NULL},
5857   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_INT_INIT(12), NULL},
5858   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_INT_INIT(18), NULL},
5859   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT_INIT(8), NULL},
5860   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_INT_INIT(10), NULL},
5861   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_INT_INIT(17), NULL},
5862   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5863   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT(1), NULL},
5864   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_INIT(0), &strentries[225]},
5865   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_INT_INIT(2), NULL},
5866   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5867   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_NORMAL"), UPB_TABVALUE_INT_INIT(0), NULL},
5868   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_NUMBER"), UPB_TABVALUE_INT_INIT(2), NULL},
5869   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "JS_STRING"), UPB_TABVALUE_INT_INIT(1), NULL},
5870   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT_INIT(2), NULL},
5871   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INIT(1), &strentries[235]},
5872   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5873   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_INT_INIT(3), NULL},
5874 };
5875 
5876 static const upb_tabent intentries[18] = {
5877   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5878   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[98]), NULL},
5879   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5880   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[97]), NULL},
5881   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5882   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[99]), NULL},
5883   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5884   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[95]), NULL},
5885   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5886   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[96]), NULL},
5887   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5888   {UPB_TABKEY_NUM(33), UPB_TABVALUE_PTR_INIT(&fields[11]), NULL},
5889   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5890   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[100]), NULL},
5891   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5892   {UPB_TABKEY_NUM(33), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},
5893   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5894   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[101]), NULL},
5895 };
5896 
5897 static const upb_tabval arrays[184] = {
5898   UPB_TABVALUE_EMPTY_INIT,
5899   UPB_TABVALUE_PTR_INIT(&fields[52]),
5900   UPB_TABVALUE_PTR_INIT(&fields[25]),
5901   UPB_TABVALUE_PTR_INIT(&fields[60]),
5902   UPB_TABVALUE_PTR_INIT(&fields[19]),
5903   UPB_TABVALUE_PTR_INIT(&fields[24]),
5904   UPB_TABVALUE_PTR_INIT(&fields[22]),
5905   UPB_TABVALUE_PTR_INIT(&fields[68]),
5906   UPB_TABVALUE_PTR_INIT(&fields[65]),
5907   UPB_TABVALUE_PTR_INIT(&fields[83]),
5908   UPB_TABVALUE_PTR_INIT(&fields[82]),
5909   UPB_TABVALUE_EMPTY_INIT,
5910   UPB_TABVALUE_PTR_INIT(&fields[89]),
5911   UPB_TABVALUE_PTR_INIT(&fields[18]),
5912   UPB_TABVALUE_EMPTY_INIT,
5913   UPB_TABVALUE_PTR_INIT(&fields[88]),
5914   UPB_TABVALUE_PTR_INIT(&fields[17]),
5915   UPB_TABVALUE_EMPTY_INIT,
5916   UPB_TABVALUE_PTR_INIT(&fields[49]),
5917   UPB_TABVALUE_PTR_INIT(&fields[102]),
5918   UPB_TABVALUE_PTR_INIT(&fields[74]),
5919   UPB_TABVALUE_EMPTY_INIT,
5920   UPB_TABVALUE_EMPTY_INIT,
5921   UPB_TABVALUE_PTR_INIT(&fields[1]),
5922   UPB_TABVALUE_PTR_INIT(&fields[13]),
5923   UPB_TABVALUE_EMPTY_INIT,
5924   UPB_TABVALUE_PTR_INIT(&fields[53]),
5925   UPB_TABVALUE_PTR_INIT(&fields[62]),
5926   UPB_TABVALUE_PTR_INIT(&fields[73]),
5927   UPB_TABVALUE_EMPTY_INIT,
5928   UPB_TABVALUE_PTR_INIT(&fields[15]),
5929   UPB_TABVALUE_EMPTY_INIT,
5930   UPB_TABVALUE_PTR_INIT(&fields[55]),
5931   UPB_TABVALUE_PTR_INIT(&fields[21]),
5932   UPB_TABVALUE_PTR_INIT(&fields[63]),
5933   UPB_TABVALUE_PTR_INIT(&fields[40]),
5934   UPB_TABVALUE_PTR_INIT(&fields[93]),
5935   UPB_TABVALUE_PTR_INIT(&fields[94]),
5936   UPB_TABVALUE_PTR_INIT(&fields[7]),
5937   UPB_TABVALUE_PTR_INIT(&fields[71]),
5938   UPB_TABVALUE_PTR_INIT(&fields[66]),
5939   UPB_TABVALUE_PTR_INIT(&fields[38]),
5940   UPB_TABVALUE_EMPTY_INIT,
5941   UPB_TABVALUE_PTR_INIT(&fields[6]),
5942   UPB_TABVALUE_PTR_INIT(&fields[77]),
5943   UPB_TABVALUE_PTR_INIT(&fields[10]),
5944   UPB_TABVALUE_EMPTY_INIT,
5945   UPB_TABVALUE_PTR_INIT(&fields[41]),
5946   UPB_TABVALUE_PTR_INIT(&fields[39]),
5947   UPB_TABVALUE_EMPTY_INIT,
5948   UPB_TABVALUE_EMPTY_INIT,
5949   UPB_TABVALUE_EMPTY_INIT,
5950   UPB_TABVALUE_PTR_INIT(&fields[103]),
5951   UPB_TABVALUE_EMPTY_INIT,
5952   UPB_TABVALUE_PTR_INIT(&fields[54]),
5953   UPB_TABVALUE_PTR_INIT(&fields[76]),
5954   UPB_TABVALUE_PTR_INIT(&fields[8]),
5955   UPB_TABVALUE_PTR_INIT(&fields[47]),
5956   UPB_TABVALUE_PTR_INIT(&fields[20]),
5957   UPB_TABVALUE_PTR_INIT(&fields[85]),
5958   UPB_TABVALUE_PTR_INIT(&fields[23]),
5959   UPB_TABVALUE_PTR_INIT(&fields[69]),
5960   UPB_TABVALUE_PTR_INIT(&fields[86]),
5961   UPB_TABVALUE_PTR_INIT(&fields[80]),
5962   UPB_TABVALUE_PTR_INIT(&fields[104]),
5963   UPB_TABVALUE_PTR_INIT(&fields[91]),
5964   UPB_TABVALUE_EMPTY_INIT,
5965   UPB_TABVALUE_PTR_INIT(&fields[26]),
5966   UPB_TABVALUE_EMPTY_INIT,
5967   UPB_TABVALUE_PTR_INIT(&fields[35]),
5968   UPB_TABVALUE_EMPTY_INIT,
5969   UPB_TABVALUE_EMPTY_INIT,
5970   UPB_TABVALUE_EMPTY_INIT,
5971   UPB_TABVALUE_EMPTY_INIT,
5972   UPB_TABVALUE_EMPTY_INIT,
5973   UPB_TABVALUE_EMPTY_INIT,
5974   UPB_TABVALUE_PTR_INIT(&fields[34]),
5975   UPB_TABVALUE_PTR_INIT(&fields[67]),
5976   UPB_TABVALUE_PTR_INIT(&fields[33]),
5977   UPB_TABVALUE_PTR_INIT(&fields[27]),
5978   UPB_TABVALUE_EMPTY_INIT,
5979   UPB_TABVALUE_EMPTY_INIT,
5980   UPB_TABVALUE_EMPTY_INIT,
5981   UPB_TABVALUE_EMPTY_INIT,
5982   UPB_TABVALUE_PTR_INIT(&fields[3]),
5983   UPB_TABVALUE_PTR_INIT(&fields[32]),
5984   UPB_TABVALUE_PTR_INIT(&fields[81]),
5985   UPB_TABVALUE_EMPTY_INIT,
5986   UPB_TABVALUE_PTR_INIT(&fields[31]),
5987   UPB_TABVALUE_EMPTY_INIT,
5988   UPB_TABVALUE_EMPTY_INIT,
5989   UPB_TABVALUE_PTR_INIT(&fields[12]),
5990   UPB_TABVALUE_EMPTY_INIT,
5991   UPB_TABVALUE_EMPTY_INIT,
5992   UPB_TABVALUE_EMPTY_INIT,
5993   UPB_TABVALUE_PTR_INIT(&fields[36]),
5994   UPB_TABVALUE_EMPTY_INIT,
5995   UPB_TABVALUE_EMPTY_INIT,
5996   UPB_TABVALUE_EMPTY_INIT,
5997   UPB_TABVALUE_PTR_INIT(&fields[2]),
5998   UPB_TABVALUE_EMPTY_INIT,
5999   UPB_TABVALUE_EMPTY_INIT,
6000   UPB_TABVALUE_EMPTY_INIT,
6001   UPB_TABVALUE_EMPTY_INIT,
6002   UPB_TABVALUE_PTR_INIT(&fields[64]),
6003   UPB_TABVALUE_PTR_INIT(&fields[5]),
6004   UPB_TABVALUE_PTR_INIT(&fields[37]),
6005   UPB_TABVALUE_EMPTY_INIT,
6006   UPB_TABVALUE_PTR_INIT(&fields[46]),
6007   UPB_TABVALUE_PTR_INIT(&fields[61]),
6008   UPB_TABVALUE_PTR_INIT(&fields[9]),
6009   UPB_TABVALUE_EMPTY_INIT,
6010   UPB_TABVALUE_EMPTY_INIT,
6011   UPB_TABVALUE_EMPTY_INIT,
6012   UPB_TABVALUE_PTR_INIT(&fields[45]),
6013   UPB_TABVALUE_EMPTY_INIT,
6014   UPB_TABVALUE_PTR_INIT(&fields[56]),
6015   UPB_TABVALUE_PTR_INIT(&fields[29]),
6016   UPB_TABVALUE_PTR_INIT(&fields[75]),
6017   UPB_TABVALUE_PTR_INIT(&fields[70]),
6018   UPB_TABVALUE_PTR_INIT(&fields[4]),
6019   UPB_TABVALUE_PTR_INIT(&fields[84]),
6020   UPB_TABVALUE_EMPTY_INIT,
6021   UPB_TABVALUE_EMPTY_INIT,
6022   UPB_TABVALUE_PTR_INIT(&fields[50]),
6023   UPB_TABVALUE_EMPTY_INIT,
6024   UPB_TABVALUE_PTR_INIT(&fields[57]),
6025   UPB_TABVALUE_PTR_INIT(&fields[48]),
6026   UPB_TABVALUE_PTR_INIT(&fields[72]),
6027   UPB_TABVALUE_EMPTY_INIT,
6028   UPB_TABVALUE_EMPTY_INIT,
6029   UPB_TABVALUE_PTR_INIT(&fields[44]),
6030   UPB_TABVALUE_EMPTY_INIT,
6031   UPB_TABVALUE_PTR_INIT(&fields[78]),
6032   UPB_TABVALUE_PTR_INIT(&fields[87]),
6033   UPB_TABVALUE_PTR_INIT(&fields[42]),
6034   UPB_TABVALUE_PTR_INIT(&fields[92]),
6035   UPB_TABVALUE_EMPTY_INIT,
6036   UPB_TABVALUE_PTR_INIT(&fields[43]),
6037   UPB_TABVALUE_EMPTY_INIT,
6038   UPB_TABVALUE_EMPTY_INIT,
6039   UPB_TABVALUE_PTR_INIT(&fields[51]),
6040   UPB_TABVALUE_PTR_INIT(&fields[28]),
6041   UPB_TABVALUE_PTR_INIT(&fields[79]),
6042   UPB_TABVALUE_PTR_INIT(&fields[59]),
6043   UPB_TABVALUE_PTR_INIT(&fields[16]),
6044   UPB_TABVALUE_PTR_INIT(&fields[90]),
6045   UPB_TABVALUE_PTR_INIT(&fields[0]),
6046   UPB_TABVALUE_EMPTY_INIT,
6047   UPB_TABVALUE_PTR_INIT(&fields[58]),
6048   UPB_TABVALUE_PTR_INIT(&fields[30]),
6049   UPB_TABVALUE_EMPTY_INIT,
6050   UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"),
6051   UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"),
6052   UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"),
6053   UPB_TABVALUE_EMPTY_INIT,
6054   UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"),
6055   UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"),
6056   UPB_TABVALUE_PTR_INIT("TYPE_INT64"),
6057   UPB_TABVALUE_PTR_INIT("TYPE_UINT64"),
6058   UPB_TABVALUE_PTR_INIT("TYPE_INT32"),
6059   UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"),
6060   UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"),
6061   UPB_TABVALUE_PTR_INIT("TYPE_BOOL"),
6062   UPB_TABVALUE_PTR_INIT("TYPE_STRING"),
6063   UPB_TABVALUE_PTR_INIT("TYPE_GROUP"),
6064   UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"),
6065   UPB_TABVALUE_PTR_INIT("TYPE_BYTES"),
6066   UPB_TABVALUE_PTR_INIT("TYPE_UINT32"),
6067   UPB_TABVALUE_PTR_INIT("TYPE_ENUM"),
6068   UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"),
6069   UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"),
6070   UPB_TABVALUE_PTR_INIT("TYPE_SINT32"),
6071   UPB_TABVALUE_PTR_INIT("TYPE_SINT64"),
6072   UPB_TABVALUE_PTR_INIT("STRING"),
6073   UPB_TABVALUE_PTR_INIT("CORD"),
6074   UPB_TABVALUE_PTR_INIT("STRING_PIECE"),
6075   UPB_TABVALUE_PTR_INIT("JS_NORMAL"),
6076   UPB_TABVALUE_PTR_INIT("JS_STRING"),
6077   UPB_TABVALUE_PTR_INIT("JS_NUMBER"),
6078   UPB_TABVALUE_EMPTY_INIT,
6079   UPB_TABVALUE_PTR_INIT("SPEED"),
6080   UPB_TABVALUE_PTR_INIT("CODE_SIZE"),
6081   UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"),
6082 };
6083 
6084 #ifdef UPB_DEBUG_REFS
6085 static upb_inttable reftables[264] = {
6086   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6087   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6088   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6089   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6090   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6091   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6092   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6093   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6094   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6095   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6096   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6097   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6098   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6099   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6100   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6101   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6102   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6103   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6104   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6105   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6106   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6107   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6108   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6109   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6110   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6111   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6112   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6113   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6114   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6115   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6116   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6117   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6118   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6119   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6120   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6121   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6122   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6123   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6124   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6125   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6126   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6127   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6128   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6129   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6130   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6131   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6132   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6133   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6134   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6135   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6136   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6137   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6138   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6139   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6140   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6141   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6142   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6143   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6144   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6145   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6146   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6147   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6148   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6149   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6150   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6151   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6152   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6153   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6154   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6155   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6156   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6157   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6158   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6159   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6160   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6161   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6162   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6163   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6164   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6165   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6166   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6167   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6168   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6169   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6170   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6171   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6172   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6173   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6174   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6175   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6176   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6177   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6178   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6179   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6180   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6181   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6182   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6183   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6184   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6185   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6186   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6187   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6188   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6189   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6190   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6191   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6192   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6193   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6194   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6195   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6196   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6197   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6198   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6199   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6200   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6201   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6202   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6203   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6204   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6205   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6206   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6207   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6208   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6209   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6210   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6211   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6212   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6213   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6214   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6215   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6216   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6217   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6218   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6219   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6220   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6221   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6222   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6223   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6224   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6225   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6226   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6227   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6228   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6229   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6230   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6231   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6232   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6233   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6234   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6235   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6236   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6237   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6238   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6239   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6240   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6241   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6242   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6243   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6244   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6245   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6246   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6247   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6248   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6249   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6250   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6251   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6252   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6253   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6254   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6255   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6256   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6257   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6258   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6259   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6260   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6261   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6262   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6263   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6264   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6265   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6266   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6267   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6268   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6269   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6270   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6271   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6272   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6273   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6274   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6275   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6276   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6277   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6278   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6279   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6280   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6281   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6282   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6283   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6284   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6285   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6286   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6287   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6288   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6289   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6290   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6291   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6292   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6293   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6294   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6295   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6296   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6297   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6298   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6299   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6300   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6301   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6302   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6303   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6304   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6305   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6306   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6307   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6308   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6309   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6310   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6311   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6312   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6313   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6314   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6315   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6316   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6317   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6318   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6319   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6320   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6321   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6322   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6323   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6324   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6325   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6326   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6327   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6328   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6329   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6330   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6331   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6332   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6333   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6334   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6335   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6336   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6337   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6338   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6339   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6340   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6341   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6342   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6343   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6344   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6345   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6346   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6347   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6348   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6349   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
6350 };
6351 #endif
6352 
refm(const upb_msgdef * m,const void * owner)6353 static const upb_msgdef *refm(const upb_msgdef *m, const void *owner) {
6354   upb_msgdef_ref(m, owner);
6355   return m;
6356 }
6357 
refe(const upb_enumdef * e,const void * owner)6358 static const upb_enumdef *refe(const upb_enumdef *e, const void *owner) {
6359   upb_enumdef_ref(e, owner);
6360   return e;
6361 }
6362 
6363 /* Public API. */
upbdefs_google_protobuf_DescriptorProto_get(const void * owner)6364 const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_get(const void *owner) { return refm(&msgs[0], owner); }
upbdefs_google_protobuf_DescriptorProto_ExtensionRange_get(const void * owner)6365 const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_ExtensionRange_get(const void *owner) { return refm(&msgs[1], owner); }
upbdefs_google_protobuf_DescriptorProto_ReservedRange_get(const void * owner)6366 const upb_msgdef *upbdefs_google_protobuf_DescriptorProto_ReservedRange_get(const void *owner) { return refm(&msgs[2], owner); }
upbdefs_google_protobuf_EnumDescriptorProto_get(const void * owner)6367 const upb_msgdef *upbdefs_google_protobuf_EnumDescriptorProto_get(const void *owner) { return refm(&msgs[3], owner); }
upbdefs_google_protobuf_EnumOptions_get(const void * owner)6368 const upb_msgdef *upbdefs_google_protobuf_EnumOptions_get(const void *owner) { return refm(&msgs[4], owner); }
upbdefs_google_protobuf_EnumValueDescriptorProto_get(const void * owner)6369 const upb_msgdef *upbdefs_google_protobuf_EnumValueDescriptorProto_get(const void *owner) { return refm(&msgs[5], owner); }
upbdefs_google_protobuf_EnumValueOptions_get(const void * owner)6370 const upb_msgdef *upbdefs_google_protobuf_EnumValueOptions_get(const void *owner) { return refm(&msgs[6], owner); }
upbdefs_google_protobuf_FieldDescriptorProto_get(const void * owner)6371 const upb_msgdef *upbdefs_google_protobuf_FieldDescriptorProto_get(const void *owner) { return refm(&msgs[7], owner); }
upbdefs_google_protobuf_FieldOptions_get(const void * owner)6372 const upb_msgdef *upbdefs_google_protobuf_FieldOptions_get(const void *owner) { return refm(&msgs[8], owner); }
upbdefs_google_protobuf_FileDescriptorProto_get(const void * owner)6373 const upb_msgdef *upbdefs_google_protobuf_FileDescriptorProto_get(const void *owner) { return refm(&msgs[9], owner); }
upbdefs_google_protobuf_FileDescriptorSet_get(const void * owner)6374 const upb_msgdef *upbdefs_google_protobuf_FileDescriptorSet_get(const void *owner) { return refm(&msgs[10], owner); }
upbdefs_google_protobuf_FileOptions_get(const void * owner)6375 const upb_msgdef *upbdefs_google_protobuf_FileOptions_get(const void *owner) { return refm(&msgs[11], owner); }
upbdefs_google_protobuf_MessageOptions_get(const void * owner)6376 const upb_msgdef *upbdefs_google_protobuf_MessageOptions_get(const void *owner) { return refm(&msgs[12], owner); }
upbdefs_google_protobuf_MethodDescriptorProto_get(const void * owner)6377 const upb_msgdef *upbdefs_google_protobuf_MethodDescriptorProto_get(const void *owner) { return refm(&msgs[13], owner); }
upbdefs_google_protobuf_MethodOptions_get(const void * owner)6378 const upb_msgdef *upbdefs_google_protobuf_MethodOptions_get(const void *owner) { return refm(&msgs[14], owner); }
upbdefs_google_protobuf_OneofDescriptorProto_get(const void * owner)6379 const upb_msgdef *upbdefs_google_protobuf_OneofDescriptorProto_get(const void *owner) { return refm(&msgs[15], owner); }
upbdefs_google_protobuf_ServiceDescriptorProto_get(const void * owner)6380 const upb_msgdef *upbdefs_google_protobuf_ServiceDescriptorProto_get(const void *owner) { return refm(&msgs[16], owner); }
upbdefs_google_protobuf_ServiceOptions_get(const void * owner)6381 const upb_msgdef *upbdefs_google_protobuf_ServiceOptions_get(const void *owner) { return refm(&msgs[17], owner); }
upbdefs_google_protobuf_SourceCodeInfo_get(const void * owner)6382 const upb_msgdef *upbdefs_google_protobuf_SourceCodeInfo_get(const void *owner) { return refm(&msgs[18], owner); }
upbdefs_google_protobuf_SourceCodeInfo_Location_get(const void * owner)6383 const upb_msgdef *upbdefs_google_protobuf_SourceCodeInfo_Location_get(const void *owner) { return refm(&msgs[19], owner); }
upbdefs_google_protobuf_UninterpretedOption_get(const void * owner)6384 const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption_get(const void *owner) { return refm(&msgs[20], owner); }
upbdefs_google_protobuf_UninterpretedOption_NamePart_get(const void * owner)6385 const upb_msgdef *upbdefs_google_protobuf_UninterpretedOption_NamePart_get(const void *owner) { return refm(&msgs[21], owner); }
6386 
upbdefs_google_protobuf_FieldDescriptorProto_Label_get(const void * owner)6387 const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Label_get(const void *owner) { return refe(&enums[0], owner); }
upbdefs_google_protobuf_FieldDescriptorProto_Type_get(const void * owner)6388 const upb_enumdef *upbdefs_google_protobuf_FieldDescriptorProto_Type_get(const void *owner) { return refe(&enums[1], owner); }
upbdefs_google_protobuf_FieldOptions_CType_get(const void * owner)6389 const upb_enumdef *upbdefs_google_protobuf_FieldOptions_CType_get(const void *owner) { return refe(&enums[2], owner); }
upbdefs_google_protobuf_FieldOptions_JSType_get(const void * owner)6390 const upb_enumdef *upbdefs_google_protobuf_FieldOptions_JSType_get(const void *owner) { return refe(&enums[3], owner); }
upbdefs_google_protobuf_FileOptions_OptimizeMode_get(const void * owner)6391 const upb_enumdef *upbdefs_google_protobuf_FileOptions_OptimizeMode_get(const void *owner) { return refe(&enums[4], owner); }
6392 /*
6393 ** XXX: The routines in this file that consume a string do not currently
6394 ** support having the string span buffers.  In the future, as upb_sink and
6395 ** its buffering/sharing functionality evolve there should be an easy and
6396 ** idiomatic way of correctly handling this case.  For now, we accept this
6397 ** limitation since we currently only parse descriptors from single strings.
6398 */
6399 
6400 
6401 #include <errno.h>
6402 #include <stdlib.h>
6403 #include <string.h>
6404 
6405 /* Compares a NULL-terminated string with a non-NULL-terminated string. */
upb_streq(const char * str,const char * buf,size_t n)6406 static bool upb_streq(const char *str, const char *buf, size_t n) {
6407   return strlen(str) == n && memcmp(str, buf, n) == 0;
6408 }
6409 
6410 /* We keep a stack of all the messages scopes we are currently in, as well as
6411  * the top-level file scope.  This is necessary to correctly qualify the
6412  * definitions that are contained inside.  "name" tracks the name of the
6413  * message or package (a bare name -- not qualified by any enclosing scopes). */
6414 typedef struct {
6415   char *name;
6416   /* Index of the first def that is under this scope.  For msgdefs, the
6417    * msgdef itself is at start-1. */
6418   int start;
6419 } upb_descreader_frame;
6420 
6421 /* The maximum number of nested declarations that are allowed, ie.
6422  * message Foo {
6423  *   message Bar {
6424  *     message Baz {
6425  *     }
6426  *   }
6427  * }
6428  *
6429  * This is a resource limit that affects how big our runtime stack can grow.
6430  * TODO: make this a runtime-settable property of the Reader instance. */
6431 #define UPB_MAX_MESSAGE_NESTING 64
6432 
6433 struct upb_descreader {
6434   upb_sink sink;
6435   upb_inttable files;
6436   upb_filedef *file;  /* The last file in files. */
6437   upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
6438   int stack_len;
6439 
6440   uint32_t number;
6441   char *name;
6442   bool saw_number;
6443   bool saw_name;
6444 
6445   char *default_string;
6446 
6447   upb_fielddef *f;
6448 };
6449 
upb_strndup(const char * buf,size_t n)6450 static char *upb_strndup(const char *buf, size_t n) {
6451   char *ret = upb_gmalloc(n + 1);
6452   if (!ret) return NULL;
6453   memcpy(ret, buf, n);
6454   ret[n] = '\0';
6455   return ret;
6456 }
6457 
6458 /* Returns a newly allocated string that joins input strings together, for
6459  * example:
6460  *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
6461  *   join("", "Baz") -> "Baz"
6462  * Caller owns a ref on the returned string. */
upb_join(const char * base,const char * name)6463 static char *upb_join(const char *base, const char *name) {
6464   if (!base || strlen(base) == 0) {
6465     return upb_gstrdup(name);
6466   } else {
6467     char *ret = upb_gmalloc(strlen(base) + strlen(name) + 2);
6468     if (!ret) {
6469       return NULL;
6470     }
6471     ret[0] = '\0';
6472     strcat(ret, base);
6473     strcat(ret, ".");
6474     strcat(ret, name);
6475     return ret;
6476   }
6477 }
6478 
6479 /* Qualify the defname for all defs starting with offset "start" with "str". */
upb_descreader_qualify(upb_filedef * f,char * str,int32_t start)6480 static bool upb_descreader_qualify(upb_filedef *f, char *str, int32_t start) {
6481   size_t i;
6482   for (i = start; i < upb_filedef_defcount(f); i++) {
6483     upb_def *def = upb_filedef_mutabledef(f, i);
6484     char *name = upb_join(str, upb_def_fullname(def));
6485     if (!name) {
6486       /* Need better logic here; at this point we've qualified some names but
6487        * not others. */
6488       return false;
6489     }
6490     upb_def_setfullname(def, name, NULL);
6491     upb_gfree(name);
6492   }
6493   return true;
6494 }
6495 
6496 
6497 /* upb_descreader  ************************************************************/
6498 
upb_descreader_top(upb_descreader * r)6499 static upb_msgdef *upb_descreader_top(upb_descreader *r) {
6500   int index;
6501   assert(r->stack_len > 1);
6502   index = r->stack[r->stack_len-1].start - 1;
6503   assert(index >= 0);
6504   return upb_downcast_msgdef_mutable(upb_filedef_mutabledef(r->file, index));
6505 }
6506 
upb_descreader_last(upb_descreader * r)6507 static upb_def *upb_descreader_last(upb_descreader *r) {
6508   return upb_filedef_mutabledef(r->file, upb_filedef_defcount(r->file) - 1);
6509 }
6510 
6511 /* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
6512  * entities that have names and can contain sub-definitions. */
upb_descreader_startcontainer(upb_descreader * r)6513 void upb_descreader_startcontainer(upb_descreader *r) {
6514   upb_descreader_frame *f = &r->stack[r->stack_len++];
6515   f->start = upb_filedef_defcount(r->file);
6516   f->name = NULL;
6517 }
6518 
upb_descreader_endcontainer(upb_descreader * r)6519 bool upb_descreader_endcontainer(upb_descreader *r) {
6520   upb_descreader_frame *f = &r->stack[--r->stack_len];
6521   if (!upb_descreader_qualify(r->file, f->name, f->start)) {
6522     return false;
6523   }
6524   upb_gfree(f->name);
6525   f->name = NULL;
6526   return true;
6527 }
6528 
upb_descreader_setscopename(upb_descreader * r,char * str)6529 void upb_descreader_setscopename(upb_descreader *r, char *str) {
6530   upb_descreader_frame *f = &r->stack[r->stack_len-1];
6531   upb_gfree(f->name);
6532   f->name = str;
6533 }
6534 
6535 /** Handlers for google.protobuf.FileDescriptorSet. ***************************/
6536 
fileset_startfile(void * closure,const void * hd)6537 static void *fileset_startfile(void *closure, const void *hd) {
6538   upb_descreader *r = closure;
6539   UPB_UNUSED(hd);
6540   r->file = upb_filedef_new(&r->files);
6541   upb_inttable_push(&r->files, upb_value_ptr(r->file));
6542   return r;
6543 }
6544 
6545 /** Handlers for google.protobuf.FileDescriptorProto. *************************/
6546 
file_start(void * closure,const void * hd)6547 static bool file_start(void *closure, const void *hd) {
6548   upb_descreader *r = closure;
6549   UPB_UNUSED(hd);
6550   upb_descreader_startcontainer(r);
6551   return true;
6552 }
6553 
file_end(void * closure,const void * hd,upb_status * status)6554 static bool file_end(void *closure, const void *hd, upb_status *status) {
6555   upb_descreader *r = closure;
6556   UPB_UNUSED(hd);
6557   UPB_UNUSED(status);
6558   return upb_descreader_endcontainer(r);
6559 }
6560 
file_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6561 static size_t file_onname(void *closure, const void *hd, const char *buf,
6562                           size_t n, const upb_bufhandle *handle) {
6563   upb_descreader *r = closure;
6564   char *name;
6565   bool ok;
6566   UPB_UNUSED(hd);
6567   UPB_UNUSED(handle);
6568 
6569   name = upb_strndup(buf, n);
6570   /* XXX: see comment at the top of the file. */
6571   ok = upb_filedef_setname(r->file, name, NULL);
6572   upb_gfree(name);
6573   UPB_ASSERT_VAR(ok, ok);
6574   return n;
6575 }
6576 
file_onpackage(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6577 static size_t file_onpackage(void *closure, const void *hd, const char *buf,
6578                              size_t n, const upb_bufhandle *handle) {
6579   upb_descreader *r = closure;
6580   char *package;
6581   bool ok;
6582   UPB_UNUSED(hd);
6583   UPB_UNUSED(handle);
6584 
6585   package = upb_strndup(buf, n);
6586   /* XXX: see comment at the top of the file. */
6587   upb_descreader_setscopename(r, package);
6588   ok = upb_filedef_setpackage(r->file, package, NULL);
6589   UPB_ASSERT_VAR(ok, ok);
6590   return n;
6591 }
6592 
file_onsyntax(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6593 static size_t file_onsyntax(void *closure, const void *hd, const char *buf,
6594                             size_t n, const upb_bufhandle *handle) {
6595   upb_descreader *r = closure;
6596   bool ok;
6597   UPB_UNUSED(hd);
6598   UPB_UNUSED(handle);
6599   /* XXX: see comment at the top of the file. */
6600   if (upb_streq("proto2", buf, n)) {
6601     ok = upb_filedef_setsyntax(r->file, UPB_SYNTAX_PROTO2, NULL);
6602   } else if (upb_streq("proto3", buf, n)) {
6603     ok = upb_filedef_setsyntax(r->file, UPB_SYNTAX_PROTO3, NULL);
6604   } else {
6605     ok = false;
6606   }
6607 
6608   UPB_ASSERT_VAR(ok, ok);
6609   return n;
6610 }
6611 
file_startmsg(void * closure,const void * hd)6612 static void *file_startmsg(void *closure, const void *hd) {
6613   upb_descreader *r = closure;
6614   upb_msgdef *m = upb_msgdef_new(&m);
6615   bool ok = upb_filedef_addmsg(r->file, m, &m, NULL);
6616   UPB_UNUSED(hd);
6617   UPB_ASSERT_VAR(ok, ok);
6618   return r;
6619 }
6620 
file_startenum(void * closure,const void * hd)6621 static void *file_startenum(void *closure, const void *hd) {
6622   upb_descreader *r = closure;
6623   upb_enumdef *e = upb_enumdef_new(&e);
6624   bool ok = upb_filedef_addenum(r->file, e, &e, NULL);
6625   UPB_UNUSED(hd);
6626   UPB_ASSERT_VAR(ok, ok);
6627   return r;
6628 }
6629 
file_startext(void * closure,const void * hd)6630 static void *file_startext(void *closure, const void *hd) {
6631   upb_descreader *r = closure;
6632   bool ok;
6633   r->f = upb_fielddef_new(r);
6634   ok = upb_filedef_addext(r->file, r->f, r, NULL);
6635   UPB_UNUSED(hd);
6636   UPB_ASSERT_VAR(ok, ok);
6637   return r;
6638 }
6639 
6640 /** Handlers for google.protobuf.EnumValueDescriptorProto. *********************/
6641 
enumval_startmsg(void * closure,const void * hd)6642 static bool enumval_startmsg(void *closure, const void *hd) {
6643   upb_descreader *r = closure;
6644   UPB_UNUSED(hd);
6645   r->saw_number = false;
6646   r->saw_name = false;
6647   return true;
6648 }
6649 
enumval_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6650 static size_t enumval_onname(void *closure, const void *hd, const char *buf,
6651                              size_t n, const upb_bufhandle *handle) {
6652   upb_descreader *r = closure;
6653   UPB_UNUSED(hd);
6654   UPB_UNUSED(handle);
6655   /* XXX: see comment at the top of the file. */
6656   upb_gfree(r->name);
6657   r->name = upb_strndup(buf, n);
6658   r->saw_name = true;
6659   return n;
6660 }
6661 
enumval_onnumber(void * closure,const void * hd,int32_t val)6662 static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
6663   upb_descreader *r = closure;
6664   UPB_UNUSED(hd);
6665   r->number = val;
6666   r->saw_number = true;
6667   return true;
6668 }
6669 
enumval_endmsg(void * closure,const void * hd,upb_status * status)6670 static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
6671   upb_descreader *r = closure;
6672   upb_enumdef *e;
6673   UPB_UNUSED(hd);
6674 
6675   if(!r->saw_number || !r->saw_name) {
6676     upb_status_seterrmsg(status, "Enum value missing name or number.");
6677     return false;
6678   }
6679   e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
6680   upb_enumdef_addval(e, r->name, r->number, status);
6681   upb_gfree(r->name);
6682   r->name = NULL;
6683   return true;
6684 }
6685 
6686 /** Handlers for google.protobuf.EnumDescriptorProto. *************************/
6687 
enum_endmsg(void * closure,const void * hd,upb_status * status)6688 static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
6689   upb_descreader *r = closure;
6690   upb_enumdef *e;
6691   UPB_UNUSED(hd);
6692 
6693   e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
6694   if (upb_def_fullname(upb_descreader_last(r)) == NULL) {
6695     upb_status_seterrmsg(status, "Enum had no name.");
6696     return false;
6697   }
6698   if (upb_enumdef_numvals(e) == 0) {
6699     upb_status_seterrmsg(status, "Enum had no values.");
6700     return false;
6701   }
6702   return true;
6703 }
6704 
enum_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6705 static size_t enum_onname(void *closure, const void *hd, const char *buf,
6706                           size_t n, const upb_bufhandle *handle) {
6707   upb_descreader *r = closure;
6708   char *fullname = upb_strndup(buf, n);
6709   UPB_UNUSED(hd);
6710   UPB_UNUSED(handle);
6711   /* XXX: see comment at the top of the file. */
6712   upb_def_setfullname(upb_descreader_last(r), fullname, NULL);
6713   upb_gfree(fullname);
6714   return n;
6715 }
6716 
6717 /** Handlers for google.protobuf.FieldDescriptorProto *************************/
6718 
field_startmsg(void * closure,const void * hd)6719 static bool field_startmsg(void *closure, const void *hd) {
6720   upb_descreader *r = closure;
6721   UPB_UNUSED(hd);
6722   assert(r->f);
6723   upb_gfree(r->default_string);
6724   r->default_string = NULL;
6725 
6726   /* fielddefs default to packed, but descriptors default to non-packed. */
6727   upb_fielddef_setpacked(r->f, false);
6728   return true;
6729 }
6730 
6731 /* Converts the default value in string "str" into "d".  Passes a ref on str.
6732  * Returns true on success. */
parse_default(char * str,upb_fielddef * f)6733 static bool parse_default(char *str, upb_fielddef *f) {
6734   bool success = true;
6735   char *end;
6736   switch (upb_fielddef_type(f)) {
6737     case UPB_TYPE_INT32: {
6738       long val = strtol(str, &end, 0);
6739       if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
6740         success = false;
6741       else
6742         upb_fielddef_setdefaultint32(f, val);
6743       break;
6744     }
6745     case UPB_TYPE_INT64: {
6746       /* XXX: Need to write our own strtoll, since it's not available in c89. */
6747       long long val = strtol(str, &end, 0);
6748       if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end)
6749         success = false;
6750       else
6751         upb_fielddef_setdefaultint64(f, val);
6752       break;
6753     }
6754     case UPB_TYPE_UINT32: {
6755       unsigned long val = strtoul(str, &end, 0);
6756       if (val > UINT32_MAX || errno == ERANGE || *end)
6757         success = false;
6758       else
6759         upb_fielddef_setdefaultuint32(f, val);
6760       break;
6761     }
6762     case UPB_TYPE_UINT64: {
6763       /* XXX: Need to write our own strtoull, since it's not available in c89. */
6764       unsigned long long val = strtoul(str, &end, 0);
6765       if (val > UINT64_MAX || errno == ERANGE || *end)
6766         success = false;
6767       else
6768         upb_fielddef_setdefaultuint64(f, val);
6769       break;
6770     }
6771     case UPB_TYPE_DOUBLE: {
6772       double val = strtod(str, &end);
6773       if (errno == ERANGE || *end)
6774         success = false;
6775       else
6776         upb_fielddef_setdefaultdouble(f, val);
6777       break;
6778     }
6779     case UPB_TYPE_FLOAT: {
6780       /* XXX: Need to write our own strtof, since it's not available in c89. */
6781       float val = strtod(str, &end);
6782       if (errno == ERANGE || *end)
6783         success = false;
6784       else
6785         upb_fielddef_setdefaultfloat(f, val);
6786       break;
6787     }
6788     case UPB_TYPE_BOOL: {
6789       if (strcmp(str, "false") == 0)
6790         upb_fielddef_setdefaultbool(f, false);
6791       else if (strcmp(str, "true") == 0)
6792         upb_fielddef_setdefaultbool(f, true);
6793       else
6794         success = false;
6795       break;
6796     }
6797     default: abort();
6798   }
6799   return success;
6800 }
6801 
field_endmsg(void * closure,const void * hd,upb_status * status)6802 static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
6803   upb_descreader *r = closure;
6804   upb_fielddef *f = r->f;
6805   UPB_UNUSED(hd);
6806 
6807   /* TODO: verify that all required fields were present. */
6808   assert(upb_fielddef_number(f) != 0);
6809   assert(upb_fielddef_name(f) != NULL);
6810   assert((upb_fielddef_subdefname(f) != NULL) == upb_fielddef_hassubdef(f));
6811 
6812   if (r->default_string) {
6813     if (upb_fielddef_issubmsg(f)) {
6814       upb_status_seterrmsg(status, "Submessages cannot have defaults.");
6815       return false;
6816     }
6817     if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM) {
6818       upb_fielddef_setdefaultcstr(f, r->default_string, NULL);
6819     } else {
6820       if (r->default_string && !parse_default(r->default_string, f)) {
6821         /* We don't worry too much about giving a great error message since the
6822          * compiler should have ensured this was correct. */
6823         upb_status_seterrmsg(status, "Error converting default value.");
6824         return false;
6825       }
6826     }
6827   }
6828   return true;
6829 }
6830 
field_onlazy(void * closure,const void * hd,bool val)6831 static bool field_onlazy(void *closure, const void *hd, bool val) {
6832   upb_descreader *r = closure;
6833   UPB_UNUSED(hd);
6834 
6835   upb_fielddef_setlazy(r->f, val);
6836   return true;
6837 }
6838 
field_onpacked(void * closure,const void * hd,bool val)6839 static bool field_onpacked(void *closure, const void *hd, bool val) {
6840   upb_descreader *r = closure;
6841   UPB_UNUSED(hd);
6842 
6843   upb_fielddef_setpacked(r->f, val);
6844   return true;
6845 }
6846 
field_ontype(void * closure,const void * hd,int32_t val)6847 static bool field_ontype(void *closure, const void *hd, int32_t val) {
6848   upb_descreader *r = closure;
6849   UPB_UNUSED(hd);
6850 
6851   upb_fielddef_setdescriptortype(r->f, val);
6852   return true;
6853 }
6854 
field_onlabel(void * closure,const void * hd,int32_t val)6855 static bool field_onlabel(void *closure, const void *hd, int32_t val) {
6856   upb_descreader *r = closure;
6857   UPB_UNUSED(hd);
6858 
6859   upb_fielddef_setlabel(r->f, val);
6860   return true;
6861 }
6862 
field_onnumber(void * closure,const void * hd,int32_t val)6863 static bool field_onnumber(void *closure, const void *hd, int32_t val) {
6864   upb_descreader *r = closure;
6865   bool ok;
6866   UPB_UNUSED(hd);
6867 
6868   ok = upb_fielddef_setnumber(r->f, val, NULL);
6869   UPB_ASSERT_VAR(ok, ok);
6870   return true;
6871 }
6872 
field_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6873 static size_t field_onname(void *closure, const void *hd, const char *buf,
6874                            size_t n, const upb_bufhandle *handle) {
6875   upb_descreader *r = closure;
6876   char *name = upb_strndup(buf, n);
6877   UPB_UNUSED(hd);
6878   UPB_UNUSED(handle);
6879 
6880   /* XXX: see comment at the top of the file. */
6881   upb_fielddef_setname(r->f, name, NULL);
6882   upb_gfree(name);
6883   return n;
6884 }
6885 
field_ontypename(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6886 static size_t field_ontypename(void *closure, const void *hd, const char *buf,
6887                                size_t n, const upb_bufhandle *handle) {
6888   upb_descreader *r = closure;
6889   char *name = upb_strndup(buf, n);
6890   UPB_UNUSED(hd);
6891   UPB_UNUSED(handle);
6892 
6893   /* XXX: see comment at the top of the file. */
6894   upb_fielddef_setsubdefname(r->f, name, NULL);
6895   upb_gfree(name);
6896   return n;
6897 }
6898 
field_onextendee(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6899 static size_t field_onextendee(void *closure, const void *hd, const char *buf,
6900                                size_t n, const upb_bufhandle *handle) {
6901   upb_descreader *r = closure;
6902   char *name = upb_strndup(buf, n);
6903   UPB_UNUSED(hd);
6904   UPB_UNUSED(handle);
6905 
6906   /* XXX: see comment at the top of the file. */
6907   upb_fielddef_setcontainingtypename(r->f, name, NULL);
6908   upb_gfree(name);
6909   return n;
6910 }
6911 
field_ondefaultval(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6912 static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
6913                                  size_t n, const upb_bufhandle *handle) {
6914   upb_descreader *r = closure;
6915   UPB_UNUSED(hd);
6916   UPB_UNUSED(handle);
6917 
6918   /* Have to convert from string to the correct type, but we might not know the
6919    * type yet, so we save it as a string until the end of the field.
6920    * XXX: see comment at the top of the file. */
6921   upb_gfree(r->default_string);
6922   r->default_string = upb_strndup(buf, n);
6923   return n;
6924 }
6925 
6926 /** Handlers for google.protobuf.DescriptorProto ******************************/
6927 
msg_start(void * closure,const void * hd)6928 static bool msg_start(void *closure, const void *hd) {
6929   upb_descreader *r = closure;
6930   UPB_UNUSED(hd);
6931 
6932   upb_descreader_startcontainer(r);
6933   return true;
6934 }
6935 
msg_end(void * closure,const void * hd,upb_status * status)6936 static bool msg_end(void *closure, const void *hd, upb_status *status) {
6937   upb_descreader *r = closure;
6938   upb_msgdef *m = upb_descreader_top(r);
6939   UPB_UNUSED(hd);
6940 
6941   if(!upb_def_fullname(upb_msgdef_upcast_mutable(m))) {
6942     upb_status_seterrmsg(status, "Encountered message with no name.");
6943     return false;
6944   }
6945   return upb_descreader_endcontainer(r);
6946 }
6947 
msg_name(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6948 static size_t msg_name(void *closure, const void *hd, const char *buf,
6949                        size_t n, const upb_bufhandle *handle) {
6950   upb_descreader *r = closure;
6951   upb_msgdef *m = upb_descreader_top(r);
6952   /* XXX: see comment at the top of the file. */
6953   char *name = upb_strndup(buf, n);
6954   UPB_UNUSED(hd);
6955   UPB_UNUSED(handle);
6956 
6957   upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL);
6958   upb_descreader_setscopename(r, name);  /* Passes ownership of name. */
6959   return n;
6960 }
6961 
msg_startmsg(void * closure,const void * hd)6962 static void *msg_startmsg(void *closure, const void *hd) {
6963   upb_descreader *r = closure;
6964   upb_msgdef *m = upb_msgdef_new(&m);
6965   bool ok = upb_filedef_addmsg(r->file, m, &m, NULL);
6966   UPB_UNUSED(hd);
6967   UPB_ASSERT_VAR(ok, ok);
6968   return r;
6969 }
6970 
msg_startext(void * closure,const void * hd)6971 static void *msg_startext(void *closure, const void *hd) {
6972   upb_descreader *r = closure;
6973   upb_fielddef *f = upb_fielddef_new(&f);
6974   bool ok = upb_filedef_addext(r->file, f, &f, NULL);
6975   UPB_UNUSED(hd);
6976   UPB_ASSERT_VAR(ok, ok);
6977   return r;
6978 }
6979 
msg_startfield(void * closure,const void * hd)6980 static void *msg_startfield(void *closure, const void *hd) {
6981   upb_descreader *r = closure;
6982   r->f = upb_fielddef_new(&r->f);
6983   /* We can't add the new field to the message until its name/number are
6984    * filled in. */
6985   UPB_UNUSED(hd);
6986   return r;
6987 }
6988 
msg_endfield(void * closure,const void * hd)6989 static bool msg_endfield(void *closure, const void *hd) {
6990   upb_descreader *r = closure;
6991   upb_msgdef *m = upb_descreader_top(r);
6992   UPB_UNUSED(hd);
6993 
6994   upb_msgdef_addfield(m, r->f, &r->f, NULL);
6995   r->f = NULL;
6996   return true;
6997 }
6998 
msg_onmapentry(void * closure,const void * hd,bool mapentry)6999 static bool msg_onmapentry(void *closure, const void *hd, bool mapentry) {
7000   upb_descreader *r = closure;
7001   upb_msgdef *m = upb_descreader_top(r);
7002   UPB_UNUSED(hd);
7003 
7004   upb_msgdef_setmapentry(m, mapentry);
7005   r->f = NULL;
7006   return true;
7007 }
7008 
7009 
7010 
7011 /** Code to register handlers *************************************************/
7012 
7013 #define F(msg, field) upbdefs_google_protobuf_ ## msg ## _f_ ## field(m)
7014 
reghandlers(const void * closure,upb_handlers * h)7015 static void reghandlers(const void *closure, upb_handlers *h) {
7016   const upb_msgdef *m = upb_handlers_msgdef(h);
7017   UPB_UNUSED(closure);
7018 
7019   if (upbdefs_google_protobuf_FileDescriptorSet_is(m)) {
7020     upb_handlers_setstartsubmsg(h, F(FileDescriptorSet, file),
7021                                 &fileset_startfile, NULL);
7022   } else if (upbdefs_google_protobuf_DescriptorProto_is(m)) {
7023     upb_handlers_setstartmsg(h, &msg_start, NULL);
7024     upb_handlers_setendmsg(h, &msg_end, NULL);
7025     upb_handlers_setstring(h, F(DescriptorProto, name), &msg_name, NULL);
7026     upb_handlers_setstartsubmsg(h, F(DescriptorProto, extension), &msg_startext,
7027                                 NULL);
7028     upb_handlers_setstartsubmsg(h, F(DescriptorProto, nested_type),
7029                                 &msg_startmsg, NULL);
7030     upb_handlers_setstartsubmsg(h, F(DescriptorProto, field),
7031                                 &msg_startfield, NULL);
7032     upb_handlers_setendsubmsg(h, F(DescriptorProto, field),
7033                               &msg_endfield, NULL);
7034     upb_handlers_setstartsubmsg(h, F(DescriptorProto, enum_type),
7035                                 &file_startenum, NULL);
7036   } else if (upbdefs_google_protobuf_FileDescriptorProto_is(m)) {
7037     upb_handlers_setstartmsg(h, &file_start, NULL);
7038     upb_handlers_setendmsg(h, &file_end, NULL);
7039     upb_handlers_setstring(h, F(FileDescriptorProto, name), &file_onname,
7040                            NULL);
7041     upb_handlers_setstring(h, F(FileDescriptorProto, package), &file_onpackage,
7042                            NULL);
7043     upb_handlers_setstring(h, F(FileDescriptorProto, syntax), &file_onsyntax,
7044                            NULL);
7045     upb_handlers_setstartsubmsg(h, F(FileDescriptorProto, message_type),
7046                                 &file_startmsg, NULL);
7047     upb_handlers_setstartsubmsg(h, F(FileDescriptorProto, enum_type),
7048                                 &file_startenum, NULL);
7049     upb_handlers_setstartsubmsg(h, F(FileDescriptorProto, extension),
7050                                 &file_startext, NULL);
7051   } else if (upbdefs_google_protobuf_EnumValueDescriptorProto_is(m)) {
7052     upb_handlers_setstartmsg(h, &enumval_startmsg, NULL);
7053     upb_handlers_setendmsg(h, &enumval_endmsg, NULL);
7054     upb_handlers_setstring(h, F(EnumValueDescriptorProto, name), &enumval_onname, NULL);
7055     upb_handlers_setint32(h, F(EnumValueDescriptorProto, number), &enumval_onnumber,
7056                           NULL);
7057   } else if (upbdefs_google_protobuf_EnumDescriptorProto_is(m)) {
7058     upb_handlers_setendmsg(h, &enum_endmsg, NULL);
7059     upb_handlers_setstring(h, F(EnumDescriptorProto, name), &enum_onname, NULL);
7060   } else if (upbdefs_google_protobuf_FieldDescriptorProto_is(m)) {
7061     upb_handlers_setstartmsg(h, &field_startmsg, NULL);
7062     upb_handlers_setendmsg(h, &field_endmsg, NULL);
7063     upb_handlers_setint32(h, F(FieldDescriptorProto, type), &field_ontype,
7064                           NULL);
7065     upb_handlers_setint32(h, F(FieldDescriptorProto, label), &field_onlabel,
7066                           NULL);
7067     upb_handlers_setint32(h, F(FieldDescriptorProto, number), &field_onnumber,
7068                           NULL);
7069     upb_handlers_setstring(h, F(FieldDescriptorProto, name), &field_onname,
7070                            NULL);
7071     upb_handlers_setstring(h, F(FieldDescriptorProto, type_name),
7072                            &field_ontypename, NULL);
7073     upb_handlers_setstring(h, F(FieldDescriptorProto, extendee),
7074                            &field_onextendee, NULL);
7075     upb_handlers_setstring(h, F(FieldDescriptorProto, default_value),
7076                            &field_ondefaultval, NULL);
7077   } else if (upbdefs_google_protobuf_FieldOptions_is(m)) {
7078     upb_handlers_setbool(h, F(FieldOptions, lazy), &field_onlazy, NULL);
7079     upb_handlers_setbool(h, F(FieldOptions, packed), &field_onpacked, NULL);
7080   } else if (upbdefs_google_protobuf_MessageOptions_is(m)) {
7081     upb_handlers_setbool(h, F(MessageOptions, map_entry), &msg_onmapentry, NULL);
7082   }
7083 
7084   assert(upb_ok(upb_handlers_status(h)));
7085 }
7086 
7087 #undef F
7088 
descreader_cleanup(void * _r)7089 void descreader_cleanup(void *_r) {
7090   upb_descreader *r = _r;
7091   size_t i;
7092 
7093   for (i = 0; i < upb_descreader_filecount(r); i++) {
7094     upb_filedef_unref(upb_descreader_file(r, i), &r->files);
7095   }
7096 
7097   upb_gfree(r->name);
7098   upb_inttable_uninit(&r->files);
7099   upb_gfree(r->default_string);
7100   while (r->stack_len > 0) {
7101     upb_descreader_frame *f = &r->stack[--r->stack_len];
7102     upb_gfree(f->name);
7103   }
7104 }
7105 
7106 
7107 /* Public API  ****************************************************************/
7108 
upb_descreader_create(upb_env * e,const upb_handlers * h)7109 upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
7110   upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
7111   if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
7112     return NULL;
7113   }
7114 
7115   upb_inttable_init(&r->files, UPB_CTYPE_PTR);
7116   upb_sink_reset(upb_descreader_input(r), h, r);
7117   r->stack_len = 0;
7118   r->name = NULL;
7119   r->default_string = NULL;
7120 
7121   return r;
7122 }
7123 
upb_descreader_filecount(const upb_descreader * r)7124 size_t upb_descreader_filecount(const upb_descreader *r) {
7125   return upb_inttable_count(&r->files);
7126 }
7127 
upb_descreader_file(const upb_descreader * r,size_t i)7128 upb_filedef *upb_descreader_file(const upb_descreader *r, size_t i) {
7129   upb_value v;
7130   if (upb_inttable_lookup(&r->files, i, &v)) {
7131     return upb_value_getptr(v);
7132   } else {
7133     return NULL;
7134   }
7135 }
7136 
upb_descreader_input(upb_descreader * r)7137 upb_sink *upb_descreader_input(upb_descreader *r) {
7138   return &r->sink;
7139 }
7140 
upb_descreader_newhandlers(const void * owner)7141 const upb_handlers *upb_descreader_newhandlers(const void *owner) {
7142   const upb_msgdef *m = upbdefs_google_protobuf_FileDescriptorSet_get(&m);
7143   const upb_handlers *h = upb_handlers_newfrozen(m, owner, reghandlers, NULL);
7144   upb_msgdef_unref(m, &m);
7145   return h;
7146 }
7147 /*
7148 ** protobuf decoder bytecode compiler
7149 **
7150 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
7151 ** according to that specific schema and destination handlers.
7152 **
7153 ** Compiling to bytecode is always the first step.  If we are using the
7154 ** interpreted decoder we leave it as bytecode and interpret that.  If we are
7155 ** using a JIT decoder we use a code generator to turn the bytecode into native
7156 ** code, LLVM IR, etc.
7157 **
7158 ** Bytecode definition is in decoder.int.h.
7159 */
7160 
7161 #include <stdarg.h>
7162 
7163 #ifdef UPB_DUMP_BYTECODE
7164 #include <stdio.h>
7165 #endif
7166 
7167 #define MAXLABEL 5
7168 #define EMPTYLABEL -1
7169 
7170 /* mgroup *********************************************************************/
7171 
freegroup(upb_refcounted * r)7172 static void freegroup(upb_refcounted *r) {
7173   mgroup *g = (mgroup*)r;
7174   upb_inttable_uninit(&g->methods);
7175 #ifdef UPB_USE_JIT_X64
7176   upb_pbdecoder_freejit(g);
7177 #endif
7178   upb_gfree(g->bytecode);
7179   upb_gfree(g);
7180 }
7181 
visitgroup(const upb_refcounted * r,upb_refcounted_visit * visit,void * closure)7182 static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
7183                        void *closure) {
7184   const mgroup *g = (const mgroup*)r;
7185   upb_inttable_iter i;
7186   upb_inttable_begin(&i, &g->methods);
7187   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7188     upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
7189     visit(r, upb_pbdecodermethod_upcast(method), closure);
7190   }
7191 }
7192 
newgroup(const void * owner)7193 mgroup *newgroup(const void *owner) {
7194   mgroup *g = upb_gmalloc(sizeof(*g));
7195   static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
7196   upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
7197   upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
7198   g->bytecode = NULL;
7199   g->bytecode_end = NULL;
7200   return g;
7201 }
7202 
7203 
7204 /* upb_pbdecodermethod ********************************************************/
7205 
freemethod(upb_refcounted * r)7206 static void freemethod(upb_refcounted *r) {
7207   upb_pbdecodermethod *method = (upb_pbdecodermethod*)r;
7208 
7209   if (method->dest_handlers_) {
7210     upb_handlers_unref(method->dest_handlers_, method);
7211   }
7212 
7213   upb_inttable_uninit(&method->dispatch);
7214   upb_gfree(method);
7215 }
7216 
visitmethod(const upb_refcounted * r,upb_refcounted_visit * visit,void * closure)7217 static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
7218                         void *closure) {
7219   const upb_pbdecodermethod *m = (const upb_pbdecodermethod*)r;
7220   visit(r, m->group, closure);
7221 }
7222 
newmethod(const upb_handlers * dest_handlers,mgroup * group)7223 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
7224                                       mgroup *group) {
7225   static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
7226   upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
7227   upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
7228   upb_byteshandler_init(&ret->input_handler_);
7229 
7230   /* The method references the group and vice-versa, in a circular reference. */
7231   upb_ref2(ret, group);
7232   upb_ref2(group, ret);
7233   upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
7234   upb_pbdecodermethod_unref(ret, &ret);
7235 
7236   ret->group = mgroup_upcast_mutable(group);
7237   ret->dest_handlers_ = dest_handlers;
7238   ret->is_native_ = false;  /* If we JIT, it will update this later. */
7239   upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
7240 
7241   if (ret->dest_handlers_) {
7242     upb_handlers_ref(ret->dest_handlers_, ret);
7243   }
7244   return ret;
7245 }
7246 
upb_pbdecodermethod_desthandlers(const upb_pbdecodermethod * m)7247 const upb_handlers *upb_pbdecodermethod_desthandlers(
7248     const upb_pbdecodermethod *m) {
7249   return m->dest_handlers_;
7250 }
7251 
upb_pbdecodermethod_inputhandler(const upb_pbdecodermethod * m)7252 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
7253     const upb_pbdecodermethod *m) {
7254   return &m->input_handler_;
7255 }
7256 
upb_pbdecodermethod_isnative(const upb_pbdecodermethod * m)7257 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
7258   return m->is_native_;
7259 }
7260 
upb_pbdecodermethod_new(const upb_pbdecodermethodopts * opts,const void * owner)7261 const upb_pbdecodermethod *upb_pbdecodermethod_new(
7262     const upb_pbdecodermethodopts *opts, const void *owner) {
7263   const upb_pbdecodermethod *ret;
7264   upb_pbcodecache cache;
7265 
7266   upb_pbcodecache_init(&cache);
7267   ret = upb_pbcodecache_getdecodermethod(&cache, opts);
7268   upb_pbdecodermethod_ref(ret, owner);
7269   upb_pbcodecache_uninit(&cache);
7270   return ret;
7271 }
7272 
7273 
7274 /* bytecode compiler **********************************************************/
7275 
7276 /* Data used only at compilation time. */
7277 typedef struct {
7278   mgroup *group;
7279 
7280   uint32_t *pc;
7281   int fwd_labels[MAXLABEL];
7282   int back_labels[MAXLABEL];
7283 
7284   /* For fields marked "lazy", parse them lazily or eagerly? */
7285   bool lazy;
7286 } compiler;
7287 
newcompiler(mgroup * group,bool lazy)7288 static compiler *newcompiler(mgroup *group, bool lazy) {
7289   compiler *ret = upb_gmalloc(sizeof(*ret));
7290   int i;
7291 
7292   ret->group = group;
7293   ret->lazy = lazy;
7294   for (i = 0; i < MAXLABEL; i++) {
7295     ret->fwd_labels[i] = EMPTYLABEL;
7296     ret->back_labels[i] = EMPTYLABEL;
7297   }
7298   return ret;
7299 }
7300 
freecompiler(compiler * c)7301 static void freecompiler(compiler *c) {
7302   upb_gfree(c);
7303 }
7304 
7305 const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
7306 
7307 /* How many words an instruction is. */
instruction_len(uint32_t instr)7308 static int instruction_len(uint32_t instr) {
7309   switch (getop(instr)) {
7310     case OP_SETDISPATCH: return 1 + ptr_words;
7311     case OP_TAGN: return 3;
7312     case OP_SETBIGGROUPNUM: return 2;
7313     default: return 1;
7314   }
7315 }
7316 
op_has_longofs(int32_t instruction)7317 bool op_has_longofs(int32_t instruction) {
7318   switch (getop(instruction)) {
7319     case OP_CALL:
7320     case OP_BRANCH:
7321     case OP_CHECKDELIM:
7322       return true;
7323     /* The "tag" instructions only have 8 bytes available for the jump target,
7324      * but that is ok because these opcodes only require short jumps. */
7325     case OP_TAG1:
7326     case OP_TAG2:
7327     case OP_TAGN:
7328       return false;
7329     default:
7330       assert(false);
7331       return false;
7332   }
7333 }
7334 
getofs(uint32_t instruction)7335 static int32_t getofs(uint32_t instruction) {
7336   if (op_has_longofs(instruction)) {
7337     return (int32_t)instruction >> 8;
7338   } else {
7339     return (int8_t)(instruction >> 8);
7340   }
7341 }
7342 
setofs(uint32_t * instruction,int32_t ofs)7343 static void setofs(uint32_t *instruction, int32_t ofs) {
7344   if (op_has_longofs(*instruction)) {
7345     *instruction = getop(*instruction) | ofs << 8;
7346   } else {
7347     *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
7348   }
7349   assert(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
7350 }
7351 
pcofs(compiler * c)7352 static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; }
7353 
7354 /* Defines a local label at the current PC location.  All previous forward
7355  * references are updated to point to this location.  The location is noted
7356  * for any future backward references. */
label(compiler * c,unsigned int label)7357 static void label(compiler *c, unsigned int label) {
7358   int val;
7359   uint32_t *codep;
7360 
7361   assert(label < MAXLABEL);
7362   val = c->fwd_labels[label];
7363   codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
7364   while (codep) {
7365     int ofs = getofs(*codep);
7366     setofs(codep, c->pc - codep - instruction_len(*codep));
7367     codep = ofs ? codep + ofs : NULL;
7368   }
7369   c->fwd_labels[label] = EMPTYLABEL;
7370   c->back_labels[label] = pcofs(c);
7371 }
7372 
7373 /* Creates a reference to a numbered label; either a forward reference
7374  * (positive arg) or backward reference (negative arg).  For forward references
7375  * the value returned now is actually a "next" pointer into a linked list of all
7376  * instructions that use this label and will be patched later when the label is
7377  * defined with label().
7378  *
7379  * The returned value is the offset that should be written into the instruction.
7380  */
labelref(compiler * c,int label)7381 static int32_t labelref(compiler *c, int label) {
7382   assert(label < MAXLABEL);
7383   if (label == LABEL_DISPATCH) {
7384     /* No resolving required. */
7385     return 0;
7386   } else if (label < 0) {
7387     /* Backward local label.  Relative to the next instruction. */
7388     uint32_t from = (c->pc + 1) - c->group->bytecode;
7389     return c->back_labels[-label] - from;
7390   } else {
7391     /* Forward local label: prepend to (possibly-empty) linked list. */
7392     int *lptr = &c->fwd_labels[label];
7393     int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
7394     *lptr = pcofs(c);
7395     return ret;
7396   }
7397 }
7398 
put32(compiler * c,uint32_t v)7399 static void put32(compiler *c, uint32_t v) {
7400   mgroup *g = c->group;
7401   if (c->pc == g->bytecode_end) {
7402     int ofs = pcofs(c);
7403     size_t oldsize = g->bytecode_end - g->bytecode;
7404     size_t newsize = UPB_MAX(oldsize * 2, 64);
7405     /* TODO(haberman): handle OOM. */
7406     g->bytecode = upb_grealloc(g->bytecode, oldsize * sizeof(uint32_t),
7407                                             newsize * sizeof(uint32_t));
7408     g->bytecode_end = g->bytecode + newsize;
7409     c->pc = g->bytecode + ofs;
7410   }
7411   *c->pc++ = v;
7412 }
7413 
putop(compiler * c,opcode op,...)7414 static void putop(compiler *c, opcode op, ...) {
7415   va_list ap;
7416   va_start(ap, op);
7417 
7418   switch (op) {
7419     case OP_SETDISPATCH: {
7420       uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
7421       put32(c, OP_SETDISPATCH);
7422       put32(c, ptr);
7423       if (sizeof(uintptr_t) > sizeof(uint32_t))
7424         put32(c, (uint64_t)ptr >> 32);
7425       break;
7426     }
7427     case OP_STARTMSG:
7428     case OP_ENDMSG:
7429     case OP_PUSHLENDELIM:
7430     case OP_POP:
7431     case OP_SETDELIM:
7432     case OP_HALT:
7433     case OP_RET:
7434     case OP_DISPATCH:
7435       put32(c, op);
7436       break;
7437     case OP_PARSE_DOUBLE:
7438     case OP_PARSE_FLOAT:
7439     case OP_PARSE_INT64:
7440     case OP_PARSE_UINT64:
7441     case OP_PARSE_INT32:
7442     case OP_PARSE_FIXED64:
7443     case OP_PARSE_FIXED32:
7444     case OP_PARSE_BOOL:
7445     case OP_PARSE_UINT32:
7446     case OP_PARSE_SFIXED32:
7447     case OP_PARSE_SFIXED64:
7448     case OP_PARSE_SINT32:
7449     case OP_PARSE_SINT64:
7450     case OP_STARTSEQ:
7451     case OP_ENDSEQ:
7452     case OP_STARTSUBMSG:
7453     case OP_ENDSUBMSG:
7454     case OP_STARTSTR:
7455     case OP_STRING:
7456     case OP_ENDSTR:
7457     case OP_PUSHTAGDELIM:
7458       put32(c, op | va_arg(ap, upb_selector_t) << 8);
7459       break;
7460     case OP_SETBIGGROUPNUM:
7461       put32(c, op);
7462       put32(c, va_arg(ap, int));
7463       break;
7464     case OP_CALL: {
7465       const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
7466       put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
7467       break;
7468     }
7469     case OP_CHECKDELIM:
7470     case OP_BRANCH: {
7471       uint32_t instruction = op;
7472       int label = va_arg(ap, int);
7473       setofs(&instruction, labelref(c, label));
7474       put32(c, instruction);
7475       break;
7476     }
7477     case OP_TAG1:
7478     case OP_TAG2: {
7479       int label = va_arg(ap, int);
7480       uint64_t tag = va_arg(ap, uint64_t);
7481       uint32_t instruction = op | (tag << 16);
7482       assert(tag <= 0xffff);
7483       setofs(&instruction, labelref(c, label));
7484       put32(c, instruction);
7485       break;
7486     }
7487     case OP_TAGN: {
7488       int label = va_arg(ap, int);
7489       uint64_t tag = va_arg(ap, uint64_t);
7490       uint32_t instruction = op | (upb_value_size(tag) << 16);
7491       setofs(&instruction, labelref(c, label));
7492       put32(c, instruction);
7493       put32(c, tag);
7494       put32(c, tag >> 32);
7495       break;
7496     }
7497   }
7498 
7499   va_end(ap);
7500 }
7501 
7502 #if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
7503 
upb_pbdecoder_getopname(unsigned int op)7504 const char *upb_pbdecoder_getopname(unsigned int op) {
7505 #define QUOTE(x) #x
7506 #define EXPAND_AND_QUOTE(x) QUOTE(x)
7507 #define OPNAME(x) OP_##x
7508 #define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
7509 #define T(x) OP(PARSE_##x)
7510   /* Keep in sync with list in decoder.int.h. */
7511   switch ((opcode)op) {
7512     T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
7513     T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
7514     OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
7515     OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
7516     OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
7517     OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
7518     OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
7519   }
7520   return "<unknown op>";
7521 #undef OP
7522 #undef T
7523 }
7524 
7525 #endif
7526 
7527 #ifdef UPB_DUMP_BYTECODE
7528 
dumpbc(uint32_t * p,uint32_t * end,FILE * f)7529 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
7530 
7531   uint32_t *begin = p;
7532 
7533   while (p < end) {
7534     fprintf(f, "%p  %8tx", p, p - begin);
7535     uint32_t instr = *p++;
7536     uint8_t op = getop(instr);
7537     fprintf(f, " %s", upb_pbdecoder_getopname(op));
7538     switch ((opcode)op) {
7539       case OP_SETDISPATCH: {
7540         const upb_inttable *dispatch;
7541         memcpy(&dispatch, p, sizeof(void*));
7542         p += ptr_words;
7543         const upb_pbdecodermethod *method =
7544             (void *)((char *)dispatch -
7545                      offsetof(upb_pbdecodermethod, dispatch));
7546         fprintf(f, " %s", upb_msgdef_fullname(
7547                               upb_handlers_msgdef(method->dest_handlers_)));
7548         break;
7549       }
7550       case OP_DISPATCH:
7551       case OP_STARTMSG:
7552       case OP_ENDMSG:
7553       case OP_PUSHLENDELIM:
7554       case OP_POP:
7555       case OP_SETDELIM:
7556       case OP_HALT:
7557       case OP_RET:
7558         break;
7559       case OP_PARSE_DOUBLE:
7560       case OP_PARSE_FLOAT:
7561       case OP_PARSE_INT64:
7562       case OP_PARSE_UINT64:
7563       case OP_PARSE_INT32:
7564       case OP_PARSE_FIXED64:
7565       case OP_PARSE_FIXED32:
7566       case OP_PARSE_BOOL:
7567       case OP_PARSE_UINT32:
7568       case OP_PARSE_SFIXED32:
7569       case OP_PARSE_SFIXED64:
7570       case OP_PARSE_SINT32:
7571       case OP_PARSE_SINT64:
7572       case OP_STARTSEQ:
7573       case OP_ENDSEQ:
7574       case OP_STARTSUBMSG:
7575       case OP_ENDSUBMSG:
7576       case OP_STARTSTR:
7577       case OP_STRING:
7578       case OP_ENDSTR:
7579       case OP_PUSHTAGDELIM:
7580         fprintf(f, " %d", instr >> 8);
7581         break;
7582       case OP_SETBIGGROUPNUM:
7583         fprintf(f, " %d", *p++);
7584         break;
7585       case OP_CHECKDELIM:
7586       case OP_CALL:
7587       case OP_BRANCH:
7588         fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
7589         break;
7590       case OP_TAG1:
7591       case OP_TAG2: {
7592         fprintf(f, " tag:0x%x", instr >> 16);
7593         if (getofs(instr)) {
7594           fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
7595         }
7596         break;
7597       }
7598       case OP_TAGN: {
7599         uint64_t tag = *p++;
7600         tag |= (uint64_t)*p++ << 32;
7601         fprintf(f, " tag:0x%llx", (long long)tag);
7602         fprintf(f, " n:%d", instr >> 16);
7603         if (getofs(instr)) {
7604           fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
7605         }
7606         break;
7607       }
7608     }
7609     fputs("\n", f);
7610   }
7611 }
7612 
7613 #endif
7614 
get_encoded_tag(const upb_fielddef * f,int wire_type)7615 static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
7616   uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type;
7617   uint64_t encoded_tag = upb_vencode32(tag);
7618   /* No tag should be greater than 5 bytes. */
7619   assert(encoded_tag <= 0xffffffffff);
7620   return encoded_tag;
7621 }
7622 
putchecktag(compiler * c,const upb_fielddef * f,int wire_type,int dest)7623 static void putchecktag(compiler *c, const upb_fielddef *f,
7624                         int wire_type, int dest) {
7625   uint64_t tag = get_encoded_tag(f, wire_type);
7626   switch (upb_value_size(tag)) {
7627     case 1:
7628       putop(c, OP_TAG1, dest, tag);
7629       break;
7630     case 2:
7631       putop(c, OP_TAG2, dest, tag);
7632       break;
7633     default:
7634       putop(c, OP_TAGN, dest, tag);
7635       break;
7636   }
7637 }
7638 
getsel(const upb_fielddef * f,upb_handlertype_t type)7639 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
7640   upb_selector_t selector;
7641   bool ok = upb_handlers_getselector(f, type, &selector);
7642   UPB_ASSERT_VAR(ok, ok);
7643   return selector;
7644 }
7645 
7646 /* Takes an existing, primary dispatch table entry and repacks it with a
7647  * different alternate wire type.  Called when we are inserting a secondary
7648  * dispatch table entry for an alternate wire type. */
repack(uint64_t dispatch,int new_wt2)7649 static uint64_t repack(uint64_t dispatch, int new_wt2) {
7650   uint64_t ofs;
7651   uint8_t wt1;
7652   uint8_t old_wt2;
7653   upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
7654   assert(old_wt2 == NO_WIRE_TYPE);  /* wt2 should not be set yet. */
7655   return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
7656 }
7657 
7658 /* Marks the current bytecode position as the dispatch target for this message,
7659  * field, and wire type. */
dispatchtarget(compiler * c,upb_pbdecodermethod * method,const upb_fielddef * f,int wire_type)7660 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
7661                            const upb_fielddef *f, int wire_type) {
7662   /* Offset is relative to msg base. */
7663   uint64_t ofs = pcofs(c) - method->code_base.ofs;
7664   uint32_t fn = upb_fielddef_number(f);
7665   upb_inttable *d = &method->dispatch;
7666   upb_value v;
7667   if (upb_inttable_remove(d, fn, &v)) {
7668     /* TODO: prioritize based on packed setting in .proto file. */
7669     uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
7670     upb_inttable_insert(d, fn, upb_value_uint64(repacked));
7671     upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
7672   } else {
7673     uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
7674     upb_inttable_insert(d, fn, upb_value_uint64(val));
7675   }
7676 }
7677 
putpush(compiler * c,const upb_fielddef * f)7678 static void putpush(compiler *c, const upb_fielddef *f) {
7679   if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
7680     putop(c, OP_PUSHLENDELIM);
7681   } else {
7682     uint32_t fn = upb_fielddef_number(f);
7683     if (fn >= 1 << 24) {
7684       putop(c, OP_PUSHTAGDELIM, 0);
7685       putop(c, OP_SETBIGGROUPNUM, fn);
7686     } else {
7687       putop(c, OP_PUSHTAGDELIM, fn);
7688     }
7689   }
7690 }
7691 
find_submethod(const compiler * c,const upb_pbdecodermethod * method,const upb_fielddef * f)7692 static upb_pbdecodermethod *find_submethod(const compiler *c,
7693                                            const upb_pbdecodermethod *method,
7694                                            const upb_fielddef *f) {
7695   const upb_handlers *sub =
7696       upb_handlers_getsubhandlers(method->dest_handlers_, f);
7697   upb_value v;
7698   return upb_inttable_lookupptr(&c->group->methods, sub, &v)
7699              ? upb_value_getptr(v)
7700              : NULL;
7701 }
7702 
putsel(compiler * c,opcode op,upb_selector_t sel,const upb_handlers * h)7703 static void putsel(compiler *c, opcode op, upb_selector_t sel,
7704                    const upb_handlers *h) {
7705   if (upb_handlers_gethandler(h, sel)) {
7706     putop(c, op, sel);
7707   }
7708 }
7709 
7710 /* Puts an opcode to call a callback, but only if a callback actually exists for
7711  * this field and handler type. */
maybeput(compiler * c,opcode op,const upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)7712 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
7713                      const upb_fielddef *f, upb_handlertype_t type) {
7714   putsel(c, op, getsel(f, type), h);
7715 }
7716 
haslazyhandlers(const upb_handlers * h,const upb_fielddef * f)7717 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
7718   if (!upb_fielddef_lazy(f))
7719     return false;
7720 
7721   return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) ||
7722          upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) ||
7723          upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
7724 }
7725 
7726 
7727 /* bytecode compiler code generation ******************************************/
7728 
7729 /* Symbolic names for our local labels. */
7730 #define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
7731 #define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
7732 #define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
7733 #define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
7734 
7735 /* Generates bytecode to parse a single non-lazy message field. */
generate_msgfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)7736 static void generate_msgfield(compiler *c, const upb_fielddef *f,
7737                               upb_pbdecodermethod *method) {
7738   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7739   const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
7740   int wire_type;
7741 
7742   if (!sub_m) {
7743     /* Don't emit any code for this field at all; it will be parsed as an
7744      * unknown field.
7745      *
7746      * TODO(haberman): we should change this to parse it as a string field
7747      * instead.  It will probably be faster, but more importantly, once we
7748      * start vending unknown fields, a field shouldn't be treated as unknown
7749      * just because it doesn't have subhandlers registered. */
7750     return;
7751   }
7752 
7753   label(c, LABEL_FIELD);
7754 
7755   wire_type =
7756       (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
7757           ? UPB_WIRE_TYPE_DELIMITED
7758           : UPB_WIRE_TYPE_START_GROUP;
7759 
7760   if (upb_fielddef_isseq(f)) {
7761     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7762     putchecktag(c, f, wire_type, LABEL_DISPATCH);
7763    dispatchtarget(c, method, f, wire_type);
7764     putop(c, OP_PUSHTAGDELIM, 0);
7765     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
7766    label(c, LABEL_LOOPSTART);
7767     putpush(c, f);
7768     putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
7769     putop(c, OP_CALL, sub_m);
7770     putop(c, OP_POP);
7771     maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
7772     if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
7773       putop(c, OP_SETDELIM);
7774     }
7775     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7776     putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
7777     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7778    label(c, LABEL_LOOPBREAK);
7779     putop(c, OP_POP);
7780     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7781   } else {
7782     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7783     putchecktag(c, f, wire_type, LABEL_DISPATCH);
7784    dispatchtarget(c, method, f, wire_type);
7785     putpush(c, f);
7786     putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
7787     putop(c, OP_CALL, sub_m);
7788     putop(c, OP_POP);
7789     maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
7790     if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
7791       putop(c, OP_SETDELIM);
7792     }
7793   }
7794 }
7795 
7796 /* Generates bytecode to parse a single string or lazy submessage field. */
generate_delimfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)7797 static void generate_delimfield(compiler *c, const upb_fielddef *f,
7798                                 upb_pbdecodermethod *method) {
7799   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7800 
7801   label(c, LABEL_FIELD);
7802   if (upb_fielddef_isseq(f)) {
7803     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7804     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7805    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7806     putop(c, OP_PUSHTAGDELIM, 0);
7807     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
7808    label(c, LABEL_LOOPSTART);
7809     putop(c, OP_PUSHLENDELIM);
7810     putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
7811     /* Need to emit even if no handler to skip past the string. */
7812     putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
7813     putop(c, OP_POP);
7814     maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
7815     putop(c, OP_SETDELIM);
7816     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7817     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
7818     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7819    label(c, LABEL_LOOPBREAK);
7820     putop(c, OP_POP);
7821     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7822   } else {
7823     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7824     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7825    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7826     putop(c, OP_PUSHLENDELIM);
7827     putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
7828     putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
7829     putop(c, OP_POP);
7830     maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
7831     putop(c, OP_SETDELIM);
7832   }
7833 }
7834 
7835 /* Generates bytecode to parse a single primitive field. */
generate_primitivefield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)7836 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
7837                                     upb_pbdecodermethod *method) {
7838   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
7839   upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
7840   opcode parse_type;
7841   upb_selector_t sel;
7842   int wire_type;
7843 
7844   label(c, LABEL_FIELD);
7845 
7846   /* From a decoding perspective, ENUM is the same as INT32. */
7847   if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
7848     descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
7849 
7850   parse_type = (opcode)descriptor_type;
7851 
7852   /* TODO(haberman): generate packed or non-packed first depending on "packed"
7853    * setting in the fielddef.  This will favor (in speed) whichever was
7854    * specified. */
7855 
7856   assert((int)parse_type >= 0 && parse_type <= OP_MAX);
7857   sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
7858   wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
7859   if (upb_fielddef_isseq(f)) {
7860     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7861     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
7862    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
7863     putop(c, OP_PUSHLENDELIM);
7864     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Packed */
7865    label(c, LABEL_LOOPSTART);
7866     putop(c, parse_type, sel);
7867     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7868     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7869    dispatchtarget(c, method, f, wire_type);
7870     putop(c, OP_PUSHTAGDELIM, 0);
7871     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Non-packed */
7872    label(c, LABEL_LOOPSTART);
7873     putop(c, parse_type, sel);
7874     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
7875     putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
7876     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
7877    label(c, LABEL_LOOPBREAK);
7878     putop(c, OP_POP);  /* Packed and non-packed join. */
7879     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
7880     putop(c, OP_SETDELIM);  /* Could remove for non-packed by dup ENDSEQ. */
7881   } else {
7882     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7883     putchecktag(c, f, wire_type, LABEL_DISPATCH);
7884    dispatchtarget(c, method, f, wire_type);
7885     putop(c, parse_type, sel);
7886   }
7887 }
7888 
7889 /* Adds bytecode for parsing the given message to the given decoderplan,
7890  * while adding all dispatch targets to this message's dispatch table. */
compile_method(compiler * c,upb_pbdecodermethod * method)7891 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
7892   const upb_handlers *h;
7893   const upb_msgdef *md;
7894   uint32_t* start_pc;
7895   upb_msg_field_iter i;
7896   upb_value val;
7897 
7898   assert(method);
7899 
7900   /* Clear all entries in the dispatch table. */
7901   upb_inttable_uninit(&method->dispatch);
7902   upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
7903 
7904   h = upb_pbdecodermethod_desthandlers(method);
7905   md = upb_handlers_msgdef(h);
7906 
7907  method->code_base.ofs = pcofs(c);
7908   putop(c, OP_SETDISPATCH, &method->dispatch);
7909   putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
7910  label(c, LABEL_FIELD);
7911   start_pc = c->pc;
7912   for(upb_msg_field_begin(&i, md);
7913       !upb_msg_field_done(&i);
7914       upb_msg_field_next(&i)) {
7915     const upb_fielddef *f = upb_msg_iter_field(&i);
7916     upb_fieldtype_t type = upb_fielddef_type(f);
7917 
7918     if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
7919       generate_msgfield(c, f, method);
7920     } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
7921                type == UPB_TYPE_MESSAGE) {
7922       generate_delimfield(c, f, method);
7923     } else {
7924       generate_primitivefield(c, f, method);
7925     }
7926   }
7927 
7928   /* If there were no fields, or if no handlers were defined, we need to
7929    * generate a non-empty loop body so that we can at least dispatch for unknown
7930    * fields and check for the end of the message. */
7931   if (c->pc == start_pc) {
7932     /* Check for end-of-message. */
7933     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
7934     /* Unconditionally dispatch. */
7935     putop(c, OP_DISPATCH, 0);
7936   }
7937 
7938   /* For now we just loop back to the last field of the message (or if none,
7939    * the DISPATCH opcode for the message). */
7940   putop(c, OP_BRANCH, -LABEL_FIELD);
7941 
7942   /* Insert both a label and a dispatch table entry for this end-of-msg. */
7943  label(c, LABEL_ENDMSG);
7944   val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
7945   upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
7946 
7947   putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
7948   putop(c, OP_RET);
7949 
7950   upb_inttable_compact(&method->dispatch);
7951 }
7952 
7953 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
7954  * Returns the method for these handlers.
7955  *
7956  * Generates a new method for every destination handlers reachable from "h". */
find_methods(compiler * c,const upb_handlers * h)7957 static void find_methods(compiler *c, const upb_handlers *h) {
7958   upb_value v;
7959   upb_msg_field_iter i;
7960   const upb_msgdef *md;
7961 
7962   if (upb_inttable_lookupptr(&c->group->methods, h, &v))
7963     return;
7964   newmethod(h, c->group);
7965 
7966   /* Find submethods. */
7967   md = upb_handlers_msgdef(h);
7968   for(upb_msg_field_begin(&i, md);
7969       !upb_msg_field_done(&i);
7970       upb_msg_field_next(&i)) {
7971     const upb_fielddef *f = upb_msg_iter_field(&i);
7972     const upb_handlers *sub_h;
7973     if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
7974         (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
7975       /* We only generate a decoder method for submessages with handlers.
7976        * Others will be parsed as unknown fields. */
7977       find_methods(c, sub_h);
7978     }
7979   }
7980 }
7981 
7982 /* (Re-)compile bytecode for all messages in "msgs."
7983  * Overwrites any existing bytecode in "c". */
compile_methods(compiler * c)7984 static void compile_methods(compiler *c) {
7985   upb_inttable_iter i;
7986 
7987   /* Start over at the beginning of the bytecode. */
7988   c->pc = c->group->bytecode;
7989 
7990   upb_inttable_begin(&i, &c->group->methods);
7991   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7992     upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
7993     compile_method(c, method);
7994   }
7995 }
7996 
set_bytecode_handlers(mgroup * g)7997 static void set_bytecode_handlers(mgroup *g) {
7998   upb_inttable_iter i;
7999   upb_inttable_begin(&i, &g->methods);
8000   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
8001     upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i));
8002     upb_byteshandler *h = &m->input_handler_;
8003 
8004     m->code_base.ptr = g->bytecode + m->code_base.ofs;
8005 
8006     upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr);
8007     upb_byteshandler_setstring(h, upb_pbdecoder_decode, g);
8008     upb_byteshandler_setendstr(h, upb_pbdecoder_end, m);
8009   }
8010 }
8011 
8012 
8013 /* JIT setup. *****************************************************************/
8014 
8015 #ifdef UPB_USE_JIT_X64
8016 
sethandlers(mgroup * g,bool allowjit)8017 static void sethandlers(mgroup *g, bool allowjit) {
8018   g->jit_code = NULL;
8019   if (allowjit) {
8020     /* Compile byte-code into machine code, create handlers. */
8021     upb_pbdecoder_jit(g);
8022   } else {
8023     set_bytecode_handlers(g);
8024   }
8025 }
8026 
8027 #else  /* UPB_USE_JIT_X64 */
8028 
sethandlers(mgroup * g,bool allowjit)8029 static void sethandlers(mgroup *g, bool allowjit) {
8030   /* No JIT compiled in; use bytecode handlers unconditionally. */
8031   UPB_UNUSED(allowjit);
8032   set_bytecode_handlers(g);
8033 }
8034 
8035 #endif  /* UPB_USE_JIT_X64 */
8036 
8037 
8038 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
8039  * handlers and other mgroups (but verify we have a transitive closure). */
mgroup_new(const upb_handlers * dest,bool allowjit,bool lazy,const void * owner)8040 const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
8041                          const void *owner) {
8042   mgroup *g;
8043   compiler *c;
8044 
8045   UPB_UNUSED(allowjit);
8046   assert(upb_handlers_isfrozen(dest));
8047 
8048   g = newgroup(owner);
8049   c = newcompiler(g, lazy);
8050   find_methods(c, dest);
8051 
8052   /* We compile in two passes:
8053    * 1. all messages are assigned relative offsets from the beginning of the
8054    *    bytecode (saved in method->code_base).
8055    * 2. forwards OP_CALL instructions can be correctly linked since message
8056    *    offsets have been previously assigned.
8057    *
8058    * Could avoid the second pass by linking OP_CALL instructions somehow. */
8059   compile_methods(c);
8060   compile_methods(c);
8061   g->bytecode_end = c->pc;
8062   freecompiler(c);
8063 
8064 #ifdef UPB_DUMP_BYTECODE
8065   {
8066     FILE *f = fopen("/tmp/upb-bytecode", "w");
8067     assert(f);
8068     dumpbc(g->bytecode, g->bytecode_end, stderr);
8069     dumpbc(g->bytecode, g->bytecode_end, f);
8070     fclose(f);
8071 
8072     f = fopen("/tmp/upb-bytecode.bin", "wb");
8073     assert(f);
8074     fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
8075     fclose(f);
8076   }
8077 #endif
8078 
8079   sethandlers(g, allowjit);
8080   return g;
8081 }
8082 
8083 
8084 /* upb_pbcodecache ************************************************************/
8085 
upb_pbcodecache_init(upb_pbcodecache * c)8086 void upb_pbcodecache_init(upb_pbcodecache *c) {
8087   upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
8088   c->allow_jit_ = true;
8089 }
8090 
upb_pbcodecache_uninit(upb_pbcodecache * c)8091 void upb_pbcodecache_uninit(upb_pbcodecache *c) {
8092   upb_inttable_iter i;
8093   upb_inttable_begin(&i, &c->groups);
8094   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
8095     const mgroup *group = upb_value_getconstptr(upb_inttable_iter_value(&i));
8096     mgroup_unref(group, c);
8097   }
8098   upb_inttable_uninit(&c->groups);
8099 }
8100 
upb_pbcodecache_allowjit(const upb_pbcodecache * c)8101 bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
8102   return c->allow_jit_;
8103 }
8104 
upb_pbcodecache_setallowjit(upb_pbcodecache * c,bool allow)8105 bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
8106   if (upb_inttable_count(&c->groups) > 0)
8107     return false;
8108   c->allow_jit_ = allow;
8109   return true;
8110 }
8111 
upb_pbcodecache_getdecodermethod(upb_pbcodecache * c,const upb_pbdecodermethodopts * opts)8112 const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
8113     upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
8114   upb_value v;
8115   bool ok;
8116 
8117   /* Right now we build a new DecoderMethod every time.
8118    * TODO(haberman): properly cache methods by their true key. */
8119   const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
8120   upb_inttable_push(&c->groups, upb_value_constptr(g));
8121 
8122   ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
8123   UPB_ASSERT_VAR(ok, ok);
8124   return upb_value_getptr(v);
8125 }
8126 
8127 
8128 /* upb_pbdecodermethodopts ****************************************************/
8129 
upb_pbdecodermethodopts_init(upb_pbdecodermethodopts * opts,const upb_handlers * h)8130 void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
8131                                   const upb_handlers *h) {
8132   opts->handlers = h;
8133   opts->lazy = false;
8134 }
8135 
upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts * opts,bool lazy)8136 void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
8137   opts->lazy = lazy;
8138 }
8139 /*
8140 ** upb::Decoder (Bytecode Decoder VM)
8141 **
8142 ** Bytecode must previously have been generated using the bytecode compiler in
8143 ** compile_decoder.c.  This decoder then walks through the bytecode op-by-op to
8144 ** parse the input.
8145 **
8146 ** Decoding is fully resumable; we just keep a pointer to the current bytecode
8147 ** instruction and resume from there.  A fair amount of the logic here is to
8148 ** handle the fact that values can span buffer seams and we have to be able to
8149 ** be capable of suspending/resuming from any byte in the stream.  This
8150 ** sometimes requires keeping a few trailing bytes from the last buffer around
8151 ** in the "residual" buffer.
8152 */
8153 
8154 #include <inttypes.h>
8155 #include <stddef.h>
8156 
8157 #ifdef UPB_DUMP_BYTECODE
8158 #include <stdio.h>
8159 #endif
8160 
8161 #define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
8162 
8163 /* Error messages that are shared between the bytecode and JIT decoders. */
8164 const char *kPbDecoderStackOverflow = "Nesting too deep.";
8165 const char *kPbDecoderSubmessageTooLong =
8166     "Submessage end extends past enclosing submessage.";
8167 
8168 /* Error messages shared within this file. */
8169 static const char *kUnterminatedVarint = "Unterminated varint.";
8170 
8171 /* upb_pbdecoder **************************************************************/
8172 
8173 static opcode halt = OP_HALT;
8174 
8175 /* A dummy character we can point to when the user passes us a NULL buffer.
8176  * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
8177  * behavior, which would invalidate functions like curbufleft(). */
8178 static const char dummy_char;
8179 
8180 /* Whether an op consumes any of the input buffer. */
consumes_input(opcode op)8181 static bool consumes_input(opcode op) {
8182   switch (op) {
8183     case OP_SETDISPATCH:
8184     case OP_STARTMSG:
8185     case OP_ENDMSG:
8186     case OP_STARTSEQ:
8187     case OP_ENDSEQ:
8188     case OP_STARTSUBMSG:
8189     case OP_ENDSUBMSG:
8190     case OP_STARTSTR:
8191     case OP_ENDSTR:
8192     case OP_PUSHTAGDELIM:
8193     case OP_POP:
8194     case OP_SETDELIM:
8195     case OP_SETBIGGROUPNUM:
8196     case OP_CHECKDELIM:
8197     case OP_CALL:
8198     case OP_RET:
8199     case OP_BRANCH:
8200       return false;
8201     default:
8202       return true;
8203   }
8204 }
8205 
stacksize(upb_pbdecoder * d,size_t entries)8206 static size_t stacksize(upb_pbdecoder *d, size_t entries) {
8207   UPB_UNUSED(d);
8208   return entries * sizeof(upb_pbdecoder_frame);
8209 }
8210 
callstacksize(upb_pbdecoder * d,size_t entries)8211 static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
8212   UPB_UNUSED(d);
8213 
8214 #ifdef UPB_USE_JIT_X64
8215   if (d->method_->is_native_) {
8216     /* Each native stack frame needs two pointers, plus we need a few frames for
8217      * the enter/exit trampolines. */
8218     size_t ret = entries * sizeof(void*) * 2;
8219     ret += sizeof(void*) * 10;
8220     return ret;
8221   }
8222 #endif
8223 
8224   return entries * sizeof(uint32_t*);
8225 }
8226 
8227 
8228 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
8229 
8230 /* It's unfortunate that we have to micro-manage the compiler with
8231  * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
8232  * specific to one hardware configuration.  But empirically on a Core i7,
8233  * performance increases 30-50% with these annotations.  Every instance where
8234  * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
8235  * benchmarks. */
8236 
seterr(upb_pbdecoder * d,const char * msg)8237 static void seterr(upb_pbdecoder *d, const char *msg) {
8238   upb_status status = UPB_STATUS_INIT;
8239   upb_status_seterrmsg(&status, msg);
8240   upb_env_reporterror(d->env, &status);
8241 }
8242 
upb_pbdecoder_seterr(upb_pbdecoder * d,const char * msg)8243 void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
8244   seterr(d, msg);
8245 }
8246 
8247 
8248 /* Buffering ******************************************************************/
8249 
8250 /* We operate on one buffer at a time, which is either the user's buffer passed
8251  * to our "decode" callback or some residual bytes from the previous buffer. */
8252 
8253 /* How many bytes can be safely read from d->ptr without reading past end-of-buf
8254  * or past the current delimited end. */
curbufleft(const upb_pbdecoder * d)8255 static size_t curbufleft(const upb_pbdecoder *d) {
8256   assert(d->data_end >= d->ptr);
8257   return d->data_end - d->ptr;
8258 }
8259 
8260 /* How many bytes are available before end-of-buffer. */
bufleft(const upb_pbdecoder * d)8261 static size_t bufleft(const upb_pbdecoder *d) {
8262   return d->end - d->ptr;
8263 }
8264 
8265 /* Overall stream offset of d->ptr. */
offset(const upb_pbdecoder * d)8266 uint64_t offset(const upb_pbdecoder *d) {
8267   return d->bufstart_ofs + (d->ptr - d->buf);
8268 }
8269 
8270 /* How many bytes are available before the end of this delimited region. */
delim_remaining(const upb_pbdecoder * d)8271 size_t delim_remaining(const upb_pbdecoder *d) {
8272   return d->top->end_ofs - offset(d);
8273 }
8274 
8275 /* Advances d->ptr. */
advance(upb_pbdecoder * d,size_t len)8276 static void advance(upb_pbdecoder *d, size_t len) {
8277   assert(curbufleft(d) >= len);
8278   d->ptr += len;
8279 }
8280 
in_buf(const char * p,const char * buf,const char * end)8281 static bool in_buf(const char *p, const char *buf, const char *end) {
8282   return p >= buf && p <= end;
8283 }
8284 
in_residual_buf(const upb_pbdecoder * d,const char * p)8285 static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
8286   return in_buf(p, d->residual, d->residual_end);
8287 }
8288 
8289 /* Calculates the delim_end value, which is affected by both the current buffer
8290  * and the parsing stack, so must be called whenever either is updated. */
set_delim_end(upb_pbdecoder * d)8291 static void set_delim_end(upb_pbdecoder *d) {
8292   size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
8293   if (delim_ofs <= (size_t)(d->end - d->buf)) {
8294     d->delim_end = d->buf + delim_ofs;
8295     d->data_end = d->delim_end;
8296   } else {
8297     d->data_end = d->end;
8298     d->delim_end = NULL;
8299   }
8300 }
8301 
switchtobuf(upb_pbdecoder * d,const char * buf,const char * end)8302 static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
8303   d->ptr = buf;
8304   d->buf = buf;
8305   d->end = end;
8306   set_delim_end(d);
8307 }
8308 
advancetobuf(upb_pbdecoder * d,const char * buf,size_t len)8309 static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
8310   assert(curbufleft(d) == 0);
8311   d->bufstart_ofs += (d->end - d->buf);
8312   switchtobuf(d, buf, buf + len);
8313 }
8314 
checkpoint(upb_pbdecoder * d)8315 static void checkpoint(upb_pbdecoder *d) {
8316   /* The assertion here is in the interests of efficiency, not correctness.
8317    * We are trying to ensure that we don't checkpoint() more often than
8318    * necessary. */
8319   assert(d->checkpoint != d->ptr);
8320   d->checkpoint = d->ptr;
8321 }
8322 
8323 /* Skips "bytes" bytes in the stream, which may be more than available.  If we
8324  * skip more bytes than are available, we return a long read count to the caller
8325  * indicating how many bytes can be skipped over before passing actual data
8326  * again.  Skipped bytes can pass a NULL buffer and the decoder guarantees they
8327  * won't actually be read.
8328  */
skip(upb_pbdecoder * d,size_t bytes)8329 static int32_t skip(upb_pbdecoder *d, size_t bytes) {
8330   assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
8331   assert(d->skip == 0);
8332   if (bytes > delim_remaining(d)) {
8333     seterr(d, "Skipped value extended beyond enclosing submessage.");
8334     return upb_pbdecoder_suspend(d);
8335   } else if (bufleft(d) >= bytes) {
8336     /* Skipped data is all in current buffer, and more is still available. */
8337     advance(d, bytes);
8338     d->skip = 0;
8339     return DECODE_OK;
8340   } else {
8341     /* Skipped data extends beyond currently available buffers. */
8342     d->pc = d->last;
8343     d->skip = bytes - curbufleft(d);
8344     d->bufstart_ofs += (d->end - d->buf);
8345     d->residual_end = d->residual;
8346     switchtobuf(d, d->residual, d->residual_end);
8347     return d->size_param + d->skip;
8348   }
8349 }
8350 
8351 
8352 /* Resumes the decoder from an initial state or from a previous suspend. */
upb_pbdecoder_resume(upb_pbdecoder * d,void * p,const char * buf,size_t size,const upb_bufhandle * handle)8353 int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
8354                              size_t size, const upb_bufhandle *handle) {
8355   UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */
8356 
8357   /* d->skip and d->residual_end could probably elegantly be represented
8358    * as a single variable, to more easily represent this invariant. */
8359   assert(!(d->skip && d->residual_end > d->residual));
8360 
8361   /* We need to remember the original size_param, so that the value we return
8362    * is relative to it, even if we do some skipping first. */
8363   d->size_param = size;
8364   d->handle = handle;
8365 
8366   /* Have to handle this case specially (ie. not with skip()) because the user
8367    * is allowed to pass a NULL buffer here, which won't allow us to safely
8368    * calculate a d->end or use our normal functions like curbufleft(). */
8369   if (d->skip && d->skip >= size) {
8370     d->skip -= size;
8371     d->bufstart_ofs += size;
8372     buf = &dummy_char;
8373     size = 0;
8374 
8375     /* We can't just return now, because we might need to execute some ops
8376      * like CHECKDELIM, which could call some callbacks and pop the stack. */
8377   }
8378 
8379   /* We need to pretend that this was the actual buffer param, since some of the
8380    * calculations assume that d->ptr/d->buf is relative to this. */
8381   d->buf_param = buf;
8382 
8383   if (!buf) {
8384     /* NULL buf is ok if its entire span is covered by the "skip" above, but
8385      * by this point we know that "skip" doesn't cover the buffer. */
8386     seterr(d, "Passed NULL buffer over non-skippable region.");
8387     return upb_pbdecoder_suspend(d);
8388   }
8389 
8390   if (d->residual_end > d->residual) {
8391     /* We have residual bytes from the last buffer. */
8392     assert(d->ptr == d->residual);
8393   } else {
8394     switchtobuf(d, buf, buf + size);
8395   }
8396 
8397   d->checkpoint = d->ptr;
8398 
8399   /* Handle skips that don't cover the whole buffer (as above). */
8400   if (d->skip) {
8401     size_t skip_bytes = d->skip;
8402     d->skip = 0;
8403     CHECK_RETURN(skip(d, skip_bytes));
8404     checkpoint(d);
8405   }
8406 
8407   /* If we're inside an unknown group, continue to parse unknown values. */
8408   if (d->top->groupnum < 0) {
8409     CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
8410     checkpoint(d);
8411   }
8412 
8413   return DECODE_OK;
8414 }
8415 
8416 /* Suspends the decoder at the last checkpoint, without saving any residual
8417  * bytes.  If there are any unconsumed bytes, returns a short byte count. */
upb_pbdecoder_suspend(upb_pbdecoder * d)8418 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
8419   d->pc = d->last;
8420   if (d->checkpoint == d->residual) {
8421     /* Checkpoint was in residual buf; no user bytes were consumed. */
8422     d->ptr = d->residual;
8423     return 0;
8424   } else {
8425     size_t ret = d->size_param - (d->end - d->checkpoint);
8426     assert(!in_residual_buf(d, d->checkpoint));
8427     assert(d->buf == d->buf_param || d->buf == &dummy_char);
8428 
8429     d->bufstart_ofs += (d->checkpoint - d->buf);
8430     d->residual_end = d->residual;
8431     switchtobuf(d, d->residual, d->residual_end);
8432     return ret;
8433   }
8434 }
8435 
8436 /* Suspends the decoder at the last checkpoint, and saves any unconsumed
8437  * bytes in our residual buffer.  This is necessary if we need more user
8438  * bytes to form a complete value, which might not be contiguous in the
8439  * user's buffers.  Always consumes all user bytes. */
suspend_save(upb_pbdecoder * d)8440 static size_t suspend_save(upb_pbdecoder *d) {
8441   /* We hit end-of-buffer before we could parse a full value.
8442    * Save any unconsumed bytes (if any) to the residual buffer. */
8443   d->pc = d->last;
8444 
8445   if (d->checkpoint == d->residual) {
8446     /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
8447     assert((d->residual_end - d->residual) + d->size_param <=
8448            sizeof(d->residual));
8449     if (!in_residual_buf(d, d->ptr)) {
8450       d->bufstart_ofs -= (d->residual_end - d->residual);
8451     }
8452     memcpy(d->residual_end, d->buf_param, d->size_param);
8453     d->residual_end += d->size_param;
8454   } else {
8455     /* Checkpoint was in user buf; old residual bytes not needed. */
8456     size_t save;
8457     assert(!in_residual_buf(d, d->checkpoint));
8458 
8459     d->ptr = d->checkpoint;
8460     save = curbufleft(d);
8461     assert(save <= sizeof(d->residual));
8462     memcpy(d->residual, d->ptr, save);
8463     d->residual_end = d->residual + save;
8464     d->bufstart_ofs = offset(d);
8465   }
8466 
8467   switchtobuf(d, d->residual, d->residual_end);
8468   return d->size_param;
8469 }
8470 
8471 /* Copies the next "bytes" bytes into "buf" and advances the stream.
8472  * Requires that this many bytes are available in the current buffer. */
consumebytes(upb_pbdecoder * d,void * buf,size_t bytes)8473 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
8474                                          size_t bytes) {
8475   assert(bytes <= curbufleft(d));
8476   memcpy(buf, d->ptr, bytes);
8477   advance(d, bytes);
8478 }
8479 
8480 /* Slow path for getting the next "bytes" bytes, regardless of whether they are
8481  * available in the current buffer or not.  Returns a status code as described
8482  * in decoder.int.h. */
getbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)8483 UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
8484                                           size_t bytes) {
8485   const size_t avail = curbufleft(d);
8486   consumebytes(d, buf, avail);
8487   bytes -= avail;
8488   assert(bytes > 0);
8489   if (in_residual_buf(d, d->ptr)) {
8490     advancetobuf(d, d->buf_param, d->size_param);
8491   }
8492   if (curbufleft(d) >= bytes) {
8493     consumebytes(d, (char *)buf + avail, bytes);
8494     return DECODE_OK;
8495   } else if (d->data_end == d->delim_end) {
8496     seterr(d, "Submessage ended in the middle of a value or group");
8497     return upb_pbdecoder_suspend(d);
8498   } else {
8499     return suspend_save(d);
8500   }
8501 }
8502 
8503 /* Gets the next "bytes" bytes, regardless of whether they are available in the
8504  * current buffer or not.  Returns a status code as described in decoder.int.h.
8505  */
getbytes(upb_pbdecoder * d,void * buf,size_t bytes)8506 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
8507                                         size_t bytes) {
8508   if (curbufleft(d) >= bytes) {
8509     /* Buffer has enough data to satisfy. */
8510     consumebytes(d, buf, bytes);
8511     return DECODE_OK;
8512   } else {
8513     return getbytes_slow(d, buf, bytes);
8514   }
8515 }
8516 
peekbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)8517 UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
8518                                           size_t bytes) {
8519   size_t ret = curbufleft(d);
8520   memcpy(buf, d->ptr, ret);
8521   if (in_residual_buf(d, d->ptr)) {
8522     size_t copy = UPB_MIN(bytes - ret, d->size_param);
8523     memcpy((char *)buf + ret, d->buf_param, copy);
8524     ret += copy;
8525   }
8526   return ret;
8527 }
8528 
peekbytes(upb_pbdecoder * d,void * buf,size_t bytes)8529 UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
8530                                         size_t bytes) {
8531   if (curbufleft(d) >= bytes) {
8532     memcpy(buf, d->ptr, bytes);
8533     return bytes;
8534   } else {
8535     return peekbytes_slow(d, buf, bytes);
8536   }
8537 }
8538 
8539 
8540 /* Decoding of wire types *****************************************************/
8541 
8542 /* Slow path for decoding a varint from the current buffer position.
8543  * Returns a status code as described in decoder.int.h. */
upb_pbdecoder_decode_varint_slow(upb_pbdecoder * d,uint64_t * u64)8544 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
8545                                                       uint64_t *u64) {
8546   uint8_t byte = 0x80;
8547   int bitpos;
8548   *u64 = 0;
8549   for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
8550     CHECK_RETURN(getbytes(d, &byte, 1));
8551     *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
8552   }
8553   if(bitpos == 70 && (byte & 0x80)) {
8554     seterr(d, kUnterminatedVarint);
8555     return upb_pbdecoder_suspend(d);
8556   }
8557   return DECODE_OK;
8558 }
8559 
8560 /* Decodes a varint from the current buffer position.
8561  * Returns a status code as described in decoder.int.h. */
decode_varint(upb_pbdecoder * d,uint64_t * u64)8562 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
8563   if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
8564     *u64 = *d->ptr;
8565     advance(d, 1);
8566     return DECODE_OK;
8567   } else if (curbufleft(d) >= 10) {
8568     /* Fast case. */
8569     upb_decoderet r = upb_vdecode_fast(d->ptr);
8570     if (r.p == NULL) {
8571       seterr(d, kUnterminatedVarint);
8572       return upb_pbdecoder_suspend(d);
8573     }
8574     advance(d, r.p - d->ptr);
8575     *u64 = r.val;
8576     return DECODE_OK;
8577   } else {
8578     /* Slow case -- varint spans buffer seam. */
8579     return upb_pbdecoder_decode_varint_slow(d, u64);
8580   }
8581 }
8582 
8583 /* Decodes a 32-bit varint from the current buffer position.
8584  * Returns a status code as described in decoder.int.h. */
decode_v32(upb_pbdecoder * d,uint32_t * u32)8585 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
8586   uint64_t u64;
8587   int32_t ret = decode_varint(d, &u64);
8588   if (ret >= 0) return ret;
8589   if (u64 > UINT32_MAX) {
8590     seterr(d, "Unterminated 32-bit varint");
8591     /* TODO(haberman) guarantee that this function return is >= 0 somehow,
8592      * so we know this path will always be treated as error by our caller.
8593      * Right now the size_t -> int32_t can overflow and produce negative values.
8594      */
8595     *u32 = 0;
8596     return upb_pbdecoder_suspend(d);
8597   }
8598   *u32 = u64;
8599   return DECODE_OK;
8600 }
8601 
8602 /* Decodes a fixed32 from the current buffer position.
8603  * Returns a status code as described in decoder.int.h.
8604  * TODO: proper byte swapping for big-endian machines. */
decode_fixed32(upb_pbdecoder * d,uint32_t * u32)8605 UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
8606   return getbytes(d, u32, 4);
8607 }
8608 
8609 /* Decodes a fixed64 from the current buffer position.
8610  * Returns a status code as described in decoder.int.h.
8611  * TODO: proper byte swapping for big-endian machines. */
decode_fixed64(upb_pbdecoder * d,uint64_t * u64)8612 UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
8613   return getbytes(d, u64, 8);
8614 }
8615 
8616 /* Non-static versions of the above functions.
8617  * These are called by the JIT for fallback paths. */
upb_pbdecoder_decode_f32(upb_pbdecoder * d,uint32_t * u32)8618 int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
8619   return decode_fixed32(d, u32);
8620 }
8621 
upb_pbdecoder_decode_f64(upb_pbdecoder * d,uint64_t * u64)8622 int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
8623   return decode_fixed64(d, u64);
8624 }
8625 
as_double(uint64_t n)8626 static double as_double(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
as_float(uint32_t n)8627 static float  as_float(uint32_t n)  { float  f; memcpy(&f, &n, 4); return f; }
8628 
8629 /* Pushes a frame onto the decoder stack. */
decoder_push(upb_pbdecoder * d,uint64_t end)8630 static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
8631   upb_pbdecoder_frame *fr = d->top;
8632 
8633   if (end > fr->end_ofs) {
8634     seterr(d, kPbDecoderSubmessageTooLong);
8635     return false;
8636   } else if (fr == d->limit) {
8637     seterr(d, kPbDecoderStackOverflow);
8638     return false;
8639   }
8640 
8641   fr++;
8642   fr->end_ofs = end;
8643   fr->dispatch = NULL;
8644   fr->groupnum = 0;
8645   d->top = fr;
8646   return true;
8647 }
8648 
pushtagdelim(upb_pbdecoder * d,uint32_t arg)8649 static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
8650   /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
8651    * field number) prior to hitting any enclosing submessage end, pushing our
8652    * existing delim end prevents us from continuing to parse values from a
8653    * corrupt proto that doesn't give us an END tag in time. */
8654   if (!decoder_push(d, d->top->end_ofs))
8655     return false;
8656   d->top->groupnum = arg;
8657   return true;
8658 }
8659 
8660 /* Pops a frame from the decoder stack. */
decoder_pop(upb_pbdecoder * d)8661 static void decoder_pop(upb_pbdecoder *d) { d->top--; }
8662 
upb_pbdecoder_checktag_slow(upb_pbdecoder * d,uint64_t expected)8663 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
8664                                                  uint64_t expected) {
8665   uint64_t data = 0;
8666   size_t bytes = upb_value_size(expected);
8667   size_t read = peekbytes(d, &data, bytes);
8668   if (read == bytes && data == expected) {
8669     /* Advance past matched bytes. */
8670     int32_t ok = getbytes(d, &data, read);
8671     UPB_ASSERT_VAR(ok, ok < 0);
8672     return DECODE_OK;
8673   } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
8674     return suspend_save(d);
8675   } else {
8676     return DECODE_MISMATCH;
8677   }
8678 }
8679 
upb_pbdecoder_skipunknown(upb_pbdecoder * d,int32_t fieldnum,uint8_t wire_type)8680 int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
8681                                   uint8_t wire_type) {
8682   if (fieldnum >= 0)
8683     goto have_tag;
8684 
8685   while (true) {
8686     uint32_t tag;
8687     CHECK_RETURN(decode_v32(d, &tag));
8688     wire_type = tag & 0x7;
8689     fieldnum = tag >> 3;
8690 
8691 have_tag:
8692     if (fieldnum == 0) {
8693       seterr(d, "Saw invalid field number (0)");
8694       return upb_pbdecoder_suspend(d);
8695     }
8696 
8697     /* TODO: deliver to unknown field callback. */
8698     switch (wire_type) {
8699       case UPB_WIRE_TYPE_32BIT:
8700         CHECK_RETURN(skip(d, 4));
8701         break;
8702       case UPB_WIRE_TYPE_64BIT:
8703         CHECK_RETURN(skip(d, 8));
8704         break;
8705       case UPB_WIRE_TYPE_VARINT: {
8706         uint64_t u64;
8707         CHECK_RETURN(decode_varint(d, &u64));
8708         break;
8709       }
8710       case UPB_WIRE_TYPE_DELIMITED: {
8711         uint32_t len;
8712         CHECK_RETURN(decode_v32(d, &len));
8713         CHECK_RETURN(skip(d, len));
8714         break;
8715       }
8716       case UPB_WIRE_TYPE_START_GROUP:
8717         CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
8718         break;
8719       case UPB_WIRE_TYPE_END_GROUP:
8720         if (fieldnum == -d->top->groupnum) {
8721           decoder_pop(d);
8722         } else if (fieldnum == d->top->groupnum) {
8723           return DECODE_ENDGROUP;
8724         } else {
8725           seterr(d, "Unmatched ENDGROUP tag.");
8726           return upb_pbdecoder_suspend(d);
8727         }
8728         break;
8729       default:
8730         seterr(d, "Invalid wire type");
8731         return upb_pbdecoder_suspend(d);
8732     }
8733 
8734     if (d->top->groupnum >= 0) {
8735       return DECODE_OK;
8736     }
8737 
8738     /* Unknown group -- continue looping over unknown fields. */
8739     checkpoint(d);
8740   }
8741 }
8742 
goto_endmsg(upb_pbdecoder * d)8743 static void goto_endmsg(upb_pbdecoder *d) {
8744   upb_value v;
8745   bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v);
8746   UPB_ASSERT_VAR(found, found);
8747   d->pc = d->top->base + upb_value_getuint64(v);
8748 }
8749 
8750 /* Parses a tag and jumps to the corresponding bytecode instruction for this
8751  * field.
8752  *
8753  * If the tag is unknown (or the wire type doesn't match), parses the field as
8754  * unknown.  If the tag is a valid ENDGROUP tag, jumps to the bytecode
8755  * instruction for the end of message. */
dispatch(upb_pbdecoder * d)8756 static int32_t dispatch(upb_pbdecoder *d) {
8757   upb_inttable *dispatch = d->top->dispatch;
8758   uint32_t tag;
8759   uint8_t wire_type;
8760   uint32_t fieldnum;
8761   upb_value val;
8762   int32_t retval;
8763 
8764   /* Decode tag. */
8765   CHECK_RETURN(decode_v32(d, &tag));
8766   wire_type = tag & 0x7;
8767   fieldnum = tag >> 3;
8768 
8769   /* Lookup tag.  Because of packed/non-packed compatibility, we have to
8770    * check the wire type against two possibilities. */
8771   if (fieldnum != DISPATCH_ENDMSG &&
8772       upb_inttable_lookup32(dispatch, fieldnum, &val)) {
8773     uint64_t v = upb_value_getuint64(val);
8774     if (wire_type == (v & 0xff)) {
8775       d->pc = d->top->base + (v >> 16);
8776       return DECODE_OK;
8777     } else if (wire_type == ((v >> 8) & 0xff)) {
8778       bool found =
8779           upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
8780       UPB_ASSERT_VAR(found, found);
8781       d->pc = d->top->base + upb_value_getuint64(val);
8782       return DECODE_OK;
8783     }
8784   }
8785 
8786   /* We have some unknown fields (or ENDGROUP) to parse.  The DISPATCH or TAG
8787    * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
8788    * we need to back up to, so that when we're done skipping unknown data we
8789    * can re-check the delimited end. */
8790   d->last--;  /* Necessary if we get suspended */
8791   d->pc = d->last;
8792   assert(getop(*d->last) == OP_CHECKDELIM);
8793 
8794   /* Unknown field or ENDGROUP. */
8795   retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
8796 
8797   CHECK_RETURN(retval);
8798 
8799   if (retval == DECODE_ENDGROUP) {
8800     goto_endmsg(d);
8801     return DECODE_OK;
8802   }
8803 
8804   return DECODE_OK;
8805 }
8806 
8807 /* Callers know that the stack is more than one deep because the opcodes that
8808  * call this only occur after PUSH operations. */
outer_frame(upb_pbdecoder * d)8809 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
8810   assert(d->top != d->stack);
8811   return d->top - 1;
8812 }
8813 
8814 
8815 /* The main decoding loop *****************************************************/
8816 
8817 /* The main decoder VM function.  Uses traditional bytecode dispatch loop with a
8818  * switch() statement. */
run_decoder_vm(upb_pbdecoder * d,const mgroup * group,const upb_bufhandle * handle)8819 size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
8820                       const upb_bufhandle* handle) {
8821 
8822 #define VMCASE(op, code) \
8823   case op: { code; if (consumes_input(op)) checkpoint(d); break; }
8824 #define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
8825   VMCASE(OP_PARSE_ ## type, { \
8826     ctype val; \
8827     CHECK_RETURN(decode_ ## wt(d, &val)); \
8828     upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
8829   })
8830 
8831   while(1) {
8832     int32_t instruction;
8833     opcode op;
8834     uint32_t arg;
8835     int32_t longofs;
8836 
8837     d->last = d->pc;
8838     instruction = *d->pc++;
8839     op = getop(instruction);
8840     arg = instruction >> 8;
8841     longofs = arg;
8842     assert(d->ptr != d->residual_end);
8843     UPB_UNUSED(group);
8844 #ifdef UPB_DUMP_BYTECODE
8845     fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
8846                     "%x %s (%d)\n",
8847             (int)offset(d),
8848             (int)(d->ptr - d->buf),
8849             (int)(d->data_end - d->ptr),
8850             (int)(d->end - d->ptr),
8851             (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
8852             (int)(d->pc - 1 - group->bytecode),
8853             upb_pbdecoder_getopname(op),
8854             arg);
8855 #endif
8856     switch (op) {
8857       /* Technically, we are losing data if we see a 32-bit varint that is not
8858        * properly sign-extended.  We could detect this and error about the data
8859        * loss, but proto2 does not do this, so we pass. */
8860       PRIMITIVE_OP(INT32,    varint,  int32,  int32_t,      uint64_t)
8861       PRIMITIVE_OP(INT64,    varint,  int64,  int64_t,      uint64_t)
8862       PRIMITIVE_OP(UINT32,   varint,  uint32, uint32_t,     uint64_t)
8863       PRIMITIVE_OP(UINT64,   varint,  uint64, uint64_t,     uint64_t)
8864       PRIMITIVE_OP(FIXED32,  fixed32, uint32, uint32_t,     uint32_t)
8865       PRIMITIVE_OP(FIXED64,  fixed64, uint64, uint64_t,     uint64_t)
8866       PRIMITIVE_OP(SFIXED32, fixed32, int32,  int32_t,      uint32_t)
8867       PRIMITIVE_OP(SFIXED64, fixed64, int64,  int64_t,      uint64_t)
8868       PRIMITIVE_OP(BOOL,     varint,  bool,   bool,         uint64_t)
8869       PRIMITIVE_OP(DOUBLE,   fixed64, double, as_double,    uint64_t)
8870       PRIMITIVE_OP(FLOAT,    fixed32, float,  as_float,     uint32_t)
8871       PRIMITIVE_OP(SINT32,   varint,  int32,  upb_zzdec_32, uint64_t)
8872       PRIMITIVE_OP(SINT64,   varint,  int64,  upb_zzdec_64, uint64_t)
8873 
8874       VMCASE(OP_SETDISPATCH,
8875         d->top->base = d->pc - 1;
8876         memcpy(&d->top->dispatch, d->pc, sizeof(void*));
8877         d->pc += sizeof(void*) / sizeof(uint32_t);
8878       )
8879       VMCASE(OP_STARTMSG,
8880         CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
8881       )
8882       VMCASE(OP_ENDMSG,
8883         CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
8884       )
8885       VMCASE(OP_STARTSEQ,
8886         upb_pbdecoder_frame *outer = outer_frame(d);
8887         CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
8888       )
8889       VMCASE(OP_ENDSEQ,
8890         CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
8891       )
8892       VMCASE(OP_STARTSUBMSG,
8893         upb_pbdecoder_frame *outer = outer_frame(d);
8894         CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
8895       )
8896       VMCASE(OP_ENDSUBMSG,
8897         CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
8898       )
8899       VMCASE(OP_STARTSTR,
8900         uint32_t len = delim_remaining(d);
8901         upb_pbdecoder_frame *outer = outer_frame(d);
8902         CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
8903         if (len == 0) {
8904           d->pc++;  /* Skip OP_STRING. */
8905         }
8906       )
8907       VMCASE(OP_STRING,
8908         uint32_t len = curbufleft(d);
8909         size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
8910         if (n > len) {
8911           if (n > delim_remaining(d)) {
8912             seterr(d, "Tried to skip past end of string.");
8913             return upb_pbdecoder_suspend(d);
8914           } else {
8915             int32_t ret = skip(d, n);
8916             /* This shouldn't return DECODE_OK, because n > len. */
8917             assert(ret >= 0);
8918             return ret;
8919           }
8920         }
8921         advance(d, n);
8922         if (n < len || d->delim_end == NULL) {
8923           /* We aren't finished with this string yet. */
8924           d->pc--;  /* Repeat OP_STRING. */
8925           if (n > 0) checkpoint(d);
8926           return upb_pbdecoder_suspend(d);
8927         }
8928       )
8929       VMCASE(OP_ENDSTR,
8930         CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
8931       )
8932       VMCASE(OP_PUSHTAGDELIM,
8933         CHECK_SUSPEND(pushtagdelim(d, arg));
8934       )
8935       VMCASE(OP_SETBIGGROUPNUM,
8936         d->top->groupnum = *d->pc++;
8937       )
8938       VMCASE(OP_POP,
8939         assert(d->top > d->stack);
8940         decoder_pop(d);
8941       )
8942       VMCASE(OP_PUSHLENDELIM,
8943         uint32_t len;
8944         CHECK_RETURN(decode_v32(d, &len));
8945         CHECK_SUSPEND(decoder_push(d, offset(d) + len));
8946         set_delim_end(d);
8947       )
8948       VMCASE(OP_SETDELIM,
8949         set_delim_end(d);
8950       )
8951       VMCASE(OP_CHECKDELIM,
8952         /* We are guaranteed of this assert because we never allow ourselves to
8953          * consume bytes beyond data_end, which covers delim_end when non-NULL.
8954          */
8955         assert(!(d->delim_end && d->ptr > d->delim_end));
8956         if (d->ptr == d->delim_end)
8957           d->pc += longofs;
8958       )
8959       VMCASE(OP_CALL,
8960         d->callstack[d->call_len++] = d->pc;
8961         d->pc += longofs;
8962       )
8963       VMCASE(OP_RET,
8964         assert(d->call_len > 0);
8965         d->pc = d->callstack[--d->call_len];
8966       )
8967       VMCASE(OP_BRANCH,
8968         d->pc += longofs;
8969       )
8970       VMCASE(OP_TAG1,
8971         uint8_t expected;
8972         CHECK_SUSPEND(curbufleft(d) > 0);
8973         expected = (arg >> 8) & 0xff;
8974         if (*d->ptr == expected) {
8975           advance(d, 1);
8976         } else {
8977           int8_t shortofs;
8978          badtag:
8979           shortofs = arg;
8980           if (shortofs == LABEL_DISPATCH) {
8981             CHECK_RETURN(dispatch(d));
8982           } else {
8983             d->pc += shortofs;
8984             break; /* Avoid checkpoint(). */
8985           }
8986         }
8987       )
8988       VMCASE(OP_TAG2,
8989         uint16_t expected;
8990         CHECK_SUSPEND(curbufleft(d) > 0);
8991         expected = (arg >> 8) & 0xffff;
8992         if (curbufleft(d) >= 2) {
8993           uint16_t actual;
8994           memcpy(&actual, d->ptr, 2);
8995           if (expected == actual) {
8996             advance(d, 2);
8997           } else {
8998             goto badtag;
8999           }
9000         } else {
9001           int32_t result = upb_pbdecoder_checktag_slow(d, expected);
9002           if (result == DECODE_MISMATCH) goto badtag;
9003           if (result >= 0) return result;
9004         }
9005       )
9006       VMCASE(OP_TAGN, {
9007         uint64_t expected;
9008         int32_t result;
9009         memcpy(&expected, d->pc, 8);
9010         d->pc += 2;
9011         result = upb_pbdecoder_checktag_slow(d, expected);
9012         if (result == DECODE_MISMATCH) goto badtag;
9013         if (result >= 0) return result;
9014       })
9015       VMCASE(OP_DISPATCH, {
9016         CHECK_RETURN(dispatch(d));
9017       })
9018       VMCASE(OP_HALT, {
9019         return d->size_param;
9020       })
9021     }
9022   }
9023 }
9024 
9025 
9026 /* BytesHandler handlers ******************************************************/
9027 
upb_pbdecoder_startbc(void * closure,const void * pc,size_t size_hint)9028 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
9029   upb_pbdecoder *d = closure;
9030   UPB_UNUSED(size_hint);
9031   d->top->end_ofs = UINT64_MAX;
9032   d->bufstart_ofs = 0;
9033   d->call_len = 1;
9034   d->callstack[0] = &halt;
9035   d->pc = pc;
9036   d->skip = 0;
9037   return d;
9038 }
9039 
upb_pbdecoder_startjit(void * closure,const void * hd,size_t size_hint)9040 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
9041   upb_pbdecoder *d = closure;
9042   UPB_UNUSED(hd);
9043   UPB_UNUSED(size_hint);
9044   d->top->end_ofs = UINT64_MAX;
9045   d->bufstart_ofs = 0;
9046   d->call_len = 0;
9047   d->skip = 0;
9048   return d;
9049 }
9050 
upb_pbdecoder_end(void * closure,const void * handler_data)9051 bool upb_pbdecoder_end(void *closure, const void *handler_data) {
9052   upb_pbdecoder *d = closure;
9053   const upb_pbdecodermethod *method = handler_data;
9054   uint64_t end;
9055   char dummy;
9056 
9057   if (d->residual_end > d->residual) {
9058     seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
9059     return false;
9060   }
9061 
9062   if (d->skip) {
9063     seterr(d, "Unexpected EOF inside skipped data");
9064     return false;
9065   }
9066 
9067   if (d->top->end_ofs != UINT64_MAX) {
9068     seterr(d, "Unexpected EOF inside delimited string");
9069     return false;
9070   }
9071 
9072   /* The user's end() call indicates that the message ends here. */
9073   end = offset(d);
9074   d->top->end_ofs = end;
9075 
9076 #ifdef UPB_USE_JIT_X64
9077   if (method->is_native_) {
9078     const mgroup *group = (const mgroup*)method->group;
9079     if (d->top != d->stack)
9080       d->stack->end_ofs = 0;
9081     group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
9082   } else
9083 #endif
9084   {
9085     const uint32_t *p = d->pc;
9086     d->stack->end_ofs = end;
9087     /* Check the previous bytecode, but guard against beginning. */
9088     if (p != method->code_base.ptr) p--;
9089     if (getop(*p) == OP_CHECKDELIM) {
9090       /* Rewind from OP_TAG* to OP_CHECKDELIM. */
9091       assert(getop(*d->pc) == OP_TAG1 ||
9092              getop(*d->pc) == OP_TAG2 ||
9093              getop(*d->pc) == OP_TAGN ||
9094              getop(*d->pc) == OP_DISPATCH);
9095       d->pc = p;
9096     }
9097     upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
9098   }
9099 
9100   if (d->call_len != 0) {
9101     seterr(d, "Unexpected EOF inside submessage or group");
9102     return false;
9103   }
9104 
9105   return true;
9106 }
9107 
upb_pbdecoder_decode(void * decoder,const void * group,const char * buf,size_t size,const upb_bufhandle * handle)9108 size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
9109                             size_t size, const upb_bufhandle *handle) {
9110   int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
9111 
9112   if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
9113   CHECK_RETURN(result);
9114 
9115   return run_decoder_vm(decoder, group, handle);
9116 }
9117 
9118 
9119 /* Public API *****************************************************************/
9120 
upb_pbdecoder_reset(upb_pbdecoder * d)9121 void upb_pbdecoder_reset(upb_pbdecoder *d) {
9122   d->top = d->stack;
9123   d->top->groupnum = 0;
9124   d->ptr = d->residual;
9125   d->buf = d->residual;
9126   d->end = d->residual;
9127   d->residual_end = d->residual;
9128 }
9129 
upb_pbdecoder_create(upb_env * e,const upb_pbdecodermethod * m,upb_sink * sink)9130 upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
9131                                     upb_sink *sink) {
9132   const size_t default_max_nesting = 64;
9133 #ifndef NDEBUG
9134   size_t size_before = upb_env_bytesallocated(e);
9135 #endif
9136 
9137   upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
9138   if (!d) return NULL;
9139 
9140   d->method_ = m;
9141   d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
9142   d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
9143   if (!d->stack || !d->callstack) {
9144     return NULL;
9145   }
9146 
9147   d->env = e;
9148   d->limit = d->stack + default_max_nesting - 1;
9149   d->stack_size = default_max_nesting;
9150   d->status = NULL;
9151 
9152   upb_pbdecoder_reset(d);
9153   upb_bytessink_reset(&d->input_, &m->input_handler_, d);
9154 
9155   assert(sink);
9156   if (d->method_->dest_handlers_) {
9157     if (sink->handlers != d->method_->dest_handlers_)
9158       return NULL;
9159   }
9160   upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
9161 
9162   /* If this fails, increase the value in decoder.h. */
9163   assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
9164   return d;
9165 }
9166 
upb_pbdecoder_bytesparsed(const upb_pbdecoder * d)9167 uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
9168   return offset(d);
9169 }
9170 
upb_pbdecoder_method(const upb_pbdecoder * d)9171 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
9172   return d->method_;
9173 }
9174 
upb_pbdecoder_input(upb_pbdecoder * d)9175 upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
9176   return &d->input_;
9177 }
9178 
upb_pbdecoder_maxnesting(const upb_pbdecoder * d)9179 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
9180   return d->stack_size;
9181 }
9182 
upb_pbdecoder_setmaxnesting(upb_pbdecoder * d,size_t max)9183 bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
9184   assert(d->top >= d->stack);
9185 
9186   if (max < (size_t)(d->top - d->stack)) {
9187     /* Can't set a limit smaller than what we are currently at. */
9188     return false;
9189   }
9190 
9191   if (max > d->stack_size) {
9192     /* Need to reallocate stack and callstack to accommodate. */
9193     size_t old_size = stacksize(d, d->stack_size);
9194     size_t new_size = stacksize(d, max);
9195     void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
9196     if (!p) {
9197       return false;
9198     }
9199     d->stack = p;
9200 
9201     old_size = callstacksize(d, d->stack_size);
9202     new_size = callstacksize(d, max);
9203     p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
9204     if (!p) {
9205       return false;
9206     }
9207     d->callstack = p;
9208 
9209     d->stack_size = max;
9210   }
9211 
9212   d->limit = d->stack + max - 1;
9213   return true;
9214 }
9215 /*
9216 ** upb::Encoder
9217 **
9218 ** Since we are implementing pure handlers (ie. without any out-of-band access
9219 ** to pre-computed lengths), we have to buffer all submessages before we can
9220 ** emit even their first byte.
9221 **
9222 ** Not knowing the size of submessages also means we can't write a perfect
9223 ** zero-copy implementation, even with buffering.  Lengths are stored as
9224 ** varints, which means that we don't know how many bytes to reserve for the
9225 ** length until we know what the length is.
9226 **
9227 ** This leaves us with three main choices:
9228 **
9229 ** 1. buffer all submessage data in a temporary buffer, then copy it exactly
9230 **    once into the output buffer.
9231 **
9232 ** 2. attempt to buffer data directly into the output buffer, estimating how
9233 **    many bytes each length will take.  When our guesses are wrong, use
9234 **    memmove() to grow or shrink the allotted space.
9235 **
9236 ** 3. buffer directly into the output buffer, allocating a max length
9237 **    ahead-of-time for each submessage length.  If we overallocated, we waste
9238 **    space, but no memcpy() or memmove() is required.  This approach requires
9239 **    defining a maximum size for submessages and rejecting submessages that
9240 **    exceed that size.
9241 **
9242 ** (2) and (3) have the potential to have better performance, but they are more
9243 ** complicated and subtle to implement:
9244 **
9245 **   (3) requires making an arbitrary choice of the maximum message size; it
9246 **       wastes space when submessages are shorter than this and fails
9247 **       completely when they are longer.  This makes it more finicky and
9248 **       requires configuration based on the input.  It also makes it impossible
9249 **       to perfectly match the output of reference encoders that always use the
9250 **       optimal amount of space for each length.
9251 **
9252 **   (2) requires guessing the the size upfront, and if multiple lengths are
9253 **       guessed wrong the minimum required number of memmove() operations may
9254 **       be complicated to compute correctly.  Implemented properly, it may have
9255 **       a useful amortized or average cost, but more investigation is required
9256 **       to determine this and what the optimal algorithm is to achieve it.
9257 **
9258 **   (1) makes you always pay for exactly one copy, but its implementation is
9259 **       the simplest and its performance is predictable.
9260 **
9261 ** So for now, we implement (1) only.  If we wish to optimize later, we should
9262 ** be able to do it without affecting users.
9263 **
9264 ** The strategy is to buffer the segments of data that do *not* depend on
9265 ** unknown lengths in one buffer, and keep a separate buffer of segment pointers
9266 ** and lengths.  When the top-level submessage ends, we can go beginning to end,
9267 ** alternating the writing of lengths with memcpy() of the rest of the data.
9268 ** At the top level though, no buffering is required.
9269 */
9270 
9271 
9272 
9273 /* The output buffer is divided into segments; a segment is a string of data
9274  * that is "ready to go" -- it does not need any varint lengths inserted into
9275  * the middle.  The seams between segments are where varints will be inserted
9276  * once they are known.
9277  *
9278  * We also use the concept of a "run", which is a range of encoded bytes that
9279  * occur at a single submessage level.  Every segment contains one or more runs.
9280  *
9281  * A segment can span messages.  Consider:
9282  *
9283  *                  .--Submessage lengths---------.
9284  *                  |       |                     |
9285  *                  |       V                     V
9286  *                  V      | |---------------    | |-----------------
9287  * Submessages:    | |-----------------------------------------------
9288  * Top-level msg: ------------------------------------------------------------
9289  *
9290  * Segments:          -----   -------------------   -----------------
9291  * Runs:              *----   *--------------*---   *----------------
9292  * (* marks the start)
9293  *
9294  * Note that the top-level menssage is not in any segment because it does not
9295  * have any length preceding it.
9296  *
9297  * A segment is only interrupted when another length needs to be inserted.  So
9298  * observe how the second segment spans both the inner submessage and part of
9299  * the next enclosing message. */
9300 typedef struct {
9301   uint32_t msglen;  /* The length to varint-encode before this segment. */
9302   uint32_t seglen;  /* Length of the segment. */
9303 } upb_pb_encoder_segment;
9304 
9305 struct upb_pb_encoder {
9306   upb_env *env;
9307 
9308   /* Our input and output. */
9309   upb_sink input_;
9310   upb_bytessink *output_;
9311 
9312   /* The "subclosure" -- used as the inner closure as part of the bytessink
9313    * protocol. */
9314   void *subc;
9315 
9316   /* The output buffer and limit, and our current write position.  "buf"
9317    * initially points to "initbuf", but is dynamically allocated if we need to
9318    * grow beyond the initial size. */
9319   char *buf, *ptr, *limit;
9320 
9321   /* The beginning of the current run, or undefined if we are at the top
9322    * level. */
9323   char *runbegin;
9324 
9325   /* The list of segments we are accumulating. */
9326   upb_pb_encoder_segment *segbuf, *segptr, *seglimit;
9327 
9328   /* The stack of enclosing submessages.  Each entry in the stack points to the
9329    * segment where this submessage's length is being accumulated. */
9330   int *stack, *top, *stacklimit;
9331 
9332   /* Depth of startmsg/endmsg calls. */
9333   int depth;
9334 };
9335 
9336 /* low-level buffering ********************************************************/
9337 
9338 /* Low-level functions for interacting with the output buffer. */
9339 
9340 /* TODO(haberman): handle pushback */
putbuf(upb_pb_encoder * e,const char * buf,size_t len)9341 static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
9342   size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
9343   UPB_ASSERT_VAR(n, n == len);
9344 }
9345 
top(upb_pb_encoder * e)9346 static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
9347   return &e->segbuf[*e->top];
9348 }
9349 
9350 /* Call to ensure that at least "bytes" bytes are available for writing at
9351  * e->ptr.  Returns false if the bytes could not be allocated. */
reserve(upb_pb_encoder * e,size_t bytes)9352 static bool reserve(upb_pb_encoder *e, size_t bytes) {
9353   if ((size_t)(e->limit - e->ptr) < bytes) {
9354     /* Grow buffer. */
9355     char *new_buf;
9356     size_t needed = bytes + (e->ptr - e->buf);
9357     size_t old_size = e->limit - e->buf;
9358 
9359     size_t new_size = old_size;
9360 
9361     while (new_size < needed) {
9362       new_size *= 2;
9363     }
9364 
9365     new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
9366 
9367     if (new_buf == NULL) {
9368       return false;
9369     }
9370 
9371     e->ptr = new_buf + (e->ptr - e->buf);
9372     e->runbegin = new_buf + (e->runbegin - e->buf);
9373     e->limit = new_buf + new_size;
9374     e->buf = new_buf;
9375   }
9376 
9377   return true;
9378 }
9379 
9380 /* Call when "bytes" bytes have been writte at e->ptr.  The caller *must* have
9381  * previously called reserve() with at least this many bytes. */
encoder_advance(upb_pb_encoder * e,size_t bytes)9382 static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
9383   assert((size_t)(e->limit - e->ptr) >= bytes);
9384   e->ptr += bytes;
9385 }
9386 
9387 /* Call when all of the bytes for a handler have been written.  Flushes the
9388  * bytes if possible and necessary, returning false if this failed. */
commit(upb_pb_encoder * e)9389 static bool commit(upb_pb_encoder *e) {
9390   if (!e->top) {
9391     /* We aren't inside a delimited region.  Flush our accumulated bytes to
9392      * the output.
9393      *
9394      * TODO(haberman): in the future we may want to delay flushing for
9395      * efficiency reasons. */
9396     putbuf(e, e->buf, e->ptr - e->buf);
9397     e->ptr = e->buf;
9398   }
9399 
9400   return true;
9401 }
9402 
9403 /* Writes the given bytes to the buffer, handling reserve/advance. */
encode_bytes(upb_pb_encoder * e,const void * data,size_t len)9404 static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
9405   if (!reserve(e, len)) {
9406     return false;
9407   }
9408 
9409   memcpy(e->ptr, data, len);
9410   encoder_advance(e, len);
9411   return true;
9412 }
9413 
9414 /* Finish the current run by adding the run totals to the segment and message
9415  * length. */
accumulate(upb_pb_encoder * e)9416 static void accumulate(upb_pb_encoder *e) {
9417   size_t run_len;
9418   assert(e->ptr >= e->runbegin);
9419   run_len = e->ptr - e->runbegin;
9420   e->segptr->seglen += run_len;
9421   top(e)->msglen += run_len;
9422   e->runbegin = e->ptr;
9423 }
9424 
9425 /* Call to indicate the start of delimited region for which the full length is
9426  * not yet known.  All data will be buffered until the length is known.
9427  * Delimited regions may be nested; their lengths will all be tracked properly. */
start_delim(upb_pb_encoder * e)9428 static bool start_delim(upb_pb_encoder *e) {
9429   if (e->top) {
9430     /* We are already buffering, advance to the next segment and push it on the
9431      * stack. */
9432     accumulate(e);
9433 
9434     if (++e->top == e->stacklimit) {
9435       /* TODO(haberman): grow stack? */
9436       return false;
9437     }
9438 
9439     if (++e->segptr == e->seglimit) {
9440       /* Grow segment buffer. */
9441       size_t old_size =
9442           (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
9443       size_t new_size = old_size * 2;
9444       upb_pb_encoder_segment *new_buf =
9445           upb_env_realloc(e->env, e->segbuf, old_size, new_size);
9446 
9447       if (new_buf == NULL) {
9448         return false;
9449       }
9450 
9451       e->segptr = new_buf + (e->segptr - e->segbuf);
9452       e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
9453       e->segbuf = new_buf;
9454     }
9455   } else {
9456     /* We were previously at the top level, start buffering. */
9457     e->segptr = e->segbuf;
9458     e->top = e->stack;
9459     e->runbegin = e->ptr;
9460   }
9461 
9462   *e->top = e->segptr - e->segbuf;
9463   e->segptr->seglen = 0;
9464   e->segptr->msglen = 0;
9465 
9466   return true;
9467 }
9468 
9469 /* Call to indicate the end of a delimited region.  We now know the length of
9470  * the delimited region.  If we are not nested inside any other delimited
9471  * regions, we can now emit all of the buffered data we accumulated. */
end_delim(upb_pb_encoder * e)9472 static bool end_delim(upb_pb_encoder *e) {
9473   size_t msglen;
9474   accumulate(e);
9475   msglen = top(e)->msglen;
9476 
9477   if (e->top == e->stack) {
9478     /* All lengths are now available, emit all buffered data. */
9479     char buf[UPB_PB_VARINT_MAX_LEN];
9480     upb_pb_encoder_segment *s;
9481     const char *ptr = e->buf;
9482     for (s = e->segbuf; s <= e->segptr; s++) {
9483       size_t lenbytes = upb_vencode64(s->msglen, buf);
9484       putbuf(e, buf, lenbytes);
9485       putbuf(e, ptr, s->seglen);
9486       ptr += s->seglen;
9487     }
9488 
9489     e->ptr = e->buf;
9490     e->top = NULL;
9491   } else {
9492     /* Need to keep buffering; propagate length info into enclosing
9493      * submessages. */
9494     --e->top;
9495     top(e)->msglen += msglen + upb_varint_size(msglen);
9496   }
9497 
9498   return true;
9499 }
9500 
9501 
9502 /* tag_t **********************************************************************/
9503 
9504 /* A precomputed (pre-encoded) tag and length. */
9505 
9506 typedef struct {
9507   uint8_t bytes;
9508   char tag[7];
9509 } tag_t;
9510 
9511 /* Allocates a new tag for this field, and sets it in these handlerattr. */
new_tag(upb_handlers * h,const upb_fielddef * f,upb_wiretype_t wt,upb_handlerattr * attr)9512 static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
9513                     upb_handlerattr *attr) {
9514   uint32_t n = upb_fielddef_number(f);
9515 
9516   tag_t *tag = upb_gmalloc(sizeof(tag_t));
9517   tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
9518 
9519   upb_handlerattr_init(attr);
9520   upb_handlerattr_sethandlerdata(attr, tag);
9521   upb_handlers_addcleanup(h, tag, upb_gfree);
9522 }
9523 
encode_tag(upb_pb_encoder * e,const tag_t * tag)9524 static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
9525   return encode_bytes(e, tag->tag, tag->bytes);
9526 }
9527 
9528 
9529 /* encoding of wire types *****************************************************/
9530 
encode_fixed64(upb_pb_encoder * e,uint64_t val)9531 static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
9532   /* TODO(haberman): byte-swap for big endian. */
9533   return encode_bytes(e, &val, sizeof(uint64_t));
9534 }
9535 
encode_fixed32(upb_pb_encoder * e,uint32_t val)9536 static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
9537   /* TODO(haberman): byte-swap for big endian. */
9538   return encode_bytes(e, &val, sizeof(uint32_t));
9539 }
9540 
encode_varint(upb_pb_encoder * e,uint64_t val)9541 static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
9542   if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
9543     return false;
9544   }
9545 
9546   encoder_advance(e, upb_vencode64(val, e->ptr));
9547   return true;
9548 }
9549 
dbl2uint64(double d)9550 static uint64_t dbl2uint64(double d) {
9551   uint64_t ret;
9552   memcpy(&ret, &d, sizeof(uint64_t));
9553   return ret;
9554 }
9555 
flt2uint32(float d)9556 static uint32_t flt2uint32(float d) {
9557   uint32_t ret;
9558   memcpy(&ret, &d, sizeof(uint32_t));
9559   return ret;
9560 }
9561 
9562 
9563 /* encoding of proto types ****************************************************/
9564 
startmsg(void * c,const void * hd)9565 static bool startmsg(void *c, const void *hd) {
9566   upb_pb_encoder *e = c;
9567   UPB_UNUSED(hd);
9568   if (e->depth++ == 0) {
9569     upb_bytessink_start(e->output_, 0, &e->subc);
9570   }
9571   return true;
9572 }
9573 
endmsg(void * c,const void * hd,upb_status * status)9574 static bool endmsg(void *c, const void *hd, upb_status *status) {
9575   upb_pb_encoder *e = c;
9576   UPB_UNUSED(hd);
9577   UPB_UNUSED(status);
9578   if (--e->depth == 0) {
9579     upb_bytessink_end(e->output_);
9580   }
9581   return true;
9582 }
9583 
encode_startdelimfield(void * c,const void * hd)9584 static void *encode_startdelimfield(void *c, const void *hd) {
9585   bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
9586   return ok ? c : UPB_BREAK;
9587 }
9588 
encode_enddelimfield(void * c,const void * hd)9589 static bool encode_enddelimfield(void *c, const void *hd) {
9590   UPB_UNUSED(hd);
9591   return end_delim(c);
9592 }
9593 
encode_startgroup(void * c,const void * hd)9594 static void *encode_startgroup(void *c, const void *hd) {
9595   return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
9596 }
9597 
encode_endgroup(void * c,const void * hd)9598 static bool encode_endgroup(void *c, const void *hd) {
9599   return encode_tag(c, hd) && commit(c);
9600 }
9601 
encode_startstr(void * c,const void * hd,size_t size_hint)9602 static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
9603   UPB_UNUSED(size_hint);
9604   return encode_startdelimfield(c, hd);
9605 }
9606 
encode_strbuf(void * c,const void * hd,const char * buf,size_t len,const upb_bufhandle * h)9607 static size_t encode_strbuf(void *c, const void *hd, const char *buf,
9608                             size_t len, const upb_bufhandle *h) {
9609   UPB_UNUSED(hd);
9610   UPB_UNUSED(h);
9611   return encode_bytes(c, buf, len) ? len : 0;
9612 }
9613 
9614 #define T(type, ctype, convert, encode)                                  \
9615   static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
9616     return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e);  \
9617   }                                                                      \
9618   static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
9619     UPB_UNUSED(hd);                                                      \
9620     return encode(e, (convert)(val));                                    \
9621   }
9622 
T(double,double,dbl2uint64,encode_fixed64)9623 T(double,   double,   dbl2uint64,   encode_fixed64)
9624 T(float,    float,    flt2uint32,   encode_fixed32)
9625 T(int64,    int64_t,  uint64_t,     encode_varint)
9626 T(int32,    int32_t,  uint32_t,     encode_varint)
9627 T(fixed64,  uint64_t, uint64_t,     encode_fixed64)
9628 T(fixed32,  uint32_t, uint32_t,     encode_fixed32)
9629 T(bool,     bool,     bool,         encode_varint)
9630 T(uint32,   uint32_t, uint32_t,     encode_varint)
9631 T(uint64,   uint64_t, uint64_t,     encode_varint)
9632 T(enum,     int32_t,  uint32_t,     encode_varint)
9633 T(sfixed32, int32_t,  uint32_t,     encode_fixed32)
9634 T(sfixed64, int64_t,  uint64_t,     encode_fixed64)
9635 T(sint32,   int32_t,  upb_zzenc_32, encode_varint)
9636 T(sint64,   int64_t,  upb_zzenc_64, encode_varint)
9637 
9638 #undef T
9639 
9640 
9641 /* code to build the handlers *************************************************/
9642 
9643 static void newhandlers_callback(const void *closure, upb_handlers *h) {
9644   const upb_msgdef *m;
9645   upb_msg_field_iter i;
9646 
9647   UPB_UNUSED(closure);
9648 
9649   upb_handlers_setstartmsg(h, startmsg, NULL);
9650   upb_handlers_setendmsg(h, endmsg, NULL);
9651 
9652   m = upb_handlers_msgdef(h);
9653   for(upb_msg_field_begin(&i, m);
9654       !upb_msg_field_done(&i);
9655       upb_msg_field_next(&i)) {
9656     const upb_fielddef *f = upb_msg_iter_field(&i);
9657     bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
9658                   upb_fielddef_packed(f);
9659     upb_handlerattr attr;
9660     upb_wiretype_t wt =
9661         packed ? UPB_WIRE_TYPE_DELIMITED
9662                : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
9663 
9664     /* Pre-encode the tag for this field. */
9665     new_tag(h, f, wt, &attr);
9666 
9667     if (packed) {
9668       upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
9669       upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
9670     }
9671 
9672 #define T(upper, lower, upbtype)                                     \
9673   case UPB_DESCRIPTOR_TYPE_##upper:                                  \
9674     if (packed) {                                                    \
9675       upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
9676     } else {                                                         \
9677       upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
9678     }                                                                \
9679     break;
9680 
9681     switch (upb_fielddef_descriptortype(f)) {
9682       T(DOUBLE,   double,   double);
9683       T(FLOAT,    float,    float);
9684       T(INT64,    int64,    int64);
9685       T(INT32,    int32,    int32);
9686       T(FIXED64,  fixed64,  uint64);
9687       T(FIXED32,  fixed32,  uint32);
9688       T(BOOL,     bool,     bool);
9689       T(UINT32,   uint32,   uint32);
9690       T(UINT64,   uint64,   uint64);
9691       T(ENUM,     enum,     int32);
9692       T(SFIXED32, sfixed32, int32);
9693       T(SFIXED64, sfixed64, int64);
9694       T(SINT32,   sint32,   int32);
9695       T(SINT64,   sint64,   int64);
9696       case UPB_DESCRIPTOR_TYPE_STRING:
9697       case UPB_DESCRIPTOR_TYPE_BYTES:
9698         upb_handlers_setstartstr(h, f, encode_startstr, &attr);
9699         upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
9700         upb_handlers_setstring(h, f, encode_strbuf, &attr);
9701         break;
9702       case UPB_DESCRIPTOR_TYPE_MESSAGE:
9703         upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
9704         upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
9705         break;
9706       case UPB_DESCRIPTOR_TYPE_GROUP: {
9707         /* Endgroup takes a different tag (wire_type = END_GROUP). */
9708         upb_handlerattr attr2;
9709         new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
9710 
9711         upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
9712         upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);
9713 
9714         upb_handlerattr_uninit(&attr2);
9715         break;
9716       }
9717     }
9718 
9719 #undef T
9720 
9721     upb_handlerattr_uninit(&attr);
9722   }
9723 }
9724 
upb_pb_encoder_reset(upb_pb_encoder * e)9725 void upb_pb_encoder_reset(upb_pb_encoder *e) {
9726   e->segptr = NULL;
9727   e->top = NULL;
9728   e->depth = 0;
9729 }
9730 
9731 
9732 /* public API *****************************************************************/
9733 
upb_pb_encoder_newhandlers(const upb_msgdef * m,const void * owner)9734 const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
9735                                                const void *owner) {
9736   return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
9737 }
9738 
upb_pb_encoder_create(upb_env * env,const upb_handlers * h,upb_bytessink * output)9739 upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
9740                                       upb_bytessink *output) {
9741   const size_t initial_bufsize = 256;
9742   const size_t initial_segbufsize = 16;
9743   /* TODO(haberman): make this configurable. */
9744   const size_t stack_size = 64;
9745 #ifndef NDEBUG
9746   const size_t size_before = upb_env_bytesallocated(env);
9747 #endif
9748 
9749   upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
9750   if (!e) return NULL;
9751 
9752   e->buf = upb_env_malloc(env, initial_bufsize);
9753   e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
9754   e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
9755 
9756   if (!e->buf || !e->segbuf || !e->stack) {
9757     return NULL;
9758   }
9759 
9760   e->limit = e->buf + initial_bufsize;
9761   e->seglimit = e->segbuf + initial_segbufsize;
9762   e->stacklimit = e->stack + stack_size;
9763 
9764   upb_pb_encoder_reset(e);
9765   upb_sink_reset(&e->input_, h, e);
9766 
9767   e->env = env;
9768   e->output_ = output;
9769   e->subc = output->closure;
9770   e->ptr = e->buf;
9771 
9772   /* If this fails, increase the value in encoder.h. */
9773   assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
9774   return e;
9775 }
9776 
upb_pb_encoder_input(upb_pb_encoder * e)9777 upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
9778 
9779 
9780 
upb_loaddescriptor(const char * buf,size_t n,const void * owner,upb_status * status)9781 upb_filedef **upb_loaddescriptor(const char *buf, size_t n, const void *owner,
9782                                  upb_status *status) {
9783   /* Create handlers. */
9784   const upb_pbdecodermethod *decoder_m;
9785   const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
9786   upb_env env;
9787   upb_pbdecodermethodopts opts;
9788   upb_pbdecoder *decoder;
9789   upb_descreader *reader;
9790   bool ok;
9791   size_t i;
9792   upb_filedef **ret = NULL;
9793 
9794   upb_pbdecodermethodopts_init(&opts, reader_h);
9795   decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
9796 
9797   upb_env_init(&env);
9798   upb_env_reporterrorsto(&env, status);
9799 
9800   reader = upb_descreader_create(&env, reader_h);
9801   decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
9802 
9803   /* Push input data. */
9804   ok = upb_bufsrc_putbuf(buf, n, upb_pbdecoder_input(decoder));
9805 
9806   if (!ok) {
9807     goto cleanup;
9808   }
9809 
9810   ret = upb_gmalloc(sizeof (*ret) * (upb_descreader_filecount(reader) + 1));
9811 
9812   if (!ret) {
9813     goto cleanup;
9814   }
9815 
9816   for (i = 0; i < upb_descreader_filecount(reader); i++) {
9817     ret[i] = upb_descreader_file(reader, i);
9818     upb_filedef_ref(ret[i], owner);
9819   }
9820 
9821   ret[i] = NULL;
9822 
9823 cleanup:
9824   upb_env_uninit(&env);
9825   upb_handlers_unref(reader_h, &reader_h);
9826   upb_pbdecodermethod_unref(decoder_m, &decoder_m);
9827   return ret;
9828 }
9829 /*
9830  * upb::pb::TextPrinter
9831  *
9832  * OPT: This is not optimized at all.  It uses printf() which parses the format
9833  * string every time, and it allocates memory for every put.
9834  */
9835 
9836 
9837 #include <ctype.h>
9838 #include <float.h>
9839 #include <inttypes.h>
9840 #include <stdarg.h>
9841 #include <stdio.h>
9842 #include <string.h>
9843 
9844 
9845 struct upb_textprinter {
9846   upb_sink input_;
9847   upb_bytessink *output_;
9848   int indent_depth_;
9849   bool single_line_;
9850   void *subc;
9851 };
9852 
9853 #define CHECK(x) if ((x) < 0) goto err;
9854 
shortname(const char * longname)9855 static const char *shortname(const char *longname) {
9856   const char *last = strrchr(longname, '.');
9857   return last ? last + 1 : longname;
9858 }
9859 
indent(upb_textprinter * p)9860 static int indent(upb_textprinter *p) {
9861   int i;
9862   if (!p->single_line_)
9863     for (i = 0; i < p->indent_depth_; i++)
9864       upb_bytessink_putbuf(p->output_, p->subc, "  ", 2, NULL);
9865   return 0;
9866 }
9867 
endfield(upb_textprinter * p)9868 static int endfield(upb_textprinter *p) {
9869   const char ch = (p->single_line_ ? ' ' : '\n');
9870   upb_bytessink_putbuf(p->output_, p->subc, &ch, 1, NULL);
9871   return 0;
9872 }
9873 
putescaped(upb_textprinter * p,const char * buf,size_t len,bool preserve_utf8)9874 static int putescaped(upb_textprinter *p, const char *buf, size_t len,
9875                       bool preserve_utf8) {
9876   /* Based on CEscapeInternal() from Google's protobuf release. */
9877   char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
9878   const char *end = buf + len;
9879 
9880   /* I think hex is prettier and more useful, but proto2 uses octal; should
9881    * investigate whether it can parse hex also. */
9882   const bool use_hex = false;
9883   bool last_hex_escape = false; /* true if last output char was \xNN */
9884 
9885   for (; buf < end; buf++) {
9886     bool is_hex_escape;
9887 
9888     if (dstend - dst < 4) {
9889       upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
9890       dst = dstbuf;
9891     }
9892 
9893     is_hex_escape = false;
9894     switch (*buf) {
9895       case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
9896       case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
9897       case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
9898       case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
9899       case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
9900       case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
9901       default:
9902         /* Note that if we emit \xNN and the buf character after that is a hex
9903          * digit then that digit must be escaped too to prevent it being
9904          * interpreted as part of the character code by C. */
9905         if ((!preserve_utf8 || (uint8_t)*buf < 0x80) &&
9906             (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) {
9907           sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf);
9908           is_hex_escape = use_hex;
9909           dst += 4;
9910         } else {
9911           *(dst++) = *buf; break;
9912         }
9913     }
9914     last_hex_escape = is_hex_escape;
9915   }
9916   /* Flush remaining data. */
9917   upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
9918   return 0;
9919 }
9920 
putf(upb_textprinter * p,const char * fmt,...)9921 bool putf(upb_textprinter *p, const char *fmt, ...) {
9922   va_list args;
9923   va_list args_copy;
9924   char *str;
9925   int written;
9926   int len;
9927   bool ok;
9928 
9929   va_start(args, fmt);
9930 
9931   /* Run once to get the length of the string. */
9932   _upb_va_copy(args_copy, args);
9933   len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
9934   va_end(args_copy);
9935 
9936   /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */
9937   str = upb_gmalloc(len + 1);
9938   if (!str) return false;
9939   written = vsprintf(str, fmt, args);
9940   va_end(args);
9941   UPB_ASSERT_VAR(written, written == len);
9942 
9943   ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
9944   upb_gfree(str);
9945   return ok;
9946 }
9947 
9948 
9949 /* handlers *******************************************************************/
9950 
textprinter_startmsg(void * c,const void * hd)9951 static bool textprinter_startmsg(void *c, const void *hd) {
9952   upb_textprinter *p = c;
9953   UPB_UNUSED(hd);
9954   if (p->indent_depth_ == 0) {
9955     upb_bytessink_start(p->output_, 0, &p->subc);
9956   }
9957   return true;
9958 }
9959 
textprinter_endmsg(void * c,const void * hd,upb_status * s)9960 static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
9961   upb_textprinter *p = c;
9962   UPB_UNUSED(hd);
9963   UPB_UNUSED(s);
9964   if (p->indent_depth_ == 0) {
9965     upb_bytessink_end(p->output_);
9966   }
9967   return true;
9968 }
9969 
9970 #define TYPE(name, ctype, fmt) \
9971   static bool textprinter_put ## name(void *closure, const void *handler_data, \
9972                                       ctype val) {                             \
9973     upb_textprinter *p = closure;                                              \
9974     const upb_fielddef *f = handler_data;                                      \
9975     CHECK(indent(p));                                                          \
9976     putf(p, "%s: " fmt, upb_fielddef_name(f), val);                            \
9977     CHECK(endfield(p));                                                        \
9978     return true;                                                               \
9979   err:                                                                         \
9980     return false;                                                              \
9981 }
9982 
textprinter_putbool(void * closure,const void * handler_data,bool val)9983 static bool textprinter_putbool(void *closure, const void *handler_data,
9984                                 bool val) {
9985   upb_textprinter *p = closure;
9986   const upb_fielddef *f = handler_data;
9987   CHECK(indent(p));
9988   putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
9989   CHECK(endfield(p));
9990   return true;
9991 err:
9992   return false;
9993 }
9994 
9995 #define STRINGIFY_HELPER(x) #x
9996 #define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)
9997 
9998 TYPE(int32,  int32_t,  "%" PRId32)
9999 TYPE(int64,  int64_t,  "%" PRId64)
10000 TYPE(uint32, uint32_t, "%" PRIu32)
10001 TYPE(uint64, uint64_t, "%" PRIu64)
STRINGIFY_MACROVAL(FLT_DIG)10002 TYPE(float,  float,    "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
10003 TYPE(double, double,   "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
10004 
10005 #undef TYPE
10006 
10007 /* Output a symbolic value from the enum if found, else just print as int32. */
10008 static bool textprinter_putenum(void *closure, const void *handler_data,
10009                                 int32_t val) {
10010   upb_textprinter *p = closure;
10011   const upb_fielddef *f = handler_data;
10012   const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
10013   const char *label = upb_enumdef_iton(enum_def, val);
10014   if (label) {
10015     indent(p);
10016     putf(p, "%s: %s", upb_fielddef_name(f), label);
10017     endfield(p);
10018   } else {
10019     if (!textprinter_putint32(closure, handler_data, val))
10020       return false;
10021   }
10022   return true;
10023 }
10024 
textprinter_startstr(void * closure,const void * handler_data,size_t size_hint)10025 static void *textprinter_startstr(void *closure, const void *handler_data,
10026                       size_t size_hint) {
10027   upb_textprinter *p = closure;
10028   const upb_fielddef *f = handler_data;
10029   UPB_UNUSED(size_hint);
10030   indent(p);
10031   putf(p, "%s: \"", upb_fielddef_name(f));
10032   return p;
10033 }
10034 
textprinter_endstr(void * closure,const void * handler_data)10035 static bool textprinter_endstr(void *closure, const void *handler_data) {
10036   upb_textprinter *p = closure;
10037   UPB_UNUSED(handler_data);
10038   putf(p, "\"");
10039   endfield(p);
10040   return true;
10041 }
10042 
textprinter_putstr(void * closure,const void * hd,const char * buf,size_t len,const upb_bufhandle * handle)10043 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
10044                                  size_t len, const upb_bufhandle *handle) {
10045   upb_textprinter *p = closure;
10046   const upb_fielddef *f = hd;
10047   UPB_UNUSED(handle);
10048   CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
10049   return len;
10050 err:
10051   return 0;
10052 }
10053 
textprinter_startsubmsg(void * closure,const void * handler_data)10054 static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
10055   upb_textprinter *p = closure;
10056   const char *name = handler_data;
10057   CHECK(indent(p));
10058   putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
10059   p->indent_depth_++;
10060   return p;
10061 err:
10062   return UPB_BREAK;
10063 }
10064 
textprinter_endsubmsg(void * closure,const void * handler_data)10065 static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
10066   upb_textprinter *p = closure;
10067   UPB_UNUSED(handler_data);
10068   p->indent_depth_--;
10069   CHECK(indent(p));
10070   upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
10071   CHECK(endfield(p));
10072   return true;
10073 err:
10074   return false;
10075 }
10076 
onmreg(const void * c,upb_handlers * h)10077 static void onmreg(const void *c, upb_handlers *h) {
10078   const upb_msgdef *m = upb_handlers_msgdef(h);
10079   upb_msg_field_iter i;
10080   UPB_UNUSED(c);
10081 
10082   upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
10083   upb_handlers_setendmsg(h, textprinter_endmsg, NULL);
10084 
10085   for(upb_msg_field_begin(&i, m);
10086       !upb_msg_field_done(&i);
10087       upb_msg_field_next(&i)) {
10088     upb_fielddef *f = upb_msg_iter_field(&i);
10089     upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
10090     upb_handlerattr_sethandlerdata(&attr, f);
10091     switch (upb_fielddef_type(f)) {
10092       case UPB_TYPE_INT32:
10093         upb_handlers_setint32(h, f, textprinter_putint32, &attr);
10094         break;
10095       case UPB_TYPE_INT64:
10096         upb_handlers_setint64(h, f, textprinter_putint64, &attr);
10097         break;
10098       case UPB_TYPE_UINT32:
10099         upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
10100         break;
10101       case UPB_TYPE_UINT64:
10102         upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
10103         break;
10104       case UPB_TYPE_FLOAT:
10105         upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
10106         break;
10107       case UPB_TYPE_DOUBLE:
10108         upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
10109         break;
10110       case UPB_TYPE_BOOL:
10111         upb_handlers_setbool(h, f, textprinter_putbool, &attr);
10112         break;
10113       case UPB_TYPE_STRING:
10114       case UPB_TYPE_BYTES:
10115         upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
10116         upb_handlers_setstring(h, f, textprinter_putstr, &attr);
10117         upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
10118         break;
10119       case UPB_TYPE_MESSAGE: {
10120         const char *name =
10121             upb_fielddef_istagdelim(f)
10122                 ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)))
10123                 : upb_fielddef_name(f);
10124         upb_handlerattr_sethandlerdata(&attr, name);
10125         upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
10126         upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
10127         break;
10128       }
10129       case UPB_TYPE_ENUM:
10130         upb_handlers_setint32(h, f, textprinter_putenum, &attr);
10131         break;
10132     }
10133   }
10134 }
10135 
textprinter_reset(upb_textprinter * p,bool single_line)10136 static void textprinter_reset(upb_textprinter *p, bool single_line) {
10137   p->single_line_ = single_line;
10138   p->indent_depth_ = 0;
10139 }
10140 
10141 
10142 /* Public API *****************************************************************/
10143 
upb_textprinter_create(upb_env * env,const upb_handlers * h,upb_bytessink * output)10144 upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
10145                                         upb_bytessink *output) {
10146   upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
10147   if (!p) return NULL;
10148 
10149   p->output_ = output;
10150   upb_sink_reset(&p->input_, h, p);
10151   textprinter_reset(p, false);
10152 
10153   return p;
10154 }
10155 
upb_textprinter_newhandlers(const upb_msgdef * m,const void * owner)10156 const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
10157                                                 const void *owner) {
10158   return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
10159 }
10160 
upb_textprinter_input(upb_textprinter * p)10161 upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
10162 
upb_textprinter_setsingleline(upb_textprinter * p,bool single_line)10163 void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
10164   p->single_line_ = single_line;
10165 }
10166 
10167 
10168 /* Index is descriptor type. */
10169 const uint8_t upb_pb_native_wire_types[] = {
10170   UPB_WIRE_TYPE_END_GROUP,     /* ENDGROUP */
10171   UPB_WIRE_TYPE_64BIT,         /* DOUBLE */
10172   UPB_WIRE_TYPE_32BIT,         /* FLOAT */
10173   UPB_WIRE_TYPE_VARINT,        /* INT64 */
10174   UPB_WIRE_TYPE_VARINT,        /* UINT64 */
10175   UPB_WIRE_TYPE_VARINT,        /* INT32 */
10176   UPB_WIRE_TYPE_64BIT,         /* FIXED64 */
10177   UPB_WIRE_TYPE_32BIT,         /* FIXED32 */
10178   UPB_WIRE_TYPE_VARINT,        /* BOOL */
10179   UPB_WIRE_TYPE_DELIMITED,     /* STRING */
10180   UPB_WIRE_TYPE_START_GROUP,   /* GROUP */
10181   UPB_WIRE_TYPE_DELIMITED,     /* MESSAGE */
10182   UPB_WIRE_TYPE_DELIMITED,     /* BYTES */
10183   UPB_WIRE_TYPE_VARINT,        /* UINT32 */
10184   UPB_WIRE_TYPE_VARINT,        /* ENUM */
10185   UPB_WIRE_TYPE_32BIT,         /* SFIXED32 */
10186   UPB_WIRE_TYPE_64BIT,         /* SFIXED64 */
10187   UPB_WIRE_TYPE_VARINT,        /* SINT32 */
10188   UPB_WIRE_TYPE_VARINT,        /* SINT64 */
10189 };
10190 
10191 /* A basic branch-based decoder, uses 32-bit values to get good performance
10192  * on 32-bit architectures (but performs well on 64-bits also).
10193  * This scheme comes from the original Google Protobuf implementation
10194  * (proto2). */
upb_vdecode_max8_branch32(upb_decoderet r)10195 upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
10196   upb_decoderet err = {NULL, 0};
10197   const char *p = r.p;
10198   uint32_t low = (uint32_t)r.val;
10199   uint32_t high = 0;
10200   uint32_t b;
10201   b = *(p++); low  |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
10202   b = *(p++); low  |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
10203   b = *(p++); low  |= (b & 0x7fU) << 28;
10204               high  = (b & 0x7fU) >>  4; if (!(b & 0x80)) goto done;
10205   b = *(p++); high |= (b & 0x7fU) <<  3; if (!(b & 0x80)) goto done;
10206   b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
10207   b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
10208   b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
10209   b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
10210   return err;
10211 
10212 done:
10213   r.val = ((uint64_t)high << 32) | low;
10214   r.p = p;
10215   return r;
10216 }
10217 
10218 /* Like the previous, but uses 64-bit values. */
upb_vdecode_max8_branch64(upb_decoderet r)10219 upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
10220   const char *p = r.p;
10221   uint64_t val = r.val;
10222   uint64_t b;
10223   upb_decoderet err = {NULL, 0};
10224   b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
10225   b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
10226   b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
10227   b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
10228   b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
10229   b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
10230   b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
10231   b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
10232   return err;
10233 
10234 done:
10235   r.val = val;
10236   r.p = p;
10237   return r;
10238 }
10239 
10240 /* Given an encoded varint v, returns an integer with a single bit set that
10241  * indicates the end of the varint.  Subtracting one from this value will
10242  * yield a mask that leaves only bits that are part of the varint.  Returns
10243  * 0 if the varint is unterminated. */
upb_get_vstopbit(uint64_t v)10244 static uint64_t upb_get_vstopbit(uint64_t v) {
10245   uint64_t cbits = v | 0x7f7f7f7f7f7f7f7fULL;
10246   return ~cbits & (cbits+1);
10247 }
10248 
10249 /* A branchless decoder.  Credit to Pascal Massimino for the bit-twiddling. */
upb_vdecode_max8_massimino(upb_decoderet r)10250 upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
10251   uint64_t b;
10252   uint64_t stop_bit;
10253   upb_decoderet my_r;
10254   memcpy(&b, r.p, sizeof(b));
10255   stop_bit = upb_get_vstopbit(b);
10256   b =  (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
10257   b +=       b & 0x007f007f007f007fULL;
10258   b +=  3 * (b & 0x0000ffff0000ffffULL);
10259   b += 15 * (b & 0x00000000ffffffffULL);
10260   if (stop_bit == 0) {
10261     /* Error: unterminated varint. */
10262     upb_decoderet err_r = {(void*)0, 0};
10263     return err_r;
10264   }
10265   my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
10266                             r.val | (b << 7));
10267   return my_r;
10268 }
10269 
10270 /* A branchless decoder.  Credit to Daniel Wright for the bit-twiddling. */
upb_vdecode_max8_wright(upb_decoderet r)10271 upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
10272   uint64_t b;
10273   uint64_t stop_bit;
10274   upb_decoderet my_r;
10275   memcpy(&b, r.p, sizeof(b));
10276   stop_bit = upb_get_vstopbit(b);
10277   b &= (stop_bit - 1);
10278   b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
10279   b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
10280   b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
10281   if (stop_bit == 0) {
10282     /* Error: unterminated varint. */
10283     upb_decoderet err_r = {(void*)0, 0};
10284     return err_r;
10285   }
10286   my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
10287                             r.val | (b << 14));
10288   return my_r;
10289 }
10290 
10291 #line 1 "upb/json/parser.rl"
10292 /*
10293 ** upb::json::Parser (upb_json_parser)
10294 **
10295 ** A parser that uses the Ragel State Machine Compiler to generate
10296 ** the finite automata.
10297 **
10298 ** Ragel only natively handles regular languages, but we can manually
10299 ** program it a bit to handle context-free languages like JSON, by using
10300 ** the "fcall" and "fret" constructs.
10301 **
10302 ** This parser can handle the basics, but needs several things to be fleshed
10303 ** out:
10304 **
10305 ** - handling of unicode escape sequences (including high surrogate pairs).
10306 ** - properly check and report errors for unknown fields, stack overflow,
10307 **   improper array nesting (or lack of nesting).
10308 ** - handling of base64 sequences with padding characters.
10309 ** - handling of push-back (non-success returns from sink functions).
10310 ** - handling of keys/escape-sequences/etc that span input buffers.
10311 */
10312 
10313 #include <assert.h>
10314 #include <errno.h>
10315 #include <stdint.h>
10316 #include <stdlib.h>
10317 #include <string.h>
10318 
10319 
10320 #define UPB_JSON_MAX_DEPTH 64
10321 
10322 typedef struct {
10323   upb_sink sink;
10324 
10325   /* The current message in which we're parsing, and the field whose value we're
10326    * expecting next. */
10327   const upb_msgdef *m;
10328   const upb_fielddef *f;
10329 
10330   /* The table mapping json name to fielddef for this message. */
10331   upb_strtable *name_table;
10332 
10333   /* We are in a repeated-field context, ready to emit mapentries as
10334    * submessages. This flag alters the start-of-object (open-brace) behavior to
10335    * begin a sequence of mapentry messages rather than a single submessage. */
10336   bool is_map;
10337 
10338   /* We are in a map-entry message context. This flag is set when parsing the
10339    * value field of a single map entry and indicates to all value-field parsers
10340    * (subobjects, strings, numbers, and bools) that the map-entry submessage
10341    * should end as soon as the value is parsed. */
10342   bool is_mapentry;
10343 
10344   /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
10345    * message's map field that we're currently parsing. This differs from |f|
10346    * because |f| is the field in the *current* message (i.e., the map-entry
10347    * message itself), not the parent's field that leads to this map. */
10348   const upb_fielddef *mapfield;
10349 } upb_jsonparser_frame;
10350 
10351 struct upb_json_parser {
10352   upb_env *env;
10353   const upb_json_parsermethod *method;
10354   upb_bytessink input_;
10355 
10356   /* Stack to track the JSON scopes we are in. */
10357   upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
10358   upb_jsonparser_frame *top;
10359   upb_jsonparser_frame *limit;
10360 
10361   upb_status status;
10362 
10363   /* Ragel's internal parsing stack for the parsing state machine. */
10364   int current_state;
10365   int parser_stack[UPB_JSON_MAX_DEPTH];
10366   int parser_top;
10367 
10368   /* The handle for the current buffer. */
10369   const upb_bufhandle *handle;
10370 
10371   /* Accumulate buffer.  See details in parser.rl. */
10372   const char *accumulated;
10373   size_t accumulated_len;
10374   char *accumulate_buf;
10375   size_t accumulate_buf_size;
10376 
10377   /* Multi-part text data.  See details in parser.rl. */
10378   int multipart_state;
10379   upb_selector_t string_selector;
10380 
10381   /* Input capture.  See details in parser.rl. */
10382   const char *capture;
10383 
10384   /* Intermediate result of parsing a unicode escape sequence. */
10385   uint32_t digit;
10386 };
10387 
10388 struct upb_json_parsermethod {
10389   upb_refcounted base;
10390 
10391   upb_byteshandler input_handler_;
10392 
10393   /* Mainly for the purposes of refcounting, so all the fielddefs we point
10394    * to stay alive. */
10395   const upb_msgdef *msg;
10396 
10397   /* Keys are upb_msgdef*, values are upb_strtable (json_name -> fielddef) */
10398   upb_inttable name_tables;
10399 };
10400 
10401 #define PARSER_CHECK_RETURN(x) if (!(x)) return false
10402 
10403 /* Used to signal that a capture has been suspended. */
10404 static char suspend_capture;
10405 
getsel_for_handlertype(upb_json_parser * p,upb_handlertype_t type)10406 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
10407                                              upb_handlertype_t type) {
10408   upb_selector_t sel;
10409   bool ok = upb_handlers_getselector(p->top->f, type, &sel);
10410   UPB_ASSERT_VAR(ok, ok);
10411   return sel;
10412 }
10413 
parser_getsel(upb_json_parser * p)10414 static upb_selector_t parser_getsel(upb_json_parser *p) {
10415   return getsel_for_handlertype(
10416       p, upb_handlers_getprimitivehandlertype(p->top->f));
10417 }
10418 
check_stack(upb_json_parser * p)10419 static bool check_stack(upb_json_parser *p) {
10420   if ((p->top + 1) == p->limit) {
10421     upb_status_seterrmsg(&p->status, "Nesting too deep");
10422     upb_env_reporterror(p->env, &p->status);
10423     return false;
10424   }
10425 
10426   return true;
10427 }
10428 
set_name_table(upb_json_parser * p,upb_jsonparser_frame * frame)10429 static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
10430   upb_value v;
10431   bool ok = upb_inttable_lookupptr(&p->method->name_tables, frame->m, &v);
10432   UPB_ASSERT_VAR(ok, ok);
10433   frame->name_table = upb_value_getptr(v);
10434 }
10435 
10436 /* There are GCC/Clang built-ins for overflow checking which we could start
10437  * using if there was any performance benefit to it. */
10438 
checked_add(size_t a,size_t b,size_t * c)10439 static bool checked_add(size_t a, size_t b, size_t *c) {
10440   if (SIZE_MAX - a < b) return false;
10441   *c = a + b;
10442   return true;
10443 }
10444 
saturating_multiply(size_t a,size_t b)10445 static size_t saturating_multiply(size_t a, size_t b) {
10446   /* size_t is unsigned, so this is defined behavior even on overflow. */
10447   size_t ret = a * b;
10448   if (b != 0 && ret / b != a) {
10449     ret = SIZE_MAX;
10450   }
10451   return ret;
10452 }
10453 
10454 
10455 /* Base64 decoding ************************************************************/
10456 
10457 /* TODO(haberman): make this streaming. */
10458 
10459 static const signed char b64table[] = {
10460   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10461   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10462   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10463   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10464   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10465   -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
10466   52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
10467   60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
10468   -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
10469   07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
10470   15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
10471   23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
10472   -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
10473   33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
10474   41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
10475   49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
10476   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10477   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10478   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10479   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10480   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10481   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10482   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10483   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10484   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10485   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10486   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10487   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10488   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10489   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10490   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
10491   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
10492 };
10493 
10494 /* Returns the table value sign-extended to 32 bits.  Knowing that the upper
10495  * bits will be 1 for unrecognized characters makes it easier to check for
10496  * this error condition later (see below). */
b64lookup(unsigned char ch)10497 int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
10498 
10499 /* Returns true if the given character is not a valid base64 character or
10500  * padding. */
nonbase64(unsigned char ch)10501 bool nonbase64(unsigned char ch) { return b64lookup(ch) == -1 && ch != '='; }
10502 
base64_push(upb_json_parser * p,upb_selector_t sel,const char * ptr,size_t len)10503 static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
10504                         size_t len) {
10505   const char *limit = ptr + len;
10506   for (; ptr < limit; ptr += 4) {
10507     uint32_t val;
10508     char output[3];
10509 
10510     if (limit - ptr < 4) {
10511       upb_status_seterrf(&p->status,
10512                          "Base64 input for bytes field not a multiple of 4: %s",
10513                          upb_fielddef_name(p->top->f));
10514       upb_env_reporterror(p->env, &p->status);
10515       return false;
10516     }
10517 
10518     val = b64lookup(ptr[0]) << 18 |
10519           b64lookup(ptr[1]) << 12 |
10520           b64lookup(ptr[2]) << 6  |
10521           b64lookup(ptr[3]);
10522 
10523     /* Test the upper bit; returns true if any of the characters returned -1. */
10524     if (val & 0x80000000) {
10525       goto otherchar;
10526     }
10527 
10528     output[0] = val >> 16;
10529     output[1] = (val >> 8) & 0xff;
10530     output[2] = val & 0xff;
10531     upb_sink_putstring(&p->top->sink, sel, output, 3, NULL);
10532   }
10533   return true;
10534 
10535 otherchar:
10536   if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
10537       nonbase64(ptr[3]) ) {
10538     upb_status_seterrf(&p->status,
10539                        "Non-base64 characters in bytes field: %s",
10540                        upb_fielddef_name(p->top->f));
10541     upb_env_reporterror(p->env, &p->status);
10542     return false;
10543   } if (ptr[2] == '=') {
10544     uint32_t val;
10545     char output;
10546 
10547     /* Last group contains only two input bytes, one output byte. */
10548     if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
10549       goto badpadding;
10550     }
10551 
10552     val = b64lookup(ptr[0]) << 18 |
10553           b64lookup(ptr[1]) << 12;
10554 
10555     assert(!(val & 0x80000000));
10556     output = val >> 16;
10557     upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
10558     return true;
10559   } else {
10560     uint32_t val;
10561     char output[2];
10562 
10563     /* Last group contains only three input bytes, two output bytes. */
10564     if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
10565       goto badpadding;
10566     }
10567 
10568     val = b64lookup(ptr[0]) << 18 |
10569           b64lookup(ptr[1]) << 12 |
10570           b64lookup(ptr[2]) << 6;
10571 
10572     output[0] = val >> 16;
10573     output[1] = (val >> 8) & 0xff;
10574     upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
10575     return true;
10576   }
10577 
10578 badpadding:
10579   upb_status_seterrf(&p->status,
10580                      "Incorrect base64 padding for field: %s (%.*s)",
10581                      upb_fielddef_name(p->top->f),
10582                      4, ptr);
10583   upb_env_reporterror(p->env, &p->status);
10584   return false;
10585 }
10586 
10587 
10588 /* Accumulate buffer **********************************************************/
10589 
10590 /* Functionality for accumulating a buffer.
10591  *
10592  * Some parts of the parser need an entire value as a contiguous string.  For
10593  * example, to look up a member name in a hash table, or to turn a string into
10594  * a number, the relevant library routines need the input string to be in
10595  * contiguous memory, even if the value spanned two or more buffers in the
10596  * input.  These routines handle that.
10597  *
10598  * In the common case we can just point to the input buffer to get this
10599  * contiguous string and avoid any actual copy.  So we optimistically begin
10600  * this way.  But there are a few cases where we must instead copy into a
10601  * separate buffer:
10602  *
10603  *   1. The string was not contiguous in the input (it spanned buffers).
10604  *
10605  *   2. The string included escape sequences that need to be interpreted to get
10606  *      the true value in a contiguous buffer. */
10607 
assert_accumulate_empty(upb_json_parser * p)10608 static void assert_accumulate_empty(upb_json_parser *p) {
10609   UPB_UNUSED(p);
10610   assert(p->accumulated == NULL);
10611   assert(p->accumulated_len == 0);
10612 }
10613 
accumulate_clear(upb_json_parser * p)10614 static void accumulate_clear(upb_json_parser *p) {
10615   p->accumulated = NULL;
10616   p->accumulated_len = 0;
10617 }
10618 
10619 /* Used internally by accumulate_append(). */
accumulate_realloc(upb_json_parser * p,size_t need)10620 static bool accumulate_realloc(upb_json_parser *p, size_t need) {
10621   void *mem;
10622   size_t old_size = p->accumulate_buf_size;
10623   size_t new_size = UPB_MAX(old_size, 128);
10624   while (new_size < need) {
10625     new_size = saturating_multiply(new_size, 2);
10626   }
10627 
10628   mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
10629   if (!mem) {
10630     upb_status_seterrmsg(&p->status, "Out of memory allocating buffer.");
10631     upb_env_reporterror(p->env, &p->status);
10632     return false;
10633   }
10634 
10635   p->accumulate_buf = mem;
10636   p->accumulate_buf_size = new_size;
10637   return true;
10638 }
10639 
10640 /* Logically appends the given data to the append buffer.
10641  * If "can_alias" is true, we will try to avoid actually copying, but the buffer
10642  * must be valid until the next accumulate_append() call (if any). */
accumulate_append(upb_json_parser * p,const char * buf,size_t len,bool can_alias)10643 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
10644                               bool can_alias) {
10645   size_t need;
10646 
10647   if (!p->accumulated && can_alias) {
10648     p->accumulated = buf;
10649     p->accumulated_len = len;
10650     return true;
10651   }
10652 
10653   if (!checked_add(p->accumulated_len, len, &need)) {
10654     upb_status_seterrmsg(&p->status, "Integer overflow.");
10655     upb_env_reporterror(p->env, &p->status);
10656     return false;
10657   }
10658 
10659   if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
10660     return false;
10661   }
10662 
10663   if (p->accumulated != p->accumulate_buf) {
10664     memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
10665     p->accumulated = p->accumulate_buf;
10666   }
10667 
10668   memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
10669   p->accumulated_len += len;
10670   return true;
10671 }
10672 
10673 /* Returns a pointer to the data accumulated since the last accumulate_clear()
10674  * call, and writes the length to *len.  This with point either to the input
10675  * buffer or a temporary accumulate buffer. */
accumulate_getptr(upb_json_parser * p,size_t * len)10676 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
10677   assert(p->accumulated);
10678   *len = p->accumulated_len;
10679   return p->accumulated;
10680 }
10681 
10682 
10683 /* Mult-part text data ********************************************************/
10684 
10685 /* When we have text data in the input, it can often come in multiple segments.
10686  * For example, there may be some raw string data followed by an escape
10687  * sequence.  The two segments are processed with different logic.  Also buffer
10688  * seams in the input can cause multiple segments.
10689  *
10690  * As we see segments, there are two main cases for how we want to process them:
10691  *
10692  *  1. we want to push the captured input directly to string handlers.
10693  *
10694  *  2. we need to accumulate all the parts into a contiguous buffer for further
10695  *     processing (field name lookup, string->number conversion, etc). */
10696 
10697 /* This is the set of states for p->multipart_state. */
10698 enum {
10699   /* We are not currently processing multipart data. */
10700   MULTIPART_INACTIVE = 0,
10701 
10702   /* We are processing multipart data by accumulating it into a contiguous
10703    * buffer. */
10704   MULTIPART_ACCUMULATE = 1,
10705 
10706   /* We are processing multipart data by pushing each part directly to the
10707    * current string handlers. */
10708   MULTIPART_PUSHEAGERLY = 2
10709 };
10710 
10711 /* Start a multi-part text value where we accumulate the data for processing at
10712  * the end. */
multipart_startaccum(upb_json_parser * p)10713 static void multipart_startaccum(upb_json_parser *p) {
10714   assert_accumulate_empty(p);
10715   assert(p->multipart_state == MULTIPART_INACTIVE);
10716   p->multipart_state = MULTIPART_ACCUMULATE;
10717 }
10718 
10719 /* Start a multi-part text value where we immediately push text data to a string
10720  * value with the given selector. */
multipart_start(upb_json_parser * p,upb_selector_t sel)10721 static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
10722   assert_accumulate_empty(p);
10723   assert(p->multipart_state == MULTIPART_INACTIVE);
10724   p->multipart_state = MULTIPART_PUSHEAGERLY;
10725   p->string_selector = sel;
10726 }
10727 
multipart_text(upb_json_parser * p,const char * buf,size_t len,bool can_alias)10728 static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
10729                            bool can_alias) {
10730   switch (p->multipart_state) {
10731     case MULTIPART_INACTIVE:
10732       upb_status_seterrmsg(
10733           &p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
10734       upb_env_reporterror(p->env, &p->status);
10735       return false;
10736 
10737     case MULTIPART_ACCUMULATE:
10738       if (!accumulate_append(p, buf, len, can_alias)) {
10739         return false;
10740       }
10741       break;
10742 
10743     case MULTIPART_PUSHEAGERLY: {
10744       const upb_bufhandle *handle = can_alias ? p->handle : NULL;
10745       upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
10746       break;
10747     }
10748   }
10749 
10750   return true;
10751 }
10752 
10753 /* Note: this invalidates the accumulate buffer!  Call only after reading its
10754  * contents. */
multipart_end(upb_json_parser * p)10755 static void multipart_end(upb_json_parser *p) {
10756   assert(p->multipart_state != MULTIPART_INACTIVE);
10757   p->multipart_state = MULTIPART_INACTIVE;
10758   accumulate_clear(p);
10759 }
10760 
10761 
10762 /* Input capture **************************************************************/
10763 
10764 /* Functionality for capturing a region of the input as text.  Gracefully
10765  * handles the case where a buffer seam occurs in the middle of the captured
10766  * region. */
10767 
capture_begin(upb_json_parser * p,const char * ptr)10768 static void capture_begin(upb_json_parser *p, const char *ptr) {
10769   assert(p->multipart_state != MULTIPART_INACTIVE);
10770   assert(p->capture == NULL);
10771   p->capture = ptr;
10772 }
10773 
capture_end(upb_json_parser * p,const char * ptr)10774 static bool capture_end(upb_json_parser *p, const char *ptr) {
10775   assert(p->capture);
10776   if (multipart_text(p, p->capture, ptr - p->capture, true)) {
10777     p->capture = NULL;
10778     return true;
10779   } else {
10780     return false;
10781   }
10782 }
10783 
10784 /* This is called at the end of each input buffer (ie. when we have hit a
10785  * buffer seam).  If we are in the middle of capturing the input, this
10786  * processes the unprocessed capture region. */
capture_suspend(upb_json_parser * p,const char ** ptr)10787 static void capture_suspend(upb_json_parser *p, const char **ptr) {
10788   if (!p->capture) return;
10789 
10790   if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
10791     /* We use this as a signal that we were in the middle of capturing, and
10792      * that capturing should resume at the beginning of the next buffer.
10793      *
10794      * We can't use *ptr here, because we have no guarantee that this pointer
10795      * will be valid when we resume (if the underlying memory is freed, then
10796      * using the pointer at all, even to compare to NULL, is likely undefined
10797      * behavior). */
10798     p->capture = &suspend_capture;
10799   } else {
10800     /* Need to back up the pointer to the beginning of the capture, since
10801      * we were not able to actually preserve it. */
10802     *ptr = p->capture;
10803   }
10804 }
10805 
capture_resume(upb_json_parser * p,const char * ptr)10806 static void capture_resume(upb_json_parser *p, const char *ptr) {
10807   if (p->capture) {
10808     assert(p->capture == &suspend_capture);
10809     p->capture = ptr;
10810   }
10811 }
10812 
10813 
10814 /* Callbacks from the parser **************************************************/
10815 
10816 /* These are the functions called directly from the parser itself.
10817  * We define these in the same order as their declarations in the parser. */
10818 
escape_char(char in)10819 static char escape_char(char in) {
10820   switch (in) {
10821     case 'r': return '\r';
10822     case 't': return '\t';
10823     case 'n': return '\n';
10824     case 'f': return '\f';
10825     case 'b': return '\b';
10826     case '/': return '/';
10827     case '"': return '"';
10828     case '\\': return '\\';
10829     default:
10830       assert(0);
10831       return 'x';
10832   }
10833 }
10834 
escape(upb_json_parser * p,const char * ptr)10835 static bool escape(upb_json_parser *p, const char *ptr) {
10836   char ch = escape_char(*ptr);
10837   return multipart_text(p, &ch, 1, false);
10838 }
10839 
start_hex(upb_json_parser * p)10840 static void start_hex(upb_json_parser *p) {
10841   p->digit = 0;
10842 }
10843 
hexdigit(upb_json_parser * p,const char * ptr)10844 static void hexdigit(upb_json_parser *p, const char *ptr) {
10845   char ch = *ptr;
10846 
10847   p->digit <<= 4;
10848 
10849   if (ch >= '0' && ch <= '9') {
10850     p->digit += (ch - '0');
10851   } else if (ch >= 'a' && ch <= 'f') {
10852     p->digit += ((ch - 'a') + 10);
10853   } else {
10854     assert(ch >= 'A' && ch <= 'F');
10855     p->digit += ((ch - 'A') + 10);
10856   }
10857 }
10858 
end_hex(upb_json_parser * p)10859 static bool end_hex(upb_json_parser *p) {
10860   uint32_t codepoint = p->digit;
10861 
10862   /* emit the codepoint as UTF-8. */
10863   char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */
10864   int length = 0;
10865   if (codepoint <= 0x7F) {
10866     utf8[0] = codepoint;
10867     length = 1;
10868   } else if (codepoint <= 0x07FF) {
10869     utf8[1] = (codepoint & 0x3F) | 0x80;
10870     codepoint >>= 6;
10871     utf8[0] = (codepoint & 0x1F) | 0xC0;
10872     length = 2;
10873   } else /* codepoint <= 0xFFFF */ {
10874     utf8[2] = (codepoint & 0x3F) | 0x80;
10875     codepoint >>= 6;
10876     utf8[1] = (codepoint & 0x3F) | 0x80;
10877     codepoint >>= 6;
10878     utf8[0] = (codepoint & 0x0F) | 0xE0;
10879     length = 3;
10880   }
10881   /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
10882    * we have to wait for the next escape to get the full code point). */
10883 
10884   return multipart_text(p, utf8, length, false);
10885 }
10886 
start_text(upb_json_parser * p,const char * ptr)10887 static void start_text(upb_json_parser *p, const char *ptr) {
10888   capture_begin(p, ptr);
10889 }
10890 
end_text(upb_json_parser * p,const char * ptr)10891 static bool end_text(upb_json_parser *p, const char *ptr) {
10892   return capture_end(p, ptr);
10893 }
10894 
start_number(upb_json_parser * p,const char * ptr)10895 static void start_number(upb_json_parser *p, const char *ptr) {
10896   multipart_startaccum(p);
10897   capture_begin(p, ptr);
10898 }
10899 
10900 static bool parse_number(upb_json_parser *p);
10901 
end_number(upb_json_parser * p,const char * ptr)10902 static bool end_number(upb_json_parser *p, const char *ptr) {
10903   if (!capture_end(p, ptr)) {
10904     return false;
10905   }
10906 
10907   return parse_number(p);
10908 }
10909 
parse_number(upb_json_parser * p)10910 static bool parse_number(upb_json_parser *p) {
10911   size_t len;
10912   const char *buf;
10913   const char *myend;
10914   char *end;
10915 
10916   /* strtol() and friends unfortunately do not support specifying the length of
10917    * the input string, so we need to force a copy into a NULL-terminated buffer. */
10918   if (!multipart_text(p, "\0", 1, false)) {
10919     return false;
10920   }
10921 
10922   buf = accumulate_getptr(p, &len);
10923   myend = buf + len - 1;  /* One for NULL. */
10924 
10925   /* XXX: We are using strtol to parse integers, but this is wrong as even
10926    * integers can be represented as 1e6 (for example), which strtol can't
10927    * handle correctly.
10928    *
10929    * XXX: Also, we can't handle large integers properly because strto[u]ll
10930    * isn't in C89.
10931    *
10932    * XXX: Also, we don't properly check floats for overflow, since strtof
10933    * isn't in C89. */
10934   switch (upb_fielddef_type(p->top->f)) {
10935     case UPB_TYPE_ENUM:
10936     case UPB_TYPE_INT32: {
10937       long val = strtol(p->accumulated, &end, 0);
10938       if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
10939         goto err;
10940       else
10941         upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
10942       break;
10943     }
10944     case UPB_TYPE_INT64: {
10945       long long val = strtol(p->accumulated, &end, 0);
10946       if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
10947         goto err;
10948       else
10949         upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
10950       break;
10951     }
10952     case UPB_TYPE_UINT32: {
10953       unsigned long val = strtoul(p->accumulated, &end, 0);
10954       if (val > UINT32_MAX || errno == ERANGE || end != myend)
10955         goto err;
10956       else
10957         upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
10958       break;
10959     }
10960     case UPB_TYPE_UINT64: {
10961       unsigned long long val = strtoul(p->accumulated, &end, 0);
10962       if (val > UINT64_MAX || errno == ERANGE || end != myend)
10963         goto err;
10964       else
10965         upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
10966       break;
10967     }
10968     case UPB_TYPE_DOUBLE: {
10969       double val = strtod(p->accumulated, &end);
10970       if (errno == ERANGE || end != myend)
10971         goto err;
10972       else
10973         upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
10974       break;
10975     }
10976     case UPB_TYPE_FLOAT: {
10977       float val = strtod(p->accumulated, &end);
10978       if (errno == ERANGE || end != myend)
10979         goto err;
10980       else
10981         upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
10982       break;
10983     }
10984     default:
10985       assert(false);
10986   }
10987 
10988   multipart_end(p);
10989 
10990   return true;
10991 
10992 err:
10993   upb_status_seterrf(&p->status, "error parsing number: %s", buf);
10994   upb_env_reporterror(p->env, &p->status);
10995   multipart_end(p);
10996   return false;
10997 }
10998 
parser_putbool(upb_json_parser * p,bool val)10999 static bool parser_putbool(upb_json_parser *p, bool val) {
11000   bool ok;
11001 
11002   if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
11003     upb_status_seterrf(&p->status,
11004                        "Boolean value specified for non-bool field: %s",
11005                        upb_fielddef_name(p->top->f));
11006     upb_env_reporterror(p->env, &p->status);
11007     return false;
11008   }
11009 
11010   ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
11011   UPB_ASSERT_VAR(ok, ok);
11012 
11013   return true;
11014 }
11015 
start_stringval(upb_json_parser * p)11016 static bool start_stringval(upb_json_parser *p) {
11017   assert(p->top->f);
11018 
11019   if (upb_fielddef_isstring(p->top->f)) {
11020     upb_jsonparser_frame *inner;
11021     upb_selector_t sel;
11022 
11023     if (!check_stack(p)) return false;
11024 
11025     /* Start a new parser frame: parser frames correspond one-to-one with
11026      * handler frames, and string events occur in a sub-frame. */
11027     inner = p->top + 1;
11028     sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
11029     upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
11030     inner->m = p->top->m;
11031     inner->f = p->top->f;
11032     inner->name_table = NULL;
11033     inner->is_map = false;
11034     inner->is_mapentry = false;
11035     p->top = inner;
11036 
11037     if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
11038       /* For STRING fields we push data directly to the handlers as it is
11039        * parsed.  We don't do this yet for BYTES fields, because our base64
11040        * decoder is not streaming.
11041        *
11042        * TODO(haberman): make base64 decoding streaming also. */
11043       multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
11044       return true;
11045     } else {
11046       multipart_startaccum(p);
11047       return true;
11048     }
11049   } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
11050     /* No need to push a frame -- symbolic enum names in quotes remain in the
11051      * current parser frame.
11052      *
11053      * Enum string values must accumulate so we can look up the value in a table
11054      * once it is complete. */
11055     multipart_startaccum(p);
11056     return true;
11057   } else {
11058     upb_status_seterrf(&p->status,
11059                        "String specified for non-string/non-enum field: %s",
11060                        upb_fielddef_name(p->top->f));
11061     upb_env_reporterror(p->env, &p->status);
11062     return false;
11063   }
11064 }
11065 
end_stringval(upb_json_parser * p)11066 static bool end_stringval(upb_json_parser *p) {
11067   bool ok = true;
11068 
11069   switch (upb_fielddef_type(p->top->f)) {
11070     case UPB_TYPE_BYTES:
11071       if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
11072                        p->accumulated, p->accumulated_len)) {
11073         return false;
11074       }
11075       /* Fall through. */
11076 
11077     case UPB_TYPE_STRING: {
11078       upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
11079       upb_sink_endstr(&p->top->sink, sel);
11080       p->top--;
11081       break;
11082     }
11083 
11084     case UPB_TYPE_ENUM: {
11085       /* Resolve enum symbolic name to integer value. */
11086       const upb_enumdef *enumdef =
11087           (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
11088 
11089       size_t len;
11090       const char *buf = accumulate_getptr(p, &len);
11091 
11092       int32_t int_val = 0;
11093       ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
11094 
11095       if (ok) {
11096         upb_selector_t sel = parser_getsel(p);
11097         upb_sink_putint32(&p->top->sink, sel, int_val);
11098       } else {
11099         upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'", len, buf);
11100         upb_env_reporterror(p->env, &p->status);
11101       }
11102 
11103       break;
11104     }
11105 
11106     default:
11107       assert(false);
11108       upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
11109       upb_env_reporterror(p->env, &p->status);
11110       ok = false;
11111       break;
11112   }
11113 
11114   multipart_end(p);
11115 
11116   return ok;
11117 }
11118 
start_member(upb_json_parser * p)11119 static void start_member(upb_json_parser *p) {
11120   assert(!p->top->f);
11121   multipart_startaccum(p);
11122 }
11123 
11124 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key
11125  * field based on the current contents of the accumulate buffer. */
parse_mapentry_key(upb_json_parser * p)11126 static bool parse_mapentry_key(upb_json_parser *p) {
11127 
11128   size_t len;
11129   const char *buf = accumulate_getptr(p, &len);
11130 
11131   /* Emit the key field. We do a bit of ad-hoc parsing here because the
11132    * parser state machine has already decided that this is a string field
11133    * name, and we are reinterpreting it as some arbitrary key type. In
11134    * particular, integer and bool keys are quoted, so we need to parse the
11135    * quoted string contents here. */
11136 
11137   p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
11138   if (p->top->f == NULL) {
11139     upb_status_seterrmsg(&p->status, "mapentry message has no key");
11140     upb_env_reporterror(p->env, &p->status);
11141     return false;
11142   }
11143   switch (upb_fielddef_type(p->top->f)) {
11144     case UPB_TYPE_INT32:
11145     case UPB_TYPE_INT64:
11146     case UPB_TYPE_UINT32:
11147     case UPB_TYPE_UINT64:
11148       /* Invoke end_number. The accum buffer has the number's text already. */
11149       if (!parse_number(p)) {
11150         return false;
11151       }
11152       break;
11153     case UPB_TYPE_BOOL:
11154       if (len == 4 && !strncmp(buf, "true", 4)) {
11155         if (!parser_putbool(p, true)) {
11156           return false;
11157         }
11158       } else if (len == 5 && !strncmp(buf, "false", 5)) {
11159         if (!parser_putbool(p, false)) {
11160           return false;
11161         }
11162       } else {
11163         upb_status_seterrmsg(&p->status,
11164                              "Map bool key not 'true' or 'false'");
11165         upb_env_reporterror(p->env, &p->status);
11166         return false;
11167       }
11168       multipart_end(p);
11169       break;
11170     case UPB_TYPE_STRING:
11171     case UPB_TYPE_BYTES: {
11172       upb_sink subsink;
11173       upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
11174       upb_sink_startstr(&p->top->sink, sel, len, &subsink);
11175       sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
11176       upb_sink_putstring(&subsink, sel, buf, len, NULL);
11177       sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
11178       upb_sink_endstr(&subsink, sel);
11179       multipart_end(p);
11180       break;
11181     }
11182     default:
11183       upb_status_seterrmsg(&p->status, "Invalid field type for map key");
11184       upb_env_reporterror(p->env, &p->status);
11185       return false;
11186   }
11187 
11188   return true;
11189 }
11190 
11191 /* Helper: emit one map entry (as a submessage in the map field sequence). This
11192  * is invoked from end_membername(), at the end of the map entry's key string,
11193  * with the map key in the accumulate buffer. It parses the key from that
11194  * buffer, emits the handler calls to start the mapentry submessage (setting up
11195  * its subframe in the process), and sets up state in the subframe so that the
11196  * value parser (invoked next) will emit the mapentry's value field and then
11197  * end the mapentry message. */
11198 
handle_mapentry(upb_json_parser * p)11199 static bool handle_mapentry(upb_json_parser *p) {
11200   const upb_fielddef *mapfield;
11201   const upb_msgdef *mapentrymsg;
11202   upb_jsonparser_frame *inner;
11203   upb_selector_t sel;
11204 
11205   /* Map entry: p->top->sink is the seq frame, so we need to start a frame
11206    * for the mapentry itself, and then set |f| in that frame so that the map
11207    * value field is parsed, and also set a flag to end the frame after the
11208    * map-entry value is parsed. */
11209   if (!check_stack(p)) return false;
11210 
11211   mapfield = p->top->mapfield;
11212   mapentrymsg = upb_fielddef_msgsubdef(mapfield);
11213 
11214   inner = p->top + 1;
11215   p->top->f = mapfield;
11216   sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
11217   upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
11218   inner->m = mapentrymsg;
11219   inner->name_table = NULL;
11220   inner->mapfield = mapfield;
11221   inner->is_map = false;
11222 
11223   /* Don't set this to true *yet* -- we reuse parsing handlers below to push
11224    * the key field value to the sink, and these handlers will pop the frame
11225    * if they see is_mapentry (when invoked by the parser state machine, they
11226    * would have just seen the map-entry value, not key). */
11227   inner->is_mapentry = false;
11228   p->top = inner;
11229 
11230   /* send STARTMSG in submsg frame. */
11231   upb_sink_startmsg(&p->top->sink);
11232 
11233   parse_mapentry_key(p);
11234 
11235   /* Set up the value field to receive the map-entry value. */
11236   p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
11237   p->top->is_mapentry = true;  /* set up to pop frame after value is parsed. */
11238   p->top->mapfield = mapfield;
11239   if (p->top->f == NULL) {
11240     upb_status_seterrmsg(&p->status, "mapentry message has no value");
11241     upb_env_reporterror(p->env, &p->status);
11242     return false;
11243   }
11244 
11245   return true;
11246 }
11247 
end_membername(upb_json_parser * p)11248 static bool end_membername(upb_json_parser *p) {
11249   assert(!p->top->f);
11250 
11251   if (p->top->is_map) {
11252     return handle_mapentry(p);
11253   } else {
11254     size_t len;
11255     const char *buf = accumulate_getptr(p, &len);
11256     upb_value v;
11257 
11258     if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) {
11259       p->top->f = upb_value_getconstptr(v);
11260       multipart_end(p);
11261 
11262       return true;
11263     } else {
11264       /* TODO(haberman): Ignore unknown fields if requested/configured to do
11265        * so. */
11266       upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf);
11267       upb_env_reporterror(p->env, &p->status);
11268       return false;
11269     }
11270   }
11271 }
11272 
end_member(upb_json_parser * p)11273 static void end_member(upb_json_parser *p) {
11274   /* If we just parsed a map-entry value, end that frame too. */
11275   if (p->top->is_mapentry) {
11276     upb_status s = UPB_STATUS_INIT;
11277     upb_selector_t sel;
11278     bool ok;
11279     const upb_fielddef *mapfield;
11280 
11281     assert(p->top > p->stack);
11282     /* send ENDMSG on submsg. */
11283     upb_sink_endmsg(&p->top->sink, &s);
11284     mapfield = p->top->mapfield;
11285 
11286     /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
11287     p->top--;
11288     ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
11289     UPB_ASSERT_VAR(ok, ok);
11290     upb_sink_endsubmsg(&p->top->sink, sel);
11291   }
11292 
11293   p->top->f = NULL;
11294 }
11295 
start_subobject(upb_json_parser * p)11296 static bool start_subobject(upb_json_parser *p) {
11297   assert(p->top->f);
11298 
11299   if (upb_fielddef_ismap(p->top->f)) {
11300     upb_jsonparser_frame *inner;
11301     upb_selector_t sel;
11302 
11303     /* Beginning of a map. Start a new parser frame in a repeated-field
11304      * context. */
11305     if (!check_stack(p)) return false;
11306 
11307     inner = p->top + 1;
11308     sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
11309     upb_sink_startseq(&p->top->sink, sel, &inner->sink);
11310     inner->m = upb_fielddef_msgsubdef(p->top->f);
11311     inner->name_table = NULL;
11312     inner->mapfield = p->top->f;
11313     inner->f = NULL;
11314     inner->is_map = true;
11315     inner->is_mapentry = false;
11316     p->top = inner;
11317 
11318     return true;
11319   } else if (upb_fielddef_issubmsg(p->top->f)) {
11320     upb_jsonparser_frame *inner;
11321     upb_selector_t sel;
11322 
11323     /* Beginning of a subobject. Start a new parser frame in the submsg
11324      * context. */
11325     if (!check_stack(p)) return false;
11326 
11327     inner = p->top + 1;
11328 
11329     sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
11330     upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
11331     inner->m = upb_fielddef_msgsubdef(p->top->f);
11332     set_name_table(p, inner);
11333     inner->f = NULL;
11334     inner->is_map = false;
11335     inner->is_mapentry = false;
11336     p->top = inner;
11337 
11338     return true;
11339   } else {
11340     upb_status_seterrf(&p->status,
11341                        "Object specified for non-message/group field: %s",
11342                        upb_fielddef_name(p->top->f));
11343     upb_env_reporterror(p->env, &p->status);
11344     return false;
11345   }
11346 }
11347 
end_subobject(upb_json_parser * p)11348 static void end_subobject(upb_json_parser *p) {
11349   if (p->top->is_map) {
11350     upb_selector_t sel;
11351     p->top--;
11352     sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
11353     upb_sink_endseq(&p->top->sink, sel);
11354   } else {
11355     upb_selector_t sel;
11356     p->top--;
11357     sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
11358     upb_sink_endsubmsg(&p->top->sink, sel);
11359   }
11360 }
11361 
start_array(upb_json_parser * p)11362 static bool start_array(upb_json_parser *p) {
11363   upb_jsonparser_frame *inner;
11364   upb_selector_t sel;
11365 
11366   assert(p->top->f);
11367 
11368   if (!upb_fielddef_isseq(p->top->f)) {
11369     upb_status_seterrf(&p->status,
11370                        "Array specified for non-repeated field: %s",
11371                        upb_fielddef_name(p->top->f));
11372     upb_env_reporterror(p->env, &p->status);
11373     return false;
11374   }
11375 
11376   if (!check_stack(p)) return false;
11377 
11378   inner = p->top + 1;
11379   sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
11380   upb_sink_startseq(&p->top->sink, sel, &inner->sink);
11381   inner->m = p->top->m;
11382   inner->name_table = NULL;
11383   inner->f = p->top->f;
11384   inner->is_map = false;
11385   inner->is_mapentry = false;
11386   p->top = inner;
11387 
11388   return true;
11389 }
11390 
end_array(upb_json_parser * p)11391 static void end_array(upb_json_parser *p) {
11392   upb_selector_t sel;
11393 
11394   assert(p->top > p->stack);
11395 
11396   p->top--;
11397   sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
11398   upb_sink_endseq(&p->top->sink, sel);
11399 }
11400 
start_object(upb_json_parser * p)11401 static void start_object(upb_json_parser *p) {
11402   if (!p->top->is_map) {
11403     upb_sink_startmsg(&p->top->sink);
11404   }
11405 }
11406 
end_object(upb_json_parser * p)11407 static void end_object(upb_json_parser *p) {
11408   if (!p->top->is_map) {
11409     upb_status status;
11410     upb_status_clear(&status);
11411     upb_sink_endmsg(&p->top->sink, &status);
11412     if (!upb_ok(&status)) {
11413       upb_env_reporterror(p->env, &status);
11414     }
11415   }
11416 }
11417 
11418 
11419 #define CHECK_RETURN_TOP(x) if (!(x)) goto error
11420 
11421 
11422 /* The actual parser **********************************************************/
11423 
11424 /* What follows is the Ragel parser itself.  The language is specified in Ragel
11425  * and the actions call our C functions above.
11426  *
11427  * Ragel has an extensive set of functionality, and we use only a small part of
11428  * it.  There are many action types but we only use a few:
11429  *
11430  *   ">" -- transition into a machine
11431  *   "%" -- transition out of a machine
11432  *   "@" -- transition into a final state of a machine.
11433  *
11434  * "@" transitions are tricky because a machine can transition into a final
11435  * state repeatedly.  But in some cases we know this can't happen, for example
11436  * a string which is delimited by a final '"' can only transition into its
11437  * final state once, when the closing '"' is seen. */
11438 
11439 
11440 #line 1245 "upb/json/parser.rl"
11441 
11442 
11443 
11444 #line 1157 "upb/json/parser.c"
11445 static const char _json_actions[] = {
11446 	0, 1, 0, 1, 2, 1, 3, 1,
11447 	5, 1, 6, 1, 7, 1, 8, 1,
11448 	10, 1, 12, 1, 13, 1, 14, 1,
11449 	15, 1, 16, 1, 17, 1, 21, 1,
11450 	25, 1, 27, 2, 3, 8, 2, 4,
11451 	5, 2, 6, 2, 2, 6, 8, 2,
11452 	11, 9, 2, 13, 15, 2, 14, 15,
11453 	2, 18, 1, 2, 19, 27, 2, 20,
11454 	9, 2, 22, 27, 2, 23, 27, 2,
11455 	24, 27, 2, 26, 27, 3, 14, 11,
11456 	9
11457 };
11458 
11459 static const unsigned char _json_key_offsets[] = {
11460 	0, 0, 4, 9, 14, 15, 19, 24,
11461 	29, 34, 38, 42, 45, 48, 50, 54,
11462 	58, 60, 62, 67, 69, 71, 80, 86,
11463 	92, 98, 104, 106, 115, 116, 116, 116,
11464 	121, 126, 131, 132, 133, 134, 135, 135,
11465 	136, 137, 138, 138, 139, 140, 141, 141,
11466 	146, 151, 152, 156, 161, 166, 171, 175,
11467 	175, 178, 178, 178
11468 };
11469 
11470 static const char _json_trans_keys[] = {
11471 	32, 123, 9, 13, 32, 34, 125, 9,
11472 	13, 32, 34, 125, 9, 13, 34, 32,
11473 	58, 9, 13, 32, 93, 125, 9, 13,
11474 	32, 44, 125, 9, 13, 32, 44, 125,
11475 	9, 13, 32, 34, 9, 13, 45, 48,
11476 	49, 57, 48, 49, 57, 46, 69, 101,
11477 	48, 57, 69, 101, 48, 57, 43, 45,
11478 	48, 57, 48, 57, 48, 57, 46, 69,
11479 	101, 48, 57, 34, 92, 34, 92, 34,
11480 	47, 92, 98, 102, 110, 114, 116, 117,
11481 	48, 57, 65, 70, 97, 102, 48, 57,
11482 	65, 70, 97, 102, 48, 57, 65, 70,
11483 	97, 102, 48, 57, 65, 70, 97, 102,
11484 	34, 92, 34, 45, 91, 102, 110, 116,
11485 	123, 48, 57, 34, 32, 93, 125, 9,
11486 	13, 32, 44, 93, 9, 13, 32, 93,
11487 	125, 9, 13, 97, 108, 115, 101, 117,
11488 	108, 108, 114, 117, 101, 32, 34, 125,
11489 	9, 13, 32, 34, 125, 9, 13, 34,
11490 	32, 58, 9, 13, 32, 93, 125, 9,
11491 	13, 32, 44, 125, 9, 13, 32, 44,
11492 	125, 9, 13, 32, 34, 9, 13, 32,
11493 	9, 13, 0
11494 };
11495 
11496 static const char _json_single_lengths[] = {
11497 	0, 2, 3, 3, 1, 2, 3, 3,
11498 	3, 2, 2, 1, 3, 0, 2, 2,
11499 	0, 0, 3, 2, 2, 9, 0, 0,
11500 	0, 0, 2, 7, 1, 0, 0, 3,
11501 	3, 3, 1, 1, 1, 1, 0, 1,
11502 	1, 1, 0, 1, 1, 1, 0, 3,
11503 	3, 1, 2, 3, 3, 3, 2, 0,
11504 	1, 0, 0, 0
11505 };
11506 
11507 static const char _json_range_lengths[] = {
11508 	0, 1, 1, 1, 0, 1, 1, 1,
11509 	1, 1, 1, 1, 0, 1, 1, 1,
11510 	1, 1, 1, 0, 0, 0, 3, 3,
11511 	3, 3, 0, 1, 0, 0, 0, 1,
11512 	1, 1, 0, 0, 0, 0, 0, 0,
11513 	0, 0, 0, 0, 0, 0, 0, 1,
11514 	1, 0, 1, 1, 1, 1, 1, 0,
11515 	1, 0, 0, 0
11516 };
11517 
11518 static const short _json_index_offsets[] = {
11519 	0, 0, 4, 9, 14, 16, 20, 25,
11520 	30, 35, 39, 43, 46, 50, 52, 56,
11521 	60, 62, 64, 69, 72, 75, 85, 89,
11522 	93, 97, 101, 104, 113, 115, 116, 117,
11523 	122, 127, 132, 134, 136, 138, 140, 141,
11524 	143, 145, 147, 148, 150, 152, 154, 155,
11525 	160, 165, 167, 171, 176, 181, 186, 190,
11526 	191, 194, 195, 196
11527 };
11528 
11529 static const char _json_indicies[] = {
11530 	0, 2, 0, 1, 3, 4, 5, 3,
11531 	1, 6, 7, 8, 6, 1, 9, 1,
11532 	10, 11, 10, 1, 11, 1, 1, 11,
11533 	12, 13, 14, 15, 13, 1, 16, 17,
11534 	8, 16, 1, 17, 7, 17, 1, 18,
11535 	19, 20, 1, 19, 20, 1, 22, 23,
11536 	23, 21, 24, 1, 23, 23, 24, 21,
11537 	25, 25, 26, 1, 26, 1, 26, 21,
11538 	22, 23, 23, 20, 21, 28, 29, 27,
11539 	31, 32, 30, 33, 33, 33, 33, 33,
11540 	33, 33, 33, 34, 1, 35, 35, 35,
11541 	1, 36, 36, 36, 1, 37, 37, 37,
11542 	1, 38, 38, 38, 1, 40, 41, 39,
11543 	42, 43, 44, 45, 46, 47, 48, 43,
11544 	1, 49, 1, 50, 51, 53, 54, 1,
11545 	53, 52, 55, 56, 54, 55, 1, 56,
11546 	1, 1, 56, 52, 57, 1, 58, 1,
11547 	59, 1, 60, 1, 61, 62, 1, 63,
11548 	1, 64, 1, 65, 66, 1, 67, 1,
11549 	68, 1, 69, 70, 71, 72, 70, 1,
11550 	73, 74, 75, 73, 1, 76, 1, 77,
11551 	78, 77, 1, 78, 1, 1, 78, 79,
11552 	80, 81, 82, 80, 1, 83, 84, 75,
11553 	83, 1, 84, 74, 84, 1, 85, 86,
11554 	86, 1, 1, 1, 1, 0
11555 };
11556 
11557 static const char _json_trans_targs[] = {
11558 	1, 0, 2, 3, 4, 56, 3, 4,
11559 	56, 5, 5, 6, 7, 8, 9, 56,
11560 	8, 9, 11, 12, 18, 57, 13, 15,
11561 	14, 16, 17, 20, 58, 21, 20, 58,
11562 	21, 19, 22, 23, 24, 25, 26, 20,
11563 	58, 21, 28, 30, 31, 34, 39, 43,
11564 	47, 29, 59, 59, 32, 31, 29, 32,
11565 	33, 35, 36, 37, 38, 59, 40, 41,
11566 	42, 59, 44, 45, 46, 59, 48, 49,
11567 	55, 48, 49, 55, 50, 50, 51, 52,
11568 	53, 54, 55, 53, 54, 59, 56
11569 };
11570 
11571 static const char _json_trans_actions[] = {
11572 	0, 0, 0, 21, 77, 53, 0, 47,
11573 	23, 17, 0, 0, 15, 19, 19, 50,
11574 	0, 0, 0, 0, 0, 1, 0, 0,
11575 	0, 0, 0, 3, 13, 0, 0, 35,
11576 	5, 11, 0, 38, 7, 7, 7, 41,
11577 	44, 9, 62, 56, 25, 0, 0, 0,
11578 	31, 29, 33, 59, 15, 0, 27, 0,
11579 	0, 0, 0, 0, 0, 68, 0, 0,
11580 	0, 71, 0, 0, 0, 65, 21, 77,
11581 	53, 0, 47, 23, 17, 0, 0, 15,
11582 	19, 19, 50, 0, 0, 74, 0
11583 };
11584 
11585 static const int json_start = 1;
11586 
11587 static const int json_en_number_machine = 10;
11588 static const int json_en_string_machine = 19;
11589 static const int json_en_value_machine = 27;
11590 static const int json_en_main = 1;
11591 
11592 
11593 #line 1248 "upb/json/parser.rl"
11594 
parse(void * closure,const void * hd,const char * buf,size_t size,const upb_bufhandle * handle)11595 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
11596              const upb_bufhandle *handle) {
11597   upb_json_parser *parser = closure;
11598 
11599   /* Variables used by Ragel's generated code. */
11600   int cs = parser->current_state;
11601   int *stack = parser->parser_stack;
11602   int top = parser->parser_top;
11603 
11604   const char *p = buf;
11605   const char *pe = buf + size;
11606 
11607   parser->handle = handle;
11608 
11609   UPB_UNUSED(hd);
11610   UPB_UNUSED(handle);
11611 
11612   capture_resume(parser, buf);
11613 
11614 
11615 #line 1328 "upb/json/parser.c"
11616 	{
11617 	int _klen;
11618 	unsigned int _trans;
11619 	const char *_acts;
11620 	unsigned int _nacts;
11621 	const char *_keys;
11622 
11623 	if ( p == pe )
11624 		goto _test_eof;
11625 	if ( cs == 0 )
11626 		goto _out;
11627 _resume:
11628 	_keys = _json_trans_keys + _json_key_offsets[cs];
11629 	_trans = _json_index_offsets[cs];
11630 
11631 	_klen = _json_single_lengths[cs];
11632 	if ( _klen > 0 ) {
11633 		const char *_lower = _keys;
11634 		const char *_mid;
11635 		const char *_upper = _keys + _klen - 1;
11636 		while (1) {
11637 			if ( _upper < _lower )
11638 				break;
11639 
11640 			_mid = _lower + ((_upper-_lower) >> 1);
11641 			if ( (*p) < *_mid )
11642 				_upper = _mid - 1;
11643 			else if ( (*p) > *_mid )
11644 				_lower = _mid + 1;
11645 			else {
11646 				_trans += (unsigned int)(_mid - _keys);
11647 				goto _match;
11648 			}
11649 		}
11650 		_keys += _klen;
11651 		_trans += _klen;
11652 	}
11653 
11654 	_klen = _json_range_lengths[cs];
11655 	if ( _klen > 0 ) {
11656 		const char *_lower = _keys;
11657 		const char *_mid;
11658 		const char *_upper = _keys + (_klen<<1) - 2;
11659 		while (1) {
11660 			if ( _upper < _lower )
11661 				break;
11662 
11663 			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
11664 			if ( (*p) < _mid[0] )
11665 				_upper = _mid - 2;
11666 			else if ( (*p) > _mid[1] )
11667 				_lower = _mid + 2;
11668 			else {
11669 				_trans += (unsigned int)((_mid - _keys)>>1);
11670 				goto _match;
11671 			}
11672 		}
11673 		_trans += _klen;
11674 	}
11675 
11676 _match:
11677 	_trans = _json_indicies[_trans];
11678 	cs = _json_trans_targs[_trans];
11679 
11680 	if ( _json_trans_actions[_trans] == 0 )
11681 		goto _again;
11682 
11683 	_acts = _json_actions + _json_trans_actions[_trans];
11684 	_nacts = (unsigned int) *_acts++;
11685 	while ( _nacts-- > 0 )
11686 	{
11687 		switch ( *_acts++ )
11688 		{
11689 	case 0:
11690 #line 1160 "upb/json/parser.rl"
11691 	{ p--; {cs = stack[--top]; goto _again;} }
11692 	break;
11693 	case 1:
11694 #line 1161 "upb/json/parser.rl"
11695 	{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
11696 	break;
11697 	case 2:
11698 #line 1165 "upb/json/parser.rl"
11699 	{ start_text(parser, p); }
11700 	break;
11701 	case 3:
11702 #line 1166 "upb/json/parser.rl"
11703 	{ CHECK_RETURN_TOP(end_text(parser, p)); }
11704 	break;
11705 	case 4:
11706 #line 1172 "upb/json/parser.rl"
11707 	{ start_hex(parser); }
11708 	break;
11709 	case 5:
11710 #line 1173 "upb/json/parser.rl"
11711 	{ hexdigit(parser, p); }
11712 	break;
11713 	case 6:
11714 #line 1174 "upb/json/parser.rl"
11715 	{ CHECK_RETURN_TOP(end_hex(parser)); }
11716 	break;
11717 	case 7:
11718 #line 1180 "upb/json/parser.rl"
11719 	{ CHECK_RETURN_TOP(escape(parser, p)); }
11720 	break;
11721 	case 8:
11722 #line 1186 "upb/json/parser.rl"
11723 	{ p--; {cs = stack[--top]; goto _again;} }
11724 	break;
11725 	case 9:
11726 #line 1189 "upb/json/parser.rl"
11727 	{ {stack[top++] = cs; cs = 19; goto _again;} }
11728 	break;
11729 	case 10:
11730 #line 1191 "upb/json/parser.rl"
11731 	{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
11732 	break;
11733 	case 11:
11734 #line 1196 "upb/json/parser.rl"
11735 	{ start_member(parser); }
11736 	break;
11737 	case 12:
11738 #line 1197 "upb/json/parser.rl"
11739 	{ CHECK_RETURN_TOP(end_membername(parser)); }
11740 	break;
11741 	case 13:
11742 #line 1200 "upb/json/parser.rl"
11743 	{ end_member(parser); }
11744 	break;
11745 	case 14:
11746 #line 1206 "upb/json/parser.rl"
11747 	{ start_object(parser); }
11748 	break;
11749 	case 15:
11750 #line 1209 "upb/json/parser.rl"
11751 	{ end_object(parser); }
11752 	break;
11753 	case 16:
11754 #line 1215 "upb/json/parser.rl"
11755 	{ CHECK_RETURN_TOP(start_array(parser)); }
11756 	break;
11757 	case 17:
11758 #line 1219 "upb/json/parser.rl"
11759 	{ end_array(parser); }
11760 	break;
11761 	case 18:
11762 #line 1224 "upb/json/parser.rl"
11763 	{ start_number(parser, p); }
11764 	break;
11765 	case 19:
11766 #line 1225 "upb/json/parser.rl"
11767 	{ CHECK_RETURN_TOP(end_number(parser, p)); }
11768 	break;
11769 	case 20:
11770 #line 1227 "upb/json/parser.rl"
11771 	{ CHECK_RETURN_TOP(start_stringval(parser)); }
11772 	break;
11773 	case 21:
11774 #line 1228 "upb/json/parser.rl"
11775 	{ CHECK_RETURN_TOP(end_stringval(parser)); }
11776 	break;
11777 	case 22:
11778 #line 1230 "upb/json/parser.rl"
11779 	{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
11780 	break;
11781 	case 23:
11782 #line 1232 "upb/json/parser.rl"
11783 	{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
11784 	break;
11785 	case 24:
11786 #line 1234 "upb/json/parser.rl"
11787 	{ /* null value */ }
11788 	break;
11789 	case 25:
11790 #line 1236 "upb/json/parser.rl"
11791 	{ CHECK_RETURN_TOP(start_subobject(parser)); }
11792 	break;
11793 	case 26:
11794 #line 1237 "upb/json/parser.rl"
11795 	{ end_subobject(parser); }
11796 	break;
11797 	case 27:
11798 #line 1242 "upb/json/parser.rl"
11799 	{ p--; {cs = stack[--top]; goto _again;} }
11800 	break;
11801 #line 1514 "upb/json/parser.c"
11802 		}
11803 	}
11804 
11805 _again:
11806 	if ( cs == 0 )
11807 		goto _out;
11808 	if ( ++p != pe )
11809 		goto _resume;
11810 	_test_eof: {}
11811 	_out: {}
11812 	}
11813 
11814 #line 1269 "upb/json/parser.rl"
11815 
11816   if (p != pe) {
11817     upb_status_seterrf(&parser->status, "Parse error at '%.*s'\n", pe - p, p);
11818     upb_env_reporterror(parser->env, &parser->status);
11819   } else {
11820     capture_suspend(parser, &p);
11821   }
11822 
11823 error:
11824   /* Save parsing state back to parser. */
11825   parser->current_state = cs;
11826   parser->parser_top = top;
11827 
11828   return p - buf;
11829 }
11830 
end(void * closure,const void * hd)11831 bool end(void *closure, const void *hd) {
11832   UPB_UNUSED(closure);
11833   UPB_UNUSED(hd);
11834 
11835   /* Prevent compile warning on unused static constants. */
11836   UPB_UNUSED(json_start);
11837   UPB_UNUSED(json_en_number_machine);
11838   UPB_UNUSED(json_en_string_machine);
11839   UPB_UNUSED(json_en_value_machine);
11840   UPB_UNUSED(json_en_main);
11841   return true;
11842 }
11843 
json_parser_reset(upb_json_parser * p)11844 static void json_parser_reset(upb_json_parser *p) {
11845   int cs;
11846   int top;
11847 
11848   p->top = p->stack;
11849   p->top->f = NULL;
11850   p->top->is_map = false;
11851   p->top->is_mapentry = false;
11852 
11853   /* Emit Ragel initialization of the parser. */
11854 
11855 #line 1568 "upb/json/parser.c"
11856 	{
11857 	cs = json_start;
11858 	top = 0;
11859 	}
11860 
11861 #line 1309 "upb/json/parser.rl"
11862   p->current_state = cs;
11863   p->parser_top = top;
11864   accumulate_clear(p);
11865   p->multipart_state = MULTIPART_INACTIVE;
11866   p->capture = NULL;
11867   p->accumulated = NULL;
11868   upb_status_clear(&p->status);
11869 }
11870 
visit_json_parsermethod(const upb_refcounted * r,upb_refcounted_visit * visit,void * closure)11871 static void visit_json_parsermethod(const upb_refcounted *r,
11872                                     upb_refcounted_visit *visit,
11873                                     void *closure) {
11874   const upb_json_parsermethod *method = (upb_json_parsermethod*)r;
11875   visit(r, upb_msgdef_upcast2(method->msg), closure);
11876 }
11877 
free_json_parsermethod(upb_refcounted * r)11878 static void free_json_parsermethod(upb_refcounted *r) {
11879   upb_json_parsermethod *method = (upb_json_parsermethod*)r;
11880 
11881   upb_inttable_iter i;
11882   upb_inttable_begin(&i, &method->name_tables);
11883   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
11884     upb_value val = upb_inttable_iter_value(&i);
11885     upb_strtable *t = upb_value_getptr(val);
11886     upb_strtable_uninit(t);
11887     upb_gfree(t);
11888   }
11889 
11890   upb_inttable_uninit(&method->name_tables);
11891 
11892   upb_gfree(r);
11893 }
11894 
add_jsonname_table(upb_json_parsermethod * m,const upb_msgdef * md)11895 static void add_jsonname_table(upb_json_parsermethod *m, const upb_msgdef* md) {
11896   upb_msg_field_iter i;
11897   upb_strtable *t;
11898 
11899   /* It would be nice to stack-allocate this, but protobufs do not limit the
11900    * length of fields to any reasonable limit. */
11901   char *buf = NULL;
11902   size_t len = 0;
11903 
11904   if (upb_inttable_lookupptr(&m->name_tables, md, NULL)) {
11905     return;
11906   }
11907 
11908   /* TODO(haberman): handle malloc failure. */
11909   t = upb_gmalloc(sizeof(*t));
11910   upb_strtable_init(t, UPB_CTYPE_CONSTPTR);
11911   upb_inttable_insertptr(&m->name_tables, md, upb_value_ptr(t));
11912 
11913   for(upb_msg_field_begin(&i, md);
11914       !upb_msg_field_done(&i);
11915       upb_msg_field_next(&i)) {
11916     const upb_fielddef *f = upb_msg_iter_field(&i);
11917 
11918     /* Add an entry for the JSON name. */
11919     size_t field_len = upb_fielddef_getjsonname(f, buf, len);
11920     if (field_len > len) {
11921       size_t len2;
11922       buf = upb_grealloc(buf, 0, field_len);
11923       len = field_len;
11924       len2 = upb_fielddef_getjsonname(f, buf, len);
11925       UPB_ASSERT_VAR(len2, len == len2);
11926     }
11927     upb_strtable_insert(t, buf, upb_value_constptr(f));
11928 
11929     if (strcmp(buf, upb_fielddef_name(f)) != 0) {
11930       /* Since the JSON name is different from the regular field name, add an
11931        * entry for the raw name (compliant proto3 JSON parsers must accept
11932        * both). */
11933       upb_strtable_insert(t, upb_fielddef_name(f), upb_value_constptr(f));
11934     }
11935 
11936     if (upb_fielddef_issubmsg(f)) {
11937       add_jsonname_table(m, upb_fielddef_msgsubdef(f));
11938     }
11939   }
11940 
11941   upb_gfree(buf);
11942 }
11943 
11944 /* Public API *****************************************************************/
11945 
upb_json_parser_create(upb_env * env,const upb_json_parsermethod * method,upb_sink * output)11946 upb_json_parser *upb_json_parser_create(upb_env *env,
11947                                         const upb_json_parsermethod *method,
11948                                         upb_sink *output) {
11949 #ifndef NDEBUG
11950   const size_t size_before = upb_env_bytesallocated(env);
11951 #endif
11952   upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
11953   if (!p) return false;
11954 
11955   p->env = env;
11956   p->method = method;
11957   p->limit = p->stack + UPB_JSON_MAX_DEPTH;
11958   p->accumulate_buf = NULL;
11959   p->accumulate_buf_size = 0;
11960   upb_bytessink_reset(&p->input_, &method->input_handler_, p);
11961 
11962   json_parser_reset(p);
11963   upb_sink_reset(&p->top->sink, output->handlers, output->closure);
11964   p->top->m = upb_handlers_msgdef(output->handlers);
11965   set_name_table(p, p->top);
11966 
11967   /* If this fails, uncomment and increase the value in parser.h. */
11968   /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
11969   assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
11970   return p;
11971 }
11972 
upb_json_parser_input(upb_json_parser * p)11973 upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
11974   return &p->input_;
11975 }
11976 
upb_json_parsermethod_new(const upb_msgdef * md,const void * owner)11977 upb_json_parsermethod *upb_json_parsermethod_new(const upb_msgdef* md,
11978                                                  const void* owner) {
11979   static const struct upb_refcounted_vtbl vtbl = {visit_json_parsermethod,
11980                                                   free_json_parsermethod};
11981   upb_json_parsermethod *ret = upb_gmalloc(sizeof(*ret));
11982   upb_refcounted_init(upb_json_parsermethod_upcast_mutable(ret), &vtbl, owner);
11983 
11984   ret->msg = md;
11985   upb_ref2(md, ret);
11986 
11987   upb_byteshandler_init(&ret->input_handler_);
11988   upb_byteshandler_setstring(&ret->input_handler_, parse, ret);
11989   upb_byteshandler_setendstr(&ret->input_handler_, end, ret);
11990 
11991   upb_inttable_init(&ret->name_tables, UPB_CTYPE_PTR);
11992 
11993   add_jsonname_table(ret, md);
11994 
11995   return ret;
11996 }
11997 
upb_json_parsermethod_inputhandler(const upb_json_parsermethod * m)11998 const upb_byteshandler *upb_json_parsermethod_inputhandler(
11999     const upb_json_parsermethod *m) {
12000   return &m->input_handler_;
12001 }
12002 /*
12003 ** This currently uses snprintf() to format primitives, and could be optimized
12004 ** further.
12005 */
12006 
12007 
12008 #include <string.h>
12009 #include <stdint.h>
12010 
12011 struct upb_json_printer {
12012   upb_sink input_;
12013   /* BytesSink closure. */
12014   void *subc_;
12015   upb_bytessink *output_;
12016 
12017   /* We track the depth so that we know when to emit startstr/endstr on the
12018    * output. */
12019   int depth_;
12020 
12021   /* Have we emitted the first element? This state is necessary to emit commas
12022    * without leaving a trailing comma in arrays/maps. We keep this state per
12023    * frame depth.
12024    *
12025    * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
12026    * We count frames (contexts in which we separate elements by commas) as both
12027    * repeated fields and messages (maps), and the worst case is a
12028    * message->repeated field->submessage->repeated field->... nesting. */
12029   bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
12030 };
12031 
12032 /* StringPiece; a pointer plus a length. */
12033 typedef struct {
12034   char *ptr;
12035   size_t len;
12036 } strpc;
12037 
freestrpc(void * ptr)12038 void freestrpc(void *ptr) {
12039   strpc *pc = ptr;
12040   upb_gfree(pc->ptr);
12041   upb_gfree(pc);
12042 }
12043 
12044 /* Convert fielddef name to JSON name and return as a string piece. */
newstrpc(upb_handlers * h,const upb_fielddef * f,bool preserve_fieldnames)12045 strpc *newstrpc(upb_handlers *h, const upb_fielddef *f,
12046                 bool preserve_fieldnames) {
12047   /* TODO(haberman): handle malloc failure. */
12048   strpc *ret = upb_gmalloc(sizeof(*ret));
12049   if (preserve_fieldnames) {
12050     ret->ptr = upb_gstrdup(upb_fielddef_name(f));
12051     ret->len = strlen(ret->ptr);
12052   } else {
12053     size_t len;
12054     ret->len = upb_fielddef_getjsonname(f, NULL, 0);
12055     ret->ptr = upb_gmalloc(ret->len);
12056     len = upb_fielddef_getjsonname(f, ret->ptr, ret->len);
12057     UPB_ASSERT_VAR(len, len == ret->len);
12058     ret->len--;  /* NULL */
12059   }
12060 
12061   upb_handlers_addcleanup(h, ret, freestrpc);
12062   return ret;
12063 }
12064 
12065 /* ------------ JSON string printing: values, maps, arrays ------------------ */
12066 
print_data(upb_json_printer * p,const char * buf,unsigned int len)12067 static void print_data(
12068     upb_json_printer *p, const char *buf, unsigned int len) {
12069   /* TODO: Will need to change if we support pushback from the sink. */
12070   size_t n = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
12071   UPB_ASSERT_VAR(n, n == len);
12072 }
12073 
print_comma(upb_json_printer * p)12074 static void print_comma(upb_json_printer *p) {
12075   if (!p->first_elem_[p->depth_]) {
12076     print_data(p, ",", 1);
12077   }
12078   p->first_elem_[p->depth_] = false;
12079 }
12080 
12081 /* Helpers that print properly formatted elements to the JSON output stream. */
12082 
12083 /* Used for escaping control chars in strings. */
12084 static const char kControlCharLimit = 0x20;
12085 
is_json_escaped(char c)12086 UPB_INLINE bool is_json_escaped(char c) {
12087   /* See RFC 4627. */
12088   unsigned char uc = (unsigned char)c;
12089   return uc < kControlCharLimit || uc == '"' || uc == '\\';
12090 }
12091 
json_nice_escape(char c)12092 UPB_INLINE const char* json_nice_escape(char c) {
12093   switch (c) {
12094     case '"':  return "\\\"";
12095     case '\\': return "\\\\";
12096     case '\b': return "\\b";
12097     case '\f': return "\\f";
12098     case '\n': return "\\n";
12099     case '\r': return "\\r";
12100     case '\t': return "\\t";
12101     default:   return NULL;
12102   }
12103 }
12104 
12105 /* Write a properly escaped string chunk. The surrounding quotes are *not*
12106  * printed; this is so that the caller has the option of emitting the string
12107  * content in chunks. */
putstring(upb_json_printer * p,const char * buf,unsigned int len)12108 static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
12109   const char* unescaped_run = NULL;
12110   unsigned int i;
12111   for (i = 0; i < len; i++) {
12112     char c = buf[i];
12113     /* Handle escaping. */
12114     if (is_json_escaped(c)) {
12115       /* Use a "nice" escape, like \n, if one exists for this character. */
12116       const char* escape = json_nice_escape(c);
12117       /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
12118        * escape. */
12119       char escape_buf[8];
12120       if (!escape) {
12121         unsigned char byte = (unsigned char)c;
12122         _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
12123         escape = escape_buf;
12124       }
12125 
12126       /* N.B. that we assume that the input encoding is equal to the output
12127        * encoding (both UTF-8 for  now), so for chars >= 0x20 and != \, ", we
12128        * can simply pass the bytes through. */
12129 
12130       /* If there's a current run of unescaped chars, print that run first. */
12131       if (unescaped_run) {
12132         print_data(p, unescaped_run, &buf[i] - unescaped_run);
12133         unescaped_run = NULL;
12134       }
12135       /* Then print the escape code. */
12136       print_data(p, escape, strlen(escape));
12137     } else {
12138       /* Add to the current unescaped run of characters. */
12139       if (unescaped_run == NULL) {
12140         unescaped_run = &buf[i];
12141       }
12142     }
12143   }
12144 
12145   /* If the string ended in a run of unescaped characters, print that last run. */
12146   if (unescaped_run) {
12147     print_data(p, unescaped_run, &buf[len] - unescaped_run);
12148   }
12149 }
12150 
12151 #define CHKLENGTH(x) if (!(x)) return -1;
12152 
12153 /* Helpers that format floating point values according to our custom formats.
12154  * Right now we use %.8g and %.17g for float/double, respectively, to match
12155  * proto2::util::JsonFormat's defaults.  May want to change this later. */
12156 
fmt_double(double val,char * buf,size_t length)12157 static size_t fmt_double(double val, char* buf, size_t length) {
12158   size_t n = _upb_snprintf(buf, length, "%.17g", val);
12159   CHKLENGTH(n > 0 && n < length);
12160   return n;
12161 }
12162 
fmt_float(float val,char * buf,size_t length)12163 static size_t fmt_float(float val, char* buf, size_t length) {
12164   size_t n = _upb_snprintf(buf, length, "%.8g", val);
12165   CHKLENGTH(n > 0 && n < length);
12166   return n;
12167 }
12168 
fmt_bool(bool val,char * buf,size_t length)12169 static size_t fmt_bool(bool val, char* buf, size_t length) {
12170   size_t n = _upb_snprintf(buf, length, "%s", (val ? "true" : "false"));
12171   CHKLENGTH(n > 0 && n < length);
12172   return n;
12173 }
12174 
fmt_int64(long val,char * buf,size_t length)12175 static size_t fmt_int64(long val, char* buf, size_t length) {
12176   size_t n = _upb_snprintf(buf, length, "%ld", val);
12177   CHKLENGTH(n > 0 && n < length);
12178   return n;
12179 }
12180 
fmt_uint64(unsigned long long val,char * buf,size_t length)12181 static size_t fmt_uint64(unsigned long long val, char* buf, size_t length) {
12182   size_t n = _upb_snprintf(buf, length, "%llu", val);
12183   CHKLENGTH(n > 0 && n < length);
12184   return n;
12185 }
12186 
12187 /* Print a map key given a field name. Called by scalar field handlers and by
12188  * startseq for repeated fields. */
putkey(void * closure,const void * handler_data)12189 static bool putkey(void *closure, const void *handler_data) {
12190   upb_json_printer *p = closure;
12191   const strpc *key = handler_data;
12192   print_comma(p);
12193   print_data(p, "\"", 1);
12194   putstring(p, key->ptr, key->len);
12195   print_data(p, "\":", 2);
12196   return true;
12197 }
12198 
12199 #define CHKFMT(val) if ((val) == (size_t)-1) return false;
12200 #define CHK(val)    if (!(val)) return false;
12201 
12202 #define TYPE_HANDLERS(type, fmt_func)                                        \
12203   static bool put##type(void *closure, const void *handler_data, type val) { \
12204     upb_json_printer *p = closure;                                           \
12205     char data[64];                                                           \
12206     size_t length = fmt_func(val, data, sizeof(data));                       \
12207     UPB_UNUSED(handler_data);                                                \
12208     CHKFMT(length);                                                          \
12209     print_data(p, data, length);                                             \
12210     return true;                                                             \
12211   }                                                                          \
12212   static bool scalar_##type(void *closure, const void *handler_data,         \
12213                             type val) {                                      \
12214     CHK(putkey(closure, handler_data));                                      \
12215     CHK(put##type(closure, handler_data, val));                              \
12216     return true;                                                             \
12217   }                                                                          \
12218   static bool repeated_##type(void *closure, const void *handler_data,       \
12219                               type val) {                                    \
12220     upb_json_printer *p = closure;                                           \
12221     print_comma(p);                                                          \
12222     CHK(put##type(closure, handler_data, val));                              \
12223     return true;                                                             \
12224   }
12225 
12226 #define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
12227   static bool putmapkey_##type(void *closure, const void *handler_data,      \
12228                             type val) {                                      \
12229     upb_json_printer *p = closure;                                           \
12230     print_data(p, "\"", 1);                                                  \
12231     CHK(put##type(closure, handler_data, val));                              \
12232     print_data(p, "\":", 2);                                                 \
12233     return true;                                                             \
12234   }
12235 
12236 TYPE_HANDLERS(double,   fmt_double)
12237 TYPE_HANDLERS(float,    fmt_float)
12238 TYPE_HANDLERS(bool,     fmt_bool)
12239 TYPE_HANDLERS(int32_t,  fmt_int64)
12240 TYPE_HANDLERS(uint32_t, fmt_int64)
12241 TYPE_HANDLERS(int64_t,  fmt_int64)
12242 TYPE_HANDLERS(uint64_t, fmt_uint64)
12243 
12244 /* double and float are not allowed to be map keys. */
12245 TYPE_HANDLERS_MAPKEY(bool,     fmt_bool)
12246 TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64)
12247 TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64)
12248 TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64)
12249 TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64)
12250 
12251 #undef TYPE_HANDLERS
12252 #undef TYPE_HANDLERS_MAPKEY
12253 
12254 typedef struct {
12255   void *keyname;
12256   const upb_enumdef *enumdef;
12257 } EnumHandlerData;
12258 
scalar_enum(void * closure,const void * handler_data,int32_t val)12259 static bool scalar_enum(void *closure, const void *handler_data,
12260                         int32_t val) {
12261   const EnumHandlerData *hd = handler_data;
12262   upb_json_printer *p = closure;
12263   const char *symbolic_name;
12264 
12265   CHK(putkey(closure, hd->keyname));
12266 
12267   symbolic_name = upb_enumdef_iton(hd->enumdef, val);
12268   if (symbolic_name) {
12269     print_data(p, "\"", 1);
12270     putstring(p, symbolic_name, strlen(symbolic_name));
12271     print_data(p, "\"", 1);
12272   } else {
12273     putint32_t(closure, NULL, val);
12274   }
12275 
12276   return true;
12277 }
12278 
print_enum_symbolic_name(upb_json_printer * p,const upb_enumdef * def,int32_t val)12279 static void print_enum_symbolic_name(upb_json_printer *p,
12280                                      const upb_enumdef *def,
12281                                      int32_t val) {
12282   const char *symbolic_name = upb_enumdef_iton(def, val);
12283   if (symbolic_name) {
12284     print_data(p, "\"", 1);
12285     putstring(p, symbolic_name, strlen(symbolic_name));
12286     print_data(p, "\"", 1);
12287   } else {
12288     putint32_t(p, NULL, val);
12289   }
12290 }
12291 
repeated_enum(void * closure,const void * handler_data,int32_t val)12292 static bool repeated_enum(void *closure, const void *handler_data,
12293                           int32_t val) {
12294   const EnumHandlerData *hd = handler_data;
12295   upb_json_printer *p = closure;
12296   print_comma(p);
12297 
12298   print_enum_symbolic_name(p, hd->enumdef, val);
12299 
12300   return true;
12301 }
12302 
mapvalue_enum(void * closure,const void * handler_data,int32_t val)12303 static bool mapvalue_enum(void *closure, const void *handler_data,
12304                           int32_t val) {
12305   const EnumHandlerData *hd = handler_data;
12306   upb_json_printer *p = closure;
12307 
12308   print_enum_symbolic_name(p, hd->enumdef, val);
12309 
12310   return true;
12311 }
12312 
scalar_startsubmsg(void * closure,const void * handler_data)12313 static void *scalar_startsubmsg(void *closure, const void *handler_data) {
12314   return putkey(closure, handler_data) ? closure : UPB_BREAK;
12315 }
12316 
repeated_startsubmsg(void * closure,const void * handler_data)12317 static void *repeated_startsubmsg(void *closure, const void *handler_data) {
12318   upb_json_printer *p = closure;
12319   UPB_UNUSED(handler_data);
12320   print_comma(p);
12321   return closure;
12322 }
12323 
start_frame(upb_json_printer * p)12324 static void start_frame(upb_json_printer *p) {
12325   p->depth_++;
12326   p->first_elem_[p->depth_] = true;
12327   print_data(p, "{", 1);
12328 }
12329 
end_frame(upb_json_printer * p)12330 static void end_frame(upb_json_printer *p) {
12331   print_data(p, "}", 1);
12332   p->depth_--;
12333 }
12334 
printer_startmsg(void * closure,const void * handler_data)12335 static bool printer_startmsg(void *closure, const void *handler_data) {
12336   upb_json_printer *p = closure;
12337   UPB_UNUSED(handler_data);
12338   if (p->depth_ == 0) {
12339     upb_bytessink_start(p->output_, 0, &p->subc_);
12340   }
12341   start_frame(p);
12342   return true;
12343 }
12344 
printer_endmsg(void * closure,const void * handler_data,upb_status * s)12345 static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
12346   upb_json_printer *p = closure;
12347   UPB_UNUSED(handler_data);
12348   UPB_UNUSED(s);
12349   end_frame(p);
12350   if (p->depth_ == 0) {
12351     upb_bytessink_end(p->output_);
12352   }
12353   return true;
12354 }
12355 
startseq(void * closure,const void * handler_data)12356 static void *startseq(void *closure, const void *handler_data) {
12357   upb_json_printer *p = closure;
12358   CHK(putkey(closure, handler_data));
12359   p->depth_++;
12360   p->first_elem_[p->depth_] = true;
12361   print_data(p, "[", 1);
12362   return closure;
12363 }
12364 
endseq(void * closure,const void * handler_data)12365 static bool endseq(void *closure, const void *handler_data) {
12366   upb_json_printer *p = closure;
12367   UPB_UNUSED(handler_data);
12368   print_data(p, "]", 1);
12369   p->depth_--;
12370   return true;
12371 }
12372 
startmap(void * closure,const void * handler_data)12373 static void *startmap(void *closure, const void *handler_data) {
12374   upb_json_printer *p = closure;
12375   CHK(putkey(closure, handler_data));
12376   p->depth_++;
12377   p->first_elem_[p->depth_] = true;
12378   print_data(p, "{", 1);
12379   return closure;
12380 }
12381 
endmap(void * closure,const void * handler_data)12382 static bool endmap(void *closure, const void *handler_data) {
12383   upb_json_printer *p = closure;
12384   UPB_UNUSED(handler_data);
12385   print_data(p, "}", 1);
12386   p->depth_--;
12387   return true;
12388 }
12389 
putstr(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12390 static size_t putstr(void *closure, const void *handler_data, const char *str,
12391                      size_t len, const upb_bufhandle *handle) {
12392   upb_json_printer *p = closure;
12393   UPB_UNUSED(handler_data);
12394   UPB_UNUSED(handle);
12395   putstring(p, str, len);
12396   return len;
12397 }
12398 
12399 /* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
putbytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12400 static size_t putbytes(void *closure, const void *handler_data, const char *str,
12401                        size_t len, const upb_bufhandle *handle) {
12402   upb_json_printer *p = closure;
12403 
12404   /* This is the regular base64, not the "web-safe" version. */
12405   static const char base64[] =
12406       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
12407 
12408   /* Base64-encode. */
12409   char data[16000];
12410   const char *limit = data + sizeof(data);
12411   const unsigned char *from = (const unsigned char*)str;
12412   char *to = data;
12413   size_t remaining = len;
12414   size_t bytes;
12415 
12416   UPB_UNUSED(handler_data);
12417   UPB_UNUSED(handle);
12418 
12419   while (remaining > 2) {
12420     /* TODO(haberman): handle encoded lengths > sizeof(data) */
12421     UPB_ASSERT_VAR(limit, (limit - to) >= 4);
12422 
12423     to[0] = base64[from[0] >> 2];
12424     to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
12425     to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
12426     to[3] = base64[from[2] & 0x3f];
12427 
12428     remaining -= 3;
12429     to += 4;
12430     from += 3;
12431   }
12432 
12433   switch (remaining) {
12434     case 2:
12435       to[0] = base64[from[0] >> 2];
12436       to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
12437       to[2] = base64[(from[1] & 0xf) << 2];
12438       to[3] = '=';
12439       to += 4;
12440       from += 2;
12441       break;
12442     case 1:
12443       to[0] = base64[from[0] >> 2];
12444       to[1] = base64[((from[0] & 0x3) << 4)];
12445       to[2] = '=';
12446       to[3] = '=';
12447       to += 4;
12448       from += 1;
12449       break;
12450   }
12451 
12452   bytes = to - data;
12453   print_data(p, "\"", 1);
12454   putstring(p, data, bytes);
12455   print_data(p, "\"", 1);
12456   return len;
12457 }
12458 
scalar_startstr(void * closure,const void * handler_data,size_t size_hint)12459 static void *scalar_startstr(void *closure, const void *handler_data,
12460                              size_t size_hint) {
12461   upb_json_printer *p = closure;
12462   UPB_UNUSED(handler_data);
12463   UPB_UNUSED(size_hint);
12464   CHK(putkey(closure, handler_data));
12465   print_data(p, "\"", 1);
12466   return p;
12467 }
12468 
scalar_str(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12469 static size_t scalar_str(void *closure, const void *handler_data,
12470                          const char *str, size_t len,
12471                          const upb_bufhandle *handle) {
12472   CHK(putstr(closure, handler_data, str, len, handle));
12473   return len;
12474 }
12475 
scalar_endstr(void * closure,const void * handler_data)12476 static bool scalar_endstr(void *closure, const void *handler_data) {
12477   upb_json_printer *p = closure;
12478   UPB_UNUSED(handler_data);
12479   print_data(p, "\"", 1);
12480   return true;
12481 }
12482 
repeated_startstr(void * closure,const void * handler_data,size_t size_hint)12483 static void *repeated_startstr(void *closure, const void *handler_data,
12484                                size_t size_hint) {
12485   upb_json_printer *p = closure;
12486   UPB_UNUSED(handler_data);
12487   UPB_UNUSED(size_hint);
12488   print_comma(p);
12489   print_data(p, "\"", 1);
12490   return p;
12491 }
12492 
repeated_str(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12493 static size_t repeated_str(void *closure, const void *handler_data,
12494                            const char *str, size_t len,
12495                            const upb_bufhandle *handle) {
12496   CHK(putstr(closure, handler_data, str, len, handle));
12497   return len;
12498 }
12499 
repeated_endstr(void * closure,const void * handler_data)12500 static bool repeated_endstr(void *closure, const void *handler_data) {
12501   upb_json_printer *p = closure;
12502   UPB_UNUSED(handler_data);
12503   print_data(p, "\"", 1);
12504   return true;
12505 }
12506 
mapkeyval_startstr(void * closure,const void * handler_data,size_t size_hint)12507 static void *mapkeyval_startstr(void *closure, const void *handler_data,
12508                                 size_t size_hint) {
12509   upb_json_printer *p = closure;
12510   UPB_UNUSED(handler_data);
12511   UPB_UNUSED(size_hint);
12512   print_data(p, "\"", 1);
12513   return p;
12514 }
12515 
mapkey_str(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12516 static size_t mapkey_str(void *closure, const void *handler_data,
12517                          const char *str, size_t len,
12518                          const upb_bufhandle *handle) {
12519   CHK(putstr(closure, handler_data, str, len, handle));
12520   return len;
12521 }
12522 
mapkey_endstr(void * closure,const void * handler_data)12523 static bool mapkey_endstr(void *closure, const void *handler_data) {
12524   upb_json_printer *p = closure;
12525   UPB_UNUSED(handler_data);
12526   print_data(p, "\":", 2);
12527   return true;
12528 }
12529 
mapvalue_endstr(void * closure,const void * handler_data)12530 static bool mapvalue_endstr(void *closure, const void *handler_data) {
12531   upb_json_printer *p = closure;
12532   UPB_UNUSED(handler_data);
12533   print_data(p, "\"", 1);
12534   return true;
12535 }
12536 
scalar_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12537 static size_t scalar_bytes(void *closure, const void *handler_data,
12538                            const char *str, size_t len,
12539                            const upb_bufhandle *handle) {
12540   CHK(putkey(closure, handler_data));
12541   CHK(putbytes(closure, handler_data, str, len, handle));
12542   return len;
12543 }
12544 
repeated_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12545 static size_t repeated_bytes(void *closure, const void *handler_data,
12546                              const char *str, size_t len,
12547                              const upb_bufhandle *handle) {
12548   upb_json_printer *p = closure;
12549   print_comma(p);
12550   CHK(putbytes(closure, handler_data, str, len, handle));
12551   return len;
12552 }
12553 
mapkey_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)12554 static size_t mapkey_bytes(void *closure, const void *handler_data,
12555                            const char *str, size_t len,
12556                            const upb_bufhandle *handle) {
12557   upb_json_printer *p = closure;
12558   CHK(putbytes(closure, handler_data, str, len, handle));
12559   print_data(p, ":", 1);
12560   return len;
12561 }
12562 
set_enum_hd(upb_handlers * h,const upb_fielddef * f,bool preserve_fieldnames,upb_handlerattr * attr)12563 static void set_enum_hd(upb_handlers *h,
12564                         const upb_fielddef *f,
12565                         bool preserve_fieldnames,
12566                         upb_handlerattr *attr) {
12567   EnumHandlerData *hd = upb_gmalloc(sizeof(EnumHandlerData));
12568   hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
12569   hd->keyname = newstrpc(h, f, preserve_fieldnames);
12570   upb_handlers_addcleanup(h, hd, upb_gfree);
12571   upb_handlerattr_sethandlerdata(attr, hd);
12572 }
12573 
12574 /* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
12575  * in a map).
12576  *
12577  * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
12578  * key or value cases properly. The right way to do this is to allocate a
12579  * temporary structure at the start of a mapentry submessage, store key and
12580  * value data in it as key and value handlers are called, and then print the
12581  * key/value pair once at the end of the submessage. If we don't do this, we
12582  * should at least detect the case and throw an error. However, so far all of
12583  * our sources that emit mapentry messages do so canonically (with one key
12584  * field, and then one value field), so this is not a pressing concern at the
12585  * moment. */
printer_sethandlers_mapentry(const void * closure,bool preserve_fieldnames,upb_handlers * h)12586 void printer_sethandlers_mapentry(const void *closure, bool preserve_fieldnames,
12587                                   upb_handlers *h) {
12588   const upb_msgdef *md = upb_handlers_msgdef(h);
12589 
12590   /* A mapentry message is printed simply as '"key": value'. Rather than
12591    * special-case key and value for every type below, we just handle both
12592    * fields explicitly here. */
12593   const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
12594   const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
12595 
12596   upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
12597 
12598   UPB_UNUSED(closure);
12599 
12600   switch (upb_fielddef_type(key_field)) {
12601     case UPB_TYPE_INT32:
12602       upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
12603       break;
12604     case UPB_TYPE_INT64:
12605       upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
12606       break;
12607     case UPB_TYPE_UINT32:
12608       upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
12609       break;
12610     case UPB_TYPE_UINT64:
12611       upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
12612       break;
12613     case UPB_TYPE_BOOL:
12614       upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
12615       break;
12616     case UPB_TYPE_STRING:
12617       upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
12618       upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
12619       upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
12620       break;
12621     case UPB_TYPE_BYTES:
12622       upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
12623       break;
12624     default:
12625       assert(false);
12626       break;
12627   }
12628 
12629   switch (upb_fielddef_type(value_field)) {
12630     case UPB_TYPE_INT32:
12631       upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
12632       break;
12633     case UPB_TYPE_INT64:
12634       upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
12635       break;
12636     case UPB_TYPE_UINT32:
12637       upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
12638       break;
12639     case UPB_TYPE_UINT64:
12640       upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
12641       break;
12642     case UPB_TYPE_BOOL:
12643       upb_handlers_setbool(h, value_field, putbool, &empty_attr);
12644       break;
12645     case UPB_TYPE_FLOAT:
12646       upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
12647       break;
12648     case UPB_TYPE_DOUBLE:
12649       upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
12650       break;
12651     case UPB_TYPE_STRING:
12652       upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
12653       upb_handlers_setstring(h, value_field, putstr, &empty_attr);
12654       upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
12655       break;
12656     case UPB_TYPE_BYTES:
12657       upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
12658       break;
12659     case UPB_TYPE_ENUM: {
12660       upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
12661       set_enum_hd(h, value_field, preserve_fieldnames, &enum_attr);
12662       upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
12663       upb_handlerattr_uninit(&enum_attr);
12664       break;
12665     }
12666     case UPB_TYPE_MESSAGE:
12667       /* No handler necessary -- the submsg handlers will print the message
12668        * as appropriate. */
12669       break;
12670   }
12671 
12672   upb_handlerattr_uninit(&empty_attr);
12673 }
12674 
printer_sethandlers(const void * closure,upb_handlers * h)12675 void printer_sethandlers(const void *closure, upb_handlers *h) {
12676   const upb_msgdef *md = upb_handlers_msgdef(h);
12677   bool is_mapentry = upb_msgdef_mapentry(md);
12678   upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
12679   upb_msg_field_iter i;
12680   const bool *preserve_fieldnames_ptr = closure;
12681   const bool preserve_fieldnames = *preserve_fieldnames_ptr;
12682 
12683   if (is_mapentry) {
12684     /* mapentry messages are sufficiently different that we handle them
12685      * separately. */
12686     printer_sethandlers_mapentry(closure, preserve_fieldnames, h);
12687     return;
12688   }
12689 
12690   upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
12691   upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
12692 
12693 #define TYPE(type, name, ctype)                                               \
12694   case type:                                                                  \
12695     if (upb_fielddef_isseq(f)) {                                              \
12696       upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr);            \
12697     } else {                                                                  \
12698       upb_handlers_set##name(h, f, scalar_##ctype, &name_attr);               \
12699     }                                                                         \
12700     break;
12701 
12702   upb_msg_field_begin(&i, md);
12703   for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
12704     const upb_fielddef *f = upb_msg_iter_field(&i);
12705 
12706     upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
12707     upb_handlerattr_sethandlerdata(&name_attr,
12708                                    newstrpc(h, f, preserve_fieldnames));
12709 
12710     if (upb_fielddef_ismap(f)) {
12711       upb_handlers_setstartseq(h, f, startmap, &name_attr);
12712       upb_handlers_setendseq(h, f, endmap, &name_attr);
12713     } else if (upb_fielddef_isseq(f)) {
12714       upb_handlers_setstartseq(h, f, startseq, &name_attr);
12715       upb_handlers_setendseq(h, f, endseq, &empty_attr);
12716     }
12717 
12718     switch (upb_fielddef_type(f)) {
12719       TYPE(UPB_TYPE_FLOAT,  float,  float);
12720       TYPE(UPB_TYPE_DOUBLE, double, double);
12721       TYPE(UPB_TYPE_BOOL,   bool,   bool);
12722       TYPE(UPB_TYPE_INT32,  int32,  int32_t);
12723       TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
12724       TYPE(UPB_TYPE_INT64,  int64,  int64_t);
12725       TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
12726       case UPB_TYPE_ENUM: {
12727         /* For now, we always emit symbolic names for enums. We may want an
12728          * option later to control this behavior, but we will wait for a real
12729          * need first. */
12730         upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
12731         set_enum_hd(h, f, preserve_fieldnames, &enum_attr);
12732 
12733         if (upb_fielddef_isseq(f)) {
12734           upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
12735         } else {
12736           upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
12737         }
12738 
12739         upb_handlerattr_uninit(&enum_attr);
12740         break;
12741       }
12742       case UPB_TYPE_STRING:
12743         if (upb_fielddef_isseq(f)) {
12744           upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
12745           upb_handlers_setstring(h, f, repeated_str, &empty_attr);
12746           upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
12747         } else {
12748           upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
12749           upb_handlers_setstring(h, f, scalar_str, &empty_attr);
12750           upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
12751         }
12752         break;
12753       case UPB_TYPE_BYTES:
12754         /* XXX: this doesn't support strings that span buffers yet. The base64
12755          * encoder will need to be made resumable for this to work properly. */
12756         if (upb_fielddef_isseq(f)) {
12757           upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
12758         } else {
12759           upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
12760         }
12761         break;
12762       case UPB_TYPE_MESSAGE:
12763         if (upb_fielddef_isseq(f)) {
12764           upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
12765         } else {
12766           upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
12767         }
12768         break;
12769     }
12770 
12771     upb_handlerattr_uninit(&name_attr);
12772   }
12773 
12774   upb_handlerattr_uninit(&empty_attr);
12775 #undef TYPE
12776 }
12777 
json_printer_reset(upb_json_printer * p)12778 static void json_printer_reset(upb_json_printer *p) {
12779   p->depth_ = 0;
12780 }
12781 
12782 
12783 /* Public API *****************************************************************/
12784 
upb_json_printer_create(upb_env * e,const upb_handlers * h,upb_bytessink * output)12785 upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
12786                                           upb_bytessink *output) {
12787 #ifndef NDEBUG
12788   size_t size_before = upb_env_bytesallocated(e);
12789 #endif
12790 
12791   upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
12792   if (!p) return NULL;
12793 
12794   p->output_ = output;
12795   json_printer_reset(p);
12796   upb_sink_reset(&p->input_, h, p);
12797 
12798   /* If this fails, increase the value in printer.h. */
12799   assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
12800   return p;
12801 }
12802 
upb_json_printer_input(upb_json_printer * p)12803 upb_sink *upb_json_printer_input(upb_json_printer *p) {
12804   return &p->input_;
12805 }
12806 
upb_json_printer_newhandlers(const upb_msgdef * md,bool preserve_fieldnames,const void * owner)12807 const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
12808                                                  bool preserve_fieldnames,
12809                                                  const void *owner) {
12810   return upb_handlers_newfrozen(
12811       md, owner, printer_sethandlers, &preserve_fieldnames);
12812 }
12813