1 // Amalgamated source file
2 #include "upb.h"
3 
4 
5 #include <stdlib.h>
6 #include <string.h>
7 
/* Length-prefixed string.  newstr() allocates the header and the character
 * data as one block, relying on sizeof(str_t) already covering one byte of
 * str[] for the terminator. */
typedef struct {
  size_t len;   /* Number of bytes stored in str, excluding the terminator. */
  char str[1];  /* Null-terminated string data follows. */
} str_t;
12 
newstr(const char * data,size_t len)13 static str_t *newstr(const char *data, size_t len) {
14   str_t *ret = malloc(sizeof(*ret) + len);
15   if (!ret) return NULL;
16   ret->len = len;
17   memcpy(ret->str, data, len);
18   ret->str[len] = '\0';
19   return ret;
20 }
21 
/* Releases a string obtained from newstr(). */
static void freestr(str_t *s) {
  free(s);
}
23 
/* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want, so
 * character classes are tested against explicit ranges instead.
 *
 * Returns true if |c| lies within the inclusive range [low, high]. */
static bool upb_isbetween(char c, char low, char high) {
  return !(c < low || c > high);
}
28 
/* True for 'A'-'Z', 'a'-'z' and '_': the characters allowed at the start of
 * an identifier component. */
static bool upb_isletter(char c) {
  if (c == '_') return true;
  if (upb_isbetween(c, 'a', 'z')) return true;
  return upb_isbetween(c, 'A', 'Z');
}
32 
/* True for anything upb_isletter() accepts, plus the digits '0'-'9'. */
static bool upb_isalphanum(char c) {
  if (upb_isbetween(c, '0', '9')) return true;
  return upb_isletter(c);
}
36 
/* Validates that the |len| bytes at |str| form a legal identifier.
 *
 * Every path component must begin with a letter or '_' and continue with
 * letters, digits or '_'.  If |full| is true, components may be joined with
 * '.'; otherwise any '.' is rejected.  An empty name, an empty component and
 * a trailing '.' are all invalid.
 *
 * Returns true if the name is valid.  Otherwise returns false and records the
 * reason in |s|.  Error messages format |str| with "%s", so |str| must also
 * be NUL-terminated. */
static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
  bool start = true;  /* True while the next char must begin a component. */
  size_t i;
  for (i = 0; i < len; i++) {
    char c = str[i];
    if (c == '.') {
      if (start || !full) {
        upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
        return false;
      }
      start = true;
    } else if (start) {
      if (!upb_isletter(c)) {
        upb_status_seterrf(
            s, "invalid name: path components must start with a letter (%s)",
            str);
        return false;
      }
      start = false;
    } else {
      if (!upb_isalphanum(c)) {
        upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
                           str);
        return false;
      }
    }
  }

  if (start) {
    /* Still waiting for a component: the name is empty or ends in '.'.
     * Report it so that no failure is returned without an error message. */
    upb_status_seterrf(s, "invalid name: empty name or trailing '.' (%s)",
                       str);
    return false;
  }
  return true;
}
66 
67 
68 /* upb_def ********************************************************************/
69 
upb_def_type(const upb_def * d)70 upb_deftype_t upb_def_type(const upb_def *d) { return d->type; }
71 
upb_def_fullname(const upb_def * d)72 const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
73 
/* Sets the full name of |def| to a private copy of |fullname|.
 *
 * |def| must not be frozen.  Returns false, with the reason in |s| and the
 * previous name left untouched, if |fullname| is not a valid dotted
 * identifier or if memory for the copy cannot be allocated. */
bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
  char *copy;
  assert(!upb_def_isfrozen(def));
  if (!upb_isident(fullname, strlen(fullname), true, s)) return false;

  /* Duplicate before releasing the old name, so that an allocation failure
   * neither destroys the current name nor is reported as success. */
  copy = upb_strdup(fullname);
  if (!copy) {
    upb_status_seterrmsg(s, "out of memory");
    return false;
  }

  free((void*)def->fullname);
  def->fullname = copy;
  return true;
}
81 
upb_def_dup(const upb_def * def,const void * o)82 upb_def *upb_def_dup(const upb_def *def, const void *o) {
83   switch (def->type) {
84     case UPB_DEF_MSG:
85       return upb_msgdef_upcast_mutable(
86           upb_msgdef_dup(upb_downcast_msgdef(def), o));
87     case UPB_DEF_FIELD:
88       return upb_fielddef_upcast_mutable(
89           upb_fielddef_dup(upb_downcast_fielddef(def), o));
90     case UPB_DEF_ENUM:
91       return upb_enumdef_upcast_mutable(
92           upb_enumdef_dup(upb_downcast_enumdef(def), o));
93     default: assert(false); return NULL;
94   }
95 }
96 
/* Initializes the common def header: sets up the refcounted base with |vtbl|
 * and |owner|, then records |type| and clears the name and the
 * came_from_user marker.  Returns false, leaving those members unwritten, if
 * the refcounted base could not be initialized. */
static bool upb_def_init(upb_def *def, upb_deftype_t type,
                         const struct upb_refcounted_vtbl *vtbl,
                         const void *owner) {
  if (upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) {
    def->came_from_user = false;
    def->fullname = NULL;
    def->type = type;
    return true;
  }
  return false;
}
106 
/* Releases what upb_def_init()/upb_def_setfullname() attached to the common
 * def header, i.e. the name string. */
static void upb_def_uninit(upb_def *def) {
  /* The name is held through a const pointer; drop the qualifier to free. */
  void *name = (void*)def->fullname;
  free(name);
}
110 
/* Returns a printable name for |m|: its full name, or "(anonymous)" when no
 * name has been set.  Intended for building error messages. */
static const char *msgdef_name(const upb_msgdef *m) {
  const char *fullname = upb_def_fullname(upb_msgdef_upcast(m));
  if (fullname == NULL) {
    return "(anonymous)";
  }
  return fullname;
}
115 
/* Checks that |f| is complete and consistent enough to be frozen along with
 * its containing message.  Returns true on success; otherwise returns false
 * with the first problem found described in |s|.
 *
 * Side effect: for an enum field that passes validation, the effective
 * numeric default is stored into the field itself. */
static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
  const bool unnamed = (upb_fielddef_name(f) == NULL);
  if (unnamed || upb_fielddef_number(f) == 0) {
    upb_status_seterrmsg(s, "fielddef must have name and number set");
    return false;
  }

  if (!f->type_is_set_) {
    upb_status_seterrmsg(s, "fielddef type was not initialized");
    return false;
  }

  if (upb_fielddef_lazy(f)) {
    if (upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) {
      upb_status_seterrmsg(
          s, "only length-delimited submessage fields may be lazy");
      return false;
    }
  }

  if (upb_fielddef_hassubdef(f)) {
    const upb_def *sub;

    /* A by-name reference has to be resolved to a real def first. */
    if (f->subdef_is_symbolic) {
      upb_status_seterrf(s, "field '%s.%s' has not been resolved",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }

    sub = upb_fielddef_subdef(f);
    if (!sub) {
      upb_status_seterrf(s, "field %s.%s is missing required subdef",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }

    /* An unfrozen subdef is only acceptable when it is marked as being part
     * of the freeze operation currently in progress. */
    if (!(upb_def_isfrozen(sub) || sub->came_from_user)) {
      upb_status_seterrf(s,
                         "subdef of field %s.%s is not frozen or being frozen",
                         msgdef_name(f->msg.def), upb_fielddef_name(f));
      return false;
    }
  }

  if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
    const bool default_has_name = upb_fielddef_enumhasdefaultstr(f);
    const bool default_has_number = upb_fielddef_enumhasdefaultint32(f);

    /* Previously verified by upb_validate_enumdef(). */
    assert(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);

    /* The enumdef is known to exist and to have at least one member, so at
     * least one representation of the default must be available: either the
     * user set one (as int32 or as string), or the enum's own default is
     * picked up. */
    assert(default_has_name || default_has_number);

    if (!default_has_name) {
      /* A number is set, but it does not name any member. */
      upb_status_seterrf(s,
                         "enum default for field %s.%s (%d) is not in the enum",
                         msgdef_name(f->msg.def), upb_fielddef_name(f),
                         upb_fielddef_defaultint32(f));
      return false;
    }

    if (!default_has_number) {
      /* A name is set, but the enum has no member by that name. */
      upb_status_seterrf(s,
                         "enum default for field %s.%s (%s) is not in the enum",
                         msgdef_name(f->msg.def), upb_fielddef_name(f),
                         upb_fielddef_defaultstr(f, NULL));
      return false;
    }

    /* Lift the effective numeric default into the field's default slot, in
     * case we were only getting it "by reference" from the enumdef. */
    upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
  }

  /* Ensure that MapEntry submessages only appear as repeated fields, not
   * optional/required (singular) fields. */
  if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE) {
    const upb_msgdef *submsg = upb_fielddef_msgsubdef(f);
    if (submsg != NULL && upb_msgdef_mapentry(submsg) &&
        !upb_fielddef_isseq(f)) {
      upb_status_seterrf(s,
                         "Field %s refers to mapentry message but is not "
                         "a repeated field",
                         upb_fielddef_name(f) ? upb_fielddef_name(f) :
                         "(unnamed)");
      return false;
    }
  }

  return true;
}
210 
/* Checks that |e| may be frozen: an enum needs at least one member.  Returns
 * true if so; otherwise returns false with an error in |s|. */
static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
  if (upb_enumdef_numvals(e) == 0) {
    /* The enum may never have been given a name; a NULL pointer must not be
     * passed for "%s". */
    const char *name = upb_enumdef_fullname(e);
    upb_status_seterrf(s, "enum %s has no members (must have at least one)",
                       name ? name : "(anonymous)");
    return false;
  }

  return true;
}
220 
221 /* All submessage fields are lower than all other fields.
222  * Secondly, fields are increasing in order. */
field_rank(const upb_fielddef * f)223 uint32_t field_rank(const upb_fielddef *f) {
224   uint32_t ret = upb_fielddef_number(f);
225   const uint32_t high_bit = 1 << 30;
226   assert(ret < high_bit);
227   if (!upb_fielddef_issubmsg(f))
228     ret |= high_bit;
229   return ret;
230 }
231 
cmp_fields(const void * p1,const void * p2)232 int cmp_fields(const void *p1, const void *p2) {
233   const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
234   const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
235   return field_rank(f1) - field_rank(f2);
236 }
237 
/* Validates every field of |m|, then assigns each field its index and handler
 * selector base and records the message's submessage-field count and total
 * selector count.
 *
 * Returns true on success.  Returns false, with the reason in |s|, if a field
 * fails upb_validate_field() or if memory cannot be allocated. */
static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
  /* Sort fields.  upb internally relies on UPB_TYPE_MESSAGE fields having the
   * lowest indexes, but we do not publicly guarantee this. */
  upb_msg_field_iter j;
  int i;
  uint32_t selector;
  int n = upb_msgdef_numfields(m);
  upb_fielddef **fields;

  if (n == 0) {
    /* Nothing to validate or sort.  Handled up front because malloc(0) is
     * allowed to return NULL, which must not be mistaken for a failure. */
    m->submsg_field_count = 0;
    m->selector_count = UPB_STATIC_SELECTOR_COUNT;
    return true;
  }

  fields = malloc(n * sizeof(*fields));
  if (!fields) {
    /* Callers expect every failure to come with an error in |s|. */
    upb_status_seterrmsg(s, "out of memory");
    return false;
  }

  m->submsg_field_count = 0;
  for(i = 0, upb_msg_field_begin(&j, m);
      !upb_msg_field_done(&j);
      upb_msg_field_next(&j), i++) {
    upb_fielddef *f = upb_msg_iter_field(&j);
    assert(f->msg.def == m);
    if (!upb_validate_field(f, s)) {
      free(fields);
      return false;
    }
    if (upb_fielddef_issubmsg(f)) {
      m->submsg_field_count++;
    }
    fields[i] = f;
  }

  qsort(fields, n, sizeof(*fields), cmp_fields);

  /* Selectors are handed out in sorted field order, starting after the static
   * selectors plus one slot per submessage field. */
  selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
  for (i = 0; i < n; i++) {
    upb_fielddef *f = fields[i];
    f->index_ = i;
    f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
    selector += upb_handlers_selectorcount(f);
  }
  m->selector_count = selector;

#ifndef NDEBUG
  {
    /* Verify that all selectors for the message are distinct. */
#define TRY(type) \
    if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v);

    upb_inttable t;
    upb_value v;
    upb_selector_t sel;

    upb_inttable_init(&t, UPB_CTYPE_BOOL);
    v = upb_value_bool(true);
    upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
    upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
    for(upb_msg_field_begin(&j, m);
        !upb_msg_field_done(&j);
        upb_msg_field_next(&j)) {
      upb_fielddef *f = upb_msg_iter_field(&j);
      /* These calls will assert-fail in upb_table if the value already
       * exists. */
      TRY(UPB_HANDLER_INT32)
      TRY(UPB_HANDLER_INT64)
      TRY(UPB_HANDLER_UINT32)
      TRY(UPB_HANDLER_UINT64)
      TRY(UPB_HANDLER_FLOAT)
      TRY(UPB_HANDLER_DOUBLE)
      TRY(UPB_HANDLER_BOOL)
      TRY(UPB_HANDLER_STARTSTR)
      TRY(UPB_HANDLER_STRING)
      TRY(UPB_HANDLER_ENDSTR)
      TRY(UPB_HANDLER_STARTSUBMSG)
      TRY(UPB_HANDLER_ENDSUBMSG)
      TRY(UPB_HANDLER_STARTSEQ)
      TRY(UPB_HANDLER_ENDSEQ)
    }
    upb_inttable_uninit(&t);
  }
#undef TRY
#endif

  free(fields);
  return true;
}
318 
upb_def_freeze(upb_def * const * defs,int n,upb_status * s)319 bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) {
320   int i;
321   int maxdepth;
322   bool ret;
323   upb_status_clear(s);
324 
325   /* First perform validation, in two passes so we can check that we have a
326    * transitive closure without needing to search. */
327   for (i = 0; i < n; i++) {
328     upb_def *def = defs[i];
329     if (upb_def_isfrozen(def)) {
330       /* Could relax this requirement if it's annoying. */
331       upb_status_seterrmsg(s, "def is already frozen");
332       goto err;
333     } else if (def->type == UPB_DEF_FIELD) {
334       upb_status_seterrmsg(s, "standalone fielddefs can not be frozen");
335       goto err;
336     } else if (def->type == UPB_DEF_ENUM) {
337       if (!upb_validate_enumdef(upb_dyncast_enumdef(def), s)) {
338         goto err;
339       }
340     } else {
341       /* Set now to detect transitive closure in the second pass. */
342       def->came_from_user = true;
343     }
344   }
345 
346   /* Second pass of validation.  Also assign selector bases and indexes, and
347    * compact tables. */
348   for (i = 0; i < n; i++) {
349     upb_msgdef *m = upb_dyncast_msgdef_mutable(defs[i]);
350     upb_enumdef *e = upb_dyncast_enumdef_mutable(defs[i]);
351     if (m) {
352       upb_inttable_compact(&m->itof);
353       if (!assign_msg_indices(m, s)) {
354         goto err;
355       }
356     } else if (e) {
357       upb_inttable_compact(&e->iton);
358     }
359   }
360 
361   /* Def graph contains FieldDefs between each MessageDef, so double the
362    * limit. */
363   maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;
364 
365   /* Validation all passed; freeze the defs. */
366   ret = upb_refcounted_freeze((upb_refcounted * const *)defs, n, s, maxdepth);
367   assert(!(s && ret != upb_ok(s)));
368   return ret;
369 
370 err:
371   for (i = 0; i < n; i++) {
372     defs[i]->came_from_user = false;
373   }
374   assert(!(s && upb_ok(s)));
375   return false;
376 }
377 
378 
379 /* upb_enumdef ****************************************************************/
380 
/* Refcounted "free" callback for enumdefs: releases the name strings owned by
 * the number->name table, both lookup tables, the common def header and
 * finally the enumdef itself. */
static void upb_enumdef_free(upb_refcounted *r) {
  upb_enumdef *e = (upb_enumdef*)r;
  upb_inttable_iter it;

  /* To clean up the upb_strdup() from upb_enumdef_addval(). */
  upb_inttable_begin(&it, &e->iton);
  while (!upb_inttable_done(&it)) {
    free(upb_value_getcstr(upb_inttable_iter_value(&it)));
    upb_inttable_next(&it);
  }

  upb_strtable_uninit(&e->ntoi);
  upb_inttable_uninit(&e->iton);
  upb_def_uninit(upb_enumdef_upcast_mutable(e));
  free(e);
}
394 
upb_enumdef_new(const void * owner)395 upb_enumdef *upb_enumdef_new(const void *owner) {
396   static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_enumdef_free};
397   upb_enumdef *e = malloc(sizeof(*e));
398   if (!e) return NULL;
399   if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM, &vtbl, owner))
400     goto err2;
401   if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2;
402   if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1;
403   return e;
404 
405 err1:
406   upb_strtable_uninit(&e->ntoi);
407 err2:
408   free(e);
409   return NULL;
410 }
411 
upb_enumdef_dup(const upb_enumdef * e,const void * owner)412 upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) {
413   upb_enum_iter i;
414   upb_enumdef *new_e = upb_enumdef_new(owner);
415   if (!new_e) return NULL;
416   for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
417     bool success = upb_enumdef_addval(
418         new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL);
419     if (!success) {
420       upb_enumdef_unref(new_e, owner);
421       return NULL;
422     }
423   }
424   return new_e;
425 }
426 
/* Freezes a single enumdef; equivalent to upb_def_freeze() on a batch
 * consisting of just |e|. */
bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
  upb_def *batch[1];
  batch[0] = upb_enumdef_upcast_mutable(e);
  return upb_def_freeze(batch, 1, status);
}
431 
upb_enumdef_fullname(const upb_enumdef * e)432 const char *upb_enumdef_fullname(const upb_enumdef *e) {
433   return upb_def_fullname(upb_enumdef_upcast(e));
434 }
435 
/* Sets the enum's full name; forwards to upb_def_setfullname() and returns
 * its result. */
bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
                             upb_status *s) {
  upb_def *base = upb_enumdef_upcast_mutable(e);
  return upb_def_setfullname(base, fullname, s);
}
440 
/* Adds the member |name| = |num| to |e|.
 *
 * |name| must be a valid identifier without dots and must not already be
 * defined in |e|.  Several names may share one number; in that case the first
 * name added remains the one reported for the number.  The first member added
 * also becomes the enum's default.
 *
 * Returns true on success.  On failure returns false with the reason in
 * |status| and leaves |e| unchanged. */
bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
                        upb_status *status) {
  if (!upb_isident(name, strlen(name), false, status)) {
    return false;
  }
  if (upb_enumdef_ntoiz(e, name, NULL)) {
    upb_status_seterrf(status, "name '%s' is already defined", name);
    return false;
  }
  if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
    upb_status_seterrmsg(status, "out of memory");
    return false;
  }
  if (!upb_inttable_lookup(&e->iton, num, NULL)) {
    /* First name for this number: the iton table takes ownership of a private
     * copy.  Both the copy and the insertion can fail; in either case release
     * the copy and roll back the ntoi entry added above. */
    char *name_copy = upb_strdup(name);
    if (!name_copy ||
        !upb_inttable_insert(&e->iton, num, upb_value_cstr(name_copy))) {
      free(name_copy);
      upb_status_seterrmsg(status, "out of memory");
      upb_strtable_remove(&e->ntoi, name, NULL);
      return false;
    }
  }
  if (upb_enumdef_numvals(e) == 1) {
    bool ok = upb_enumdef_setdefault(e, num, NULL);
    UPB_ASSERT_VAR(ok, ok);
  }
  return true;
}
466 
upb_enumdef_default(const upb_enumdef * e)467 int32_t upb_enumdef_default(const upb_enumdef *e) {
468   assert(upb_enumdef_iton(e, e->defaultval));
469   return e->defaultval;
470 }
471 
/* Makes |val| the enum's default.  |e| must not be frozen.  Returns false
 * with an error in |s| if |val| is not the number of any member. */
bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) {
  assert(!upb_enumdef_isfrozen(e));
  if (upb_enumdef_iton(e, val)) {
    e->defaultval = val;
    return true;
  }
  upb_status_seterrf(s, "number '%d' is not in the enum.", val);
  return false;
}
481 
upb_enumdef_numvals(const upb_enumdef * e)482 int upb_enumdef_numvals(const upb_enumdef *e) {
483   return upb_strtable_count(&e->ntoi);
484 }
485 
/* Positions |i| at the first member of |e|.
 *
 * Iteration walks the name->number table rather than the number->name table,
 * so that members sharing a number are all visited. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  const upb_strtable *names = &e->ntoi;
  upb_strtable_begin(i, names);
}
490 
/* Advances |iter| to the next enum member. */
void upb_enum_next(upb_enum_iter *iter) {
  upb_strtable_next(iter);
}
/* Returns true once |iter| has moved past the last enum member. */
bool upb_enum_done(upb_enum_iter *iter) {
  return upb_strtable_done(iter);
}
493 
upb_enumdef_ntoi(const upb_enumdef * def,const char * name,size_t len,int32_t * num)494 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
495                       size_t len, int32_t *num) {
496   upb_value v;
497   if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
498     return false;
499   }
500   if (num) *num = upb_value_getint32(v);
501   return true;
502 }
503 
upb_enumdef_iton(const upb_enumdef * def,int32_t num)504 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
505   upb_value v;
506   return upb_inttable_lookup32(&def->iton, num, &v) ?
507       upb_value_getcstr(v) : NULL;
508 }
509 
/* Returns the name of the member |iter| currently points at. */
const char *upb_enum_iter_name(upb_enum_iter *iter) {
  const char *name = upb_strtable_iter_key(iter);
  return name;
}
513 
/* Returns the number of the member |iter| currently points at. */
int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  const upb_value entry = upb_strtable_iter_value(iter);
  return upb_value_getint32(entry);
}
517 
518 
519 /* upb_fielddef ***************************************************************/
520 
521 static void upb_fielddef_init_default(upb_fielddef *f);
522 
/* Frees the field's default value if it is a heap-allocated string, i.e. the
 * type is set, the default is string-valued and a string is present. */
static void upb_fielddef_uninit_default(upb_fielddef *f) {
  if (!f->type_is_set_) return;
  if (!f->default_is_string) return;
  if (f->defaultval.bytes) {
    freestr(f->defaultval.bytes);
  }
}
527 
/* Refcounted "visit" callback for fielddefs: reports each object this field
 * refers to -- its containing message, its containing oneof and its resolved
 * subdef, whichever are present -- by calling |visit| with |closure|. */
static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const upb_fielddef *f = (const upb_fielddef*)r;
  const upb_msgdef *parent;
  const upb_oneofdef *oneof;
  const upb_def *sub;

  parent = upb_fielddef_containingtype(f);
  if (parent) {
    visit(r, upb_msgdef_upcast2(parent), closure);
  }

  oneof = upb_fielddef_containingoneof(f);
  if (oneof) {
    visit(r, upb_oneofdef_upcast2(oneof), closure);
  }

  sub = upb_fielddef_subdef(f);
  if (sub) {
    visit(r, upb_def_upcast(sub), closure);
  }
}
541 
/* Refcounted "free" callback for fielddefs: releases everything the field
 * owns (string default, symbolic subdef name, symbolic containing-type name,
 * common def header) and then the fielddef itself. */
static void freefield(upb_refcounted *r) {
  upb_fielddef *f = (upb_fielddef*)r;
  upb_fielddef_uninit_default(f);
  if (f->subdef_is_symbolic)
    free(f->sub.name);
  /* A field that was given a containing type by name (see
   * upb_fielddef_setcontainingtypename()) owns a copy of that name; without
   * this it would leak when the field is destroyed in that state. */
  if (f->msg_is_symbolic)
    free(f->msg.name);
  upb_def_uninit(upb_fielddef_upcast_mutable(f));
  free(f);
}
550 
enumdefaultstr(const upb_fielddef * f)551 static const char *enumdefaultstr(const upb_fielddef *f) {
552   const upb_enumdef *e;
553   assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
554   e = upb_fielddef_enumsubdef(f);
555   if (f->default_is_string && f->defaultval.bytes) {
556     /* Default was explicitly set as a string. */
557     str_t *s = f->defaultval.bytes;
558     return s->str;
559   } else if (e) {
560     if (!f->default_is_string) {
561       /* Default was explicitly set as an integer; look it up in enumdef. */
562       const char *name = upb_enumdef_iton(e, f->defaultval.sint);
563       if (name) {
564         return name;
565       }
566     } else {
567       /* Default is completely unset; pull enumdef default. */
568       if (upb_enumdef_numvals(e) > 0) {
569         const char *name = upb_enumdef_iton(e, upb_enumdef_default(e));
570         assert(name);
571         return name;
572       }
573     }
574   }
575   return NULL;
576 }
577 
enumdefaultint32(const upb_fielddef * f,int32_t * val)578 static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
579   const upb_enumdef *e;
580   assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
581   e = upb_fielddef_enumsubdef(f);
582   if (!f->default_is_string) {
583     /* Default was explicitly set as an integer. */
584     *val = f->defaultval.sint;
585     return true;
586   } else if (e) {
587     if (f->defaultval.bytes) {
588       /* Default was explicitly set as a str; try to lookup corresponding int. */
589       str_t *s = f->defaultval.bytes;
590       if (upb_enumdef_ntoiz(e, s->str, val)) {
591         return true;
592       }
593     } else {
594       /* Default is unset; try to pull in enumdef default. */
595       if (upb_enumdef_numvals(e) > 0) {
596         *val = upb_enumdef_default(e);
597         return true;
598       }
599     }
600   }
601   return false;
602 }
603 
upb_fielddef_new(const void * o)604 upb_fielddef *upb_fielddef_new(const void *o) {
605   static const struct upb_refcounted_vtbl vtbl = {visitfield, freefield};
606   upb_fielddef *f = malloc(sizeof(*f));
607   if (!f) return NULL;
608   if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD, &vtbl, o)) {
609     free(f);
610     return NULL;
611   }
612   f->msg.def = NULL;
613   f->sub.def = NULL;
614   f->oneof = NULL;
615   f->subdef_is_symbolic = false;
616   f->msg_is_symbolic = false;
617   f->label_ = UPB_LABEL_OPTIONAL;
618   f->type_ = UPB_TYPE_INT32;
619   f->number_ = 0;
620   f->type_is_set_ = false;
621   f->tagdelim = false;
622   f->is_extension_ = false;
623   f->lazy_ = false;
624   f->packed_ = true;
625 
626   /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
627    * with all integer types and is in some since more "default" since the most
628    * normal-looking proto2 types int32/int64/uint32/uint64 use variable.
629    *
630    * Other options to consider:
631    * - there is no default; users must set this manually (like type).
632    * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
633    *   be an optimal default for signed integers. */
634   f->intfmt = UPB_INTFMT_VARIABLE;
635   return f;
636 }
637 
upb_fielddef_dup(const upb_fielddef * f,const void * owner)638 upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
639   const char *srcname;
640   upb_fielddef *newf = upb_fielddef_new(owner);
641   if (!newf) return NULL;
642   upb_fielddef_settype(newf, upb_fielddef_type(f));
643   upb_fielddef_setlabel(newf, upb_fielddef_label(f));
644   upb_fielddef_setnumber(newf, upb_fielddef_number(f), NULL);
645   upb_fielddef_setname(newf, upb_fielddef_name(f), NULL);
646   if (f->default_is_string && f->defaultval.bytes) {
647     str_t *s = f->defaultval.bytes;
648     upb_fielddef_setdefaultstr(newf, s->str, s->len, NULL);
649   } else {
650     newf->default_is_string = f->default_is_string;
651     newf->defaultval = f->defaultval;
652   }
653 
654   if (f->subdef_is_symbolic) {
655     srcname = f->sub.name;  /* Might be NULL. */
656   } else {
657     srcname = f->sub.def ? upb_def_fullname(f->sub.def) : NULL;
658   }
659   if (srcname) {
660     char *newname = malloc(strlen(f->sub.def->fullname) + 2);
661     if (!newname) {
662       upb_fielddef_unref(newf, owner);
663       return NULL;
664     }
665     strcpy(newname, ".");
666     strcat(newname, f->sub.def->fullname);
667     upb_fielddef_setsubdefname(newf, newname, NULL);
668     free(newname);
669   }
670 
671   return newf;
672 }
673 
upb_fielddef_typeisset(const upb_fielddef * f)674 bool upb_fielddef_typeisset(const upb_fielddef *f) {
675   return f->type_is_set_;
676 }
677 
upb_fielddef_type(const upb_fielddef * f)678 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
679   assert(f->type_is_set_);
680   return f->type_;
681 }
682 
upb_fielddef_index(const upb_fielddef * f)683 uint32_t upb_fielddef_index(const upb_fielddef *f) {
684   return f->index_;
685 }
686 
upb_fielddef_label(const upb_fielddef * f)687 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
688   return f->label_;
689 }
690 
upb_fielddef_intfmt(const upb_fielddef * f)691 upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) {
692   return f->intfmt;
693 }
694 
upb_fielddef_istagdelim(const upb_fielddef * f)695 bool upb_fielddef_istagdelim(const upb_fielddef *f) {
696   return f->tagdelim;
697 }
698 
upb_fielddef_number(const upb_fielddef * f)699 uint32_t upb_fielddef_number(const upb_fielddef *f) {
700   return f->number_;
701 }
702 
upb_fielddef_isextension(const upb_fielddef * f)703 bool upb_fielddef_isextension(const upb_fielddef *f) {
704   return f->is_extension_;
705 }
706 
upb_fielddef_lazy(const upb_fielddef * f)707 bool upb_fielddef_lazy(const upb_fielddef *f) {
708   return f->lazy_;
709 }
710 
upb_fielddef_packed(const upb_fielddef * f)711 bool upb_fielddef_packed(const upb_fielddef *f) {
712   return f->packed_;
713 }
714 
upb_fielddef_name(const upb_fielddef * f)715 const char *upb_fielddef_name(const upb_fielddef *f) {
716   return upb_def_fullname(upb_fielddef_upcast(f));
717 }
718 
upb_fielddef_containingtype(const upb_fielddef * f)719 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
720   return f->msg_is_symbolic ? NULL : f->msg.def;
721 }
722 
upb_fielddef_containingoneof(const upb_fielddef * f)723 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
724   return f->oneof;
725 }
726 
upb_fielddef_containingtype_mutable(upb_fielddef * f)727 upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
728   return (upb_msgdef*)upb_fielddef_containingtype(f);
729 }
730 
/* Returns the containing type's name if it is currently held symbolically (by
 * name), or NULL otherwise.  The string remains owned by the field. */
const char *upb_fielddef_containingtypename(upb_fielddef *f) {
  if (!f->msg_is_symbolic) {
    return NULL;
  }
  return f->msg.name;
}
734 
/* Frees the containing-type name if the field holds one symbolically.  The
 * msg_is_symbolic flag itself is left for the caller to update. */
static void release_containingtype(upb_fielddef *f) {
  if (!f->msg_is_symbolic) {
    return;
  }
  free(f->msg.name);
}
738 
/* Records, by name, the message type this field is to belong to.  The field
 * keeps a private copy of |name|.
 *
 * |f| must not be frozen.  Returns false with an error in |s|, leaving any
 * previously set name intact, if the field has already been added to a
 * message or if memory for the copy cannot be allocated. */
bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
                                        upb_status *s) {
  char *name_copy;
  assert(!upb_fielddef_isfrozen(f));
  if (upb_fielddef_containingtype(f)) {
    upb_status_seterrmsg(s, "field has already been added to a message.");
    return false;
  }
  /* TODO: validate name (upb_isident() doesn't quite work atm because this name
   * may have a leading "."). */

  /* Copy first, so that an allocation failure neither discards the old name
   * nor gets reported as success. */
  name_copy = upb_strdup(name);
  if (!name_copy) {
    upb_status_seterrmsg(s, "out of memory");
    return false;
  }

  release_containingtype(f);
  f->msg.name = name_copy;
  f->msg_is_symbolic = true;
  return true;
}
753 
/* Sets the field's name.  Fails with an error in |s| once the field has been
 * added to a message or oneof; otherwise forwards to upb_def_setfullname()
 * and returns its result. */
bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
  const bool attached =
      upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f);
  if (!attached) {
    return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
  }
  upb_status_seterrmsg(s, "Already added to message or oneof");
  return false;
}
761 
/* Asserts that |f| has its type set and that the type equals |type|.  Used by
 * the typed default-value getters to catch mismatched calls. */
static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
  /* Presumably keeps builds without asserts free of unused-parameter
   * warnings. */
  UPB_UNUSED(type);
  UPB_UNUSED(f);
  assert(f->type_is_set_ && upb_fielddef_type(f) == type);
}
767 
upb_fielddef_defaultint64(const upb_fielddef * f)768 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
769   chkdefaulttype(f, UPB_TYPE_INT64);
770   return f->defaultval.sint;
771 }
772 
upb_fielddef_defaultint32(const upb_fielddef * f)773 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
774   if (f->type_is_set_ && upb_fielddef_type(f) == UPB_TYPE_ENUM) {
775     int32_t val;
776     bool ok = enumdefaultint32(f, &val);
777     UPB_ASSERT_VAR(ok, ok);
778     return val;
779   } else {
780     chkdefaulttype(f, UPB_TYPE_INT32);
781     return f->defaultval.sint;
782   }
783 }
784 
upb_fielddef_defaultuint64(const upb_fielddef * f)785 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
786   chkdefaulttype(f, UPB_TYPE_UINT64);
787   return f->defaultval.uint;
788 }
789 
upb_fielddef_defaultuint32(const upb_fielddef * f)790 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
791   chkdefaulttype(f, UPB_TYPE_UINT32);
792   return f->defaultval.uint;
793 }
794 
upb_fielddef_defaultbool(const upb_fielddef * f)795 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
796   chkdefaulttype(f, UPB_TYPE_BOOL);
797   return f->defaultval.uint;
798 }
799 
upb_fielddef_defaultfloat(const upb_fielddef * f)800 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
801   chkdefaulttype(f, UPB_TYPE_FLOAT);
802   return f->defaultval.flt;
803 }
804 
upb_fielddef_defaultdouble(const upb_fielddef * f)805 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
806   chkdefaulttype(f, UPB_TYPE_DOUBLE);
807   return f->defaultval.dbl;
808 }
809 
upb_fielddef_defaultstr(const upb_fielddef * f,size_t * len)810 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
811   assert(f->type_is_set_);
812   assert(upb_fielddef_type(f) == UPB_TYPE_STRING ||
813          upb_fielddef_type(f) == UPB_TYPE_BYTES ||
814          upb_fielddef_type(f) == UPB_TYPE_ENUM);
815 
816   if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
817     const char *ret = enumdefaultstr(f);
818     assert(ret);
819     /* Enum defaults can't have embedded NULLs. */
820     if (len) *len = strlen(ret);
821     return ret;
822   }
823 
824   if (f->default_is_string) {
825     str_t *str = f->defaultval.bytes;
826     if (len) *len = str->len;
827     return str->str;
828   }
829 
830   return NULL;
831 }
832 
/* Resets the field's default to the initial value for its current type: zero
 * for numeric and bool fields, the empty string for string/bytes fields, and
 * the "not set" sentinel for enum fields.  Message fields are left without a
 * default value.  Does not free any previous default. */
static void upb_fielddef_init_default(upb_fielddef *f) {
  f->default_is_string = false;
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_MESSAGE:
      break;

    case UPB_TYPE_FLOAT:
      f->defaultval.flt = 0;
      break;
    case UPB_TYPE_DOUBLE:
      f->defaultval.dbl = 0;
      break;

    case UPB_TYPE_INT64:
    case UPB_TYPE_INT32:
      f->defaultval.sint = 0;
      break;

    case UPB_TYPE_BOOL:
    case UPB_TYPE_UINT32:
    case UPB_TYPE_UINT64:
      f->defaultval.uint = 0;
      break;

    case UPB_TYPE_BYTES:
    case UPB_TYPE_STRING:
      f->default_is_string = true;
      f->defaultval.bytes = newstr("", 0);
      break;

    case UPB_TYPE_ENUM:
      /* This is our special sentinel that indicates "not set" for an enum:
       * string-valued, but with no string. */
      f->defaultval.bytes = NULL;
      f->default_is_string = true;
      break;
  }
}
856 
upb_fielddef_subdef(const upb_fielddef * f)857 const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
858   return f->subdef_is_symbolic ? NULL : f->sub.def;
859 }
860 
upb_fielddef_msgsubdef(const upb_fielddef * f)861 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
862   const upb_def *def = upb_fielddef_subdef(f);
863   return def ? upb_dyncast_msgdef(def) : NULL;
864 }
865 
upb_fielddef_enumsubdef(const upb_fielddef * f)866 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
867   const upb_def *def = upb_fielddef_subdef(f);
868   return def ? upb_dyncast_enumdef(def) : NULL;
869 }
870 
/* Same lookup as upb_fielddef_subdef(), with the const qualifier cast away
 * from the result. */
upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
  return (upb_def*)upb_fielddef_subdef(f);
}
874 
upb_fielddef_subdefname(const upb_fielddef * f)875 const char *upb_fielddef_subdefname(const upb_fielddef *f) {
876   if (f->subdef_is_symbolic) {
877     return f->sub.name;
878   } else if (f->sub.def) {
879     return upb_def_fullname(f->sub.def);
880   } else {
881     return NULL;
882   }
883 }
884 
/* Sets the field number of |f|.
 *
 * Fails (returns false and reports through |s|) if the field already has a
 * containing type, or if |number| is 0 or exceeds UPB_MAX_FIELDNUMBER.
 * Returns true once the number has been stored. */
bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) {
  const bool in_range = number != 0 && number <= UPB_MAX_FIELDNUMBER;

  if (upb_fielddef_containingtype(f) != NULL) {
    upb_status_seterrmsg(
        s, "cannot change field number after adding to a message");
    return false;
  }

  if (!in_range) {
    upb_status_seterrf(s, "invalid field number (%u)", number);
    return false;
  }

  f->number_ = number;
  return true;
}
898 
/* Sets the type of |f| and resets its default value to match.
 *
 * Asserts that |f| is not frozen and that |type| passes
 * upb_fielddef_checktype(). */
void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
  assert(!upb_fielddef_isfrozen(f));
  assert(upb_fielddef_checktype(type));
  /* Tear down the default belonging to the old type before switching types;
   * the new default is installed only after the type has been updated. */
  upb_fielddef_uninit_default(f);
  f->type_ = type;
  f->type_is_set_ = true;
  upb_fielddef_init_default(f);
}
907 
/* Configures |f| from a descriptor type (UPB_DESCRIPTOR_TYPE_*).
 *
 * A descriptor type is decomposed into three properties that are applied in
 * order: the upb field type, the integer wire format (variable / fixed /
 * zigzag), and whether the field is tag-delimited (groups only).
 *
 * Asserts that |f| is not frozen and that |type| is a recognized descriptor
 * type.  If assertions are disabled and |type| is unrecognized, the field type
 * is left unchanged while the int format is set to variable and the
 * tag-delimited flag is cleared. */
void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) {
  upb_fieldtype_t ftype = UPB_TYPE_DOUBLE;
  upb_intfmt_t fmt = UPB_INTFMT_VARIABLE;
  bool recognized = true;

  assert(!upb_fielddef_isfrozen(f));

  switch (type) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      ftype = UPB_TYPE_DOUBLE;
      break;
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      ftype = UPB_TYPE_FLOAT;
      break;
    case UPB_DESCRIPTOR_TYPE_INT64:
      ftype = UPB_TYPE_INT64;
      break;
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
      ftype = UPB_TYPE_INT64;
      fmt = UPB_INTFMT_FIXED;
      break;
    case UPB_DESCRIPTOR_TYPE_SINT64:
      ftype = UPB_TYPE_INT64;
      fmt = UPB_INTFMT_ZIGZAG;
      break;
    case UPB_DESCRIPTOR_TYPE_UINT64:
      ftype = UPB_TYPE_UINT64;
      break;
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      ftype = UPB_TYPE_UINT64;
      fmt = UPB_INTFMT_FIXED;
      break;
    case UPB_DESCRIPTOR_TYPE_INT32:
      ftype = UPB_TYPE_INT32;
      break;
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      ftype = UPB_TYPE_INT32;
      fmt = UPB_INTFMT_FIXED;
      break;
    case UPB_DESCRIPTOR_TYPE_SINT32:
      ftype = UPB_TYPE_INT32;
      fmt = UPB_INTFMT_ZIGZAG;
      break;
    case UPB_DESCRIPTOR_TYPE_UINT32:
      ftype = UPB_TYPE_UINT32;
      break;
    case UPB_DESCRIPTOR_TYPE_FIXED32:
      ftype = UPB_TYPE_UINT32;
      fmt = UPB_INTFMT_FIXED;
      break;
    case UPB_DESCRIPTOR_TYPE_BOOL:
      ftype = UPB_TYPE_BOOL;
      break;
    case UPB_DESCRIPTOR_TYPE_STRING:
      ftype = UPB_TYPE_STRING;
      break;
    case UPB_DESCRIPTOR_TYPE_BYTES:
      ftype = UPB_TYPE_BYTES;
      break;
    case UPB_DESCRIPTOR_TYPE_GROUP:
    case UPB_DESCRIPTOR_TYPE_MESSAGE:
      ftype = UPB_TYPE_MESSAGE;
      break;
    case UPB_DESCRIPTOR_TYPE_ENUM:
      ftype = UPB_TYPE_ENUM;
      break;
    default:
      assert(false);
      recognized = false;
      break;
  }

  if (recognized) upb_fielddef_settype(f, ftype);
  upb_fielddef_setintfmt(f, fmt);
  upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP);
}
968 
/* Maps the (type, intfmt, tagdelim) properties of |f| back to a descriptor
 * type; the inverse of upb_fielddef_setdescriptortype().
 *
 * Returns -1 for the combinations unsigned + zigzag, and 0 if the field type
 * matches none of the outer cases.
 *
 * NOTE(review): the inner switches have no default and no break after them, so
 * if upb_fielddef_intfmt() returned a value outside the three listed cases,
 * control would fall through into the following outer case.  Presumably this
 * cannot happen because upb_fielddef_setintfmt() asserts the format is valid;
 * confirm before relying on it in builds with assertions disabled. */
upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_FLOAT:  return UPB_DESCRIPTOR_TYPE_FLOAT;
    case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE;
    case UPB_TYPE_BOOL:   return UPB_DESCRIPTOR_TYPE_BOOL;
    case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING;
    case UPB_TYPE_BYTES:  return UPB_DESCRIPTOR_TYPE_BYTES;
    case UPB_TYPE_ENUM:   return UPB_DESCRIPTOR_TYPE_ENUM;
    /* Integer types: the descriptor type depends on the wire format. */
    case UPB_TYPE_INT32:
      switch (upb_fielddef_intfmt(f)) {
        case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32;
        case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_SFIXED32;
        case UPB_INTFMT_ZIGZAG:   return UPB_DESCRIPTOR_TYPE_SINT32;
      }
    case UPB_TYPE_INT64:
      switch (upb_fielddef_intfmt(f)) {
        case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64;
        case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_SFIXED64;
        case UPB_INTFMT_ZIGZAG:   return UPB_DESCRIPTOR_TYPE_SINT64;
      }
    case UPB_TYPE_UINT32:
      switch (upb_fielddef_intfmt(f)) {
        case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32;
        case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_FIXED32;
        /* No descriptor type exists for unsigned zigzag. */
        case UPB_INTFMT_ZIGZAG:   return -1;
      }
    case UPB_TYPE_UINT64:
      switch (upb_fielddef_intfmt(f)) {
        case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64;
        case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_FIXED64;
        /* No descriptor type exists for unsigned zigzag. */
        case UPB_INTFMT_ZIGZAG:   return -1;
      }
    case UPB_TYPE_MESSAGE:
      /* Tag-delimited submessages are groups. */
      return upb_fielddef_istagdelim(f) ?
          UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE;
  }
  return 0;
}
1007 
/* Records whether |f| is an extension.  Asserts that |f| is not frozen. */
void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) {
  assert(!upb_fielddef_isfrozen(f));
  f->is_extension_ = is_extension;
}
1012 
/* Stores the "lazy" flag of |f|.  Asserts that |f| is not frozen. */
void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
  assert(!upb_fielddef_isfrozen(f));
  f->lazy_ = lazy;
}
1017 
/* Stores the "packed" flag of |f|.  Asserts that |f| is not frozen. */
void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
  assert(!upb_fielddef_isfrozen(f));
  f->packed_ = packed;
}
1022 
/* Sets the label of |f|.  Asserts that |f| is not frozen and that |label|
 * passes upb_fielddef_checklabel(). */
void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
  assert(!upb_fielddef_isfrozen(f));
  assert(upb_fielddef_checklabel(label));
  f->label_ = label;
}
1028 
/* Sets the integer format of |f|.  Asserts that |f| is not frozen and that
 * |fmt| passes upb_fielddef_checkintfmt(). */
void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) {
  assert(!upb_fielddef_isfrozen(f));
  assert(upb_fielddef_checkintfmt(fmt));
  f->intfmt = fmt;
}
1034 
/* Records whether |f| is tag-delimited (set for group fields by
 * upb_fielddef_setdescriptortype()).  Asserts that |f| is not frozen. */
void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) {
  assert(!upb_fielddef_isfrozen(f));
  f->tagdelim = tag_delim;
}
1040 
/* Shared precondition check for the scalar default setters.
 *
 * Returns false (after asserting) unless |f| has its type set, is not frozen
 * and has exactly the type |type|.  On success, any string default currently
 * held by |f| is released and the field is marked as holding a non-string
 * default, so the caller may store a scalar value. */
static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) {
  const bool settable = f->type_is_set_ && !upb_fielddef_isfrozen(f) &&
                        upb_fielddef_type(f) == type;
  if (!settable) {
    assert(false);
    return false;
  }

  if (f->default_is_string) {
    str_t *old = f->defaultval.bytes;
    /* Only enums may carry the NULL "not set" sentinel. */
    assert(old || type == UPB_TYPE_ENUM);
    /* freestr() forwards to free(), which accepts NULL. */
    freestr(old);
  }

  f->default_is_string = false;
  return true;
}
1055 
/* Stores |value| as the default of an int64 field.  Does nothing if
 * checksetdefault() rejects the field. */
void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) {
  if (!checksetdefault(f, UPB_TYPE_INT64)) return;
  f->defaultval.sint = value;
}
1060 
/* Stores |value| as the default of an int32 or enum field.
 *
 * Enum fields are validated against UPB_TYPE_ENUM first; if the field is not
 * an enum, or that check fails, validation falls back to UPB_TYPE_INT32.
 * Does nothing if no check accepts the field. */
void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) {
  bool accepted;

  if (upb_fielddef_type(f) == UPB_TYPE_ENUM &&
      checksetdefault(f, UPB_TYPE_ENUM)) {
    accepted = true;
  } else {
    accepted = checksetdefault(f, UPB_TYPE_INT32);
  }

  if (accepted) f->defaultval.sint = value;
}
1068 
/* Stores |value| as the default of a uint64 field.  Does nothing if
 * checksetdefault() rejects the field. */
void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) {
  if (!checksetdefault(f, UPB_TYPE_UINT64)) return;
  f->defaultval.uint = value;
}
1073 
/* Stores |value| as the default of a uint32 field.  Does nothing if
 * checksetdefault() rejects the field. */
void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) {
  if (!checksetdefault(f, UPB_TYPE_UINT32)) return;
  f->defaultval.uint = value;
}
1078 
/* Stores |value| as the default of a bool field (kept in the unsigned slot of
 * the default union).  Does nothing if checksetdefault() rejects the field. */
void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) {
  if (!checksetdefault(f, UPB_TYPE_BOOL)) return;
  f->defaultval.uint = value;
}
1083 
/* Stores |value| as the default of a float field.  Does nothing if
 * checksetdefault() rejects the field. */
void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) {
  if (!checksetdefault(f, UPB_TYPE_FLOAT)) return;
  f->defaultval.flt = value;
}
1088 
/* Stores |value| as the default of a double field.  Does nothing if
 * checksetdefault() rejects the field. */
void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
  if (!checksetdefault(f, UPB_TYPE_DOUBLE)) return;
  f->defaultval.dbl = value;
}
1093 
/* Sets the default of a string, bytes or enum field to a copy of the |len|
 * bytes at |str|.
 *
 * For enum fields the value must be a valid (non-dotted) identifier;
 * otherwise false is returned with the error reported through |s|.
 *
 * Returns false and reports "out of memory" if the copy cannot be allocated.
 * In that case the previous default is left untouched, so a string field
 * never ends up with a NULL default. */
bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
                                upb_status *s) {
  str_t *newval;
  assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
  if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
    return false;

  /* Allocate before releasing the old value so that failure is recoverable.
   * An empty value is copied from "" so that a NULL |str| (allowed by
   * upb_fielddef_setdefaultcstr()) is never handed to memcpy(). */
  newval = newstr(len ? str : "", len);
  if (!newval) {
    upb_status_seterrmsg(s, "out of memory");
    return false;
  }

  if (f->default_is_string) {
    str_t *old = f->defaultval.bytes;
    /* Only enums may carry the NULL "not set" sentinel. */
    assert(old || f->type_ == UPB_TYPE_ENUM);
    if (old) freestr(old);
  } else {
    /* A non-string default can only be an enum's integer default. */
    assert(f->type_ == UPB_TYPE_ENUM);
  }

  f->defaultval.bytes = newval;
  f->default_is_string = true;
  return true;
}
1114 
/* Convenience wrapper around upb_fielddef_setdefaultstr() for NUL-terminated
 * strings.  A NULL |str| is passed through with length 0.  The result of the
 * underlying call is not returned.  Asserts that the type of |f| is set. */
void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
                                 upb_status *s) {
  size_t len = 0;
  assert(f->type_is_set_);
  if (str) len = strlen(str);
  upb_fielddef_setdefaultstr(f, str, len, s);
}
1120 
/* Returns whether enumdefaultint32() can produce an integer default for the
 * enum field |f|.  Asserts that |f| has its type set and is an enum. */
bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
  /* Receives the value from enumdefaultint32(); only the result is used. */
  int32_t val;
  assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
  return enumdefaultint32(f, &val);
}
1126 
upb_fielddef_enumhasdefaultstr(const upb_fielddef * f)1127 bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) {
1128   assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
1129   return enumdefaultstr(f) != NULL;
1130 }
1131 
/* Checks that |subdef| is the right kind of def for field |f|: a msgdef for
 * message fields, an enumdef for enum fields.  Any other field type cannot
 * have a subdef.  On mismatch an error is reported through |s| and false is
 * returned. */
static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef,
                                 upb_status *s) {
  switch (f->type_) {
    case UPB_TYPE_MESSAGE:
      if (upb_dyncast_msgdef(subdef)) return true;
      upb_status_seterrmsg(s, "invalid subdef type for this submessage field");
      return false;
    case UPB_TYPE_ENUM:
      if (upb_dyncast_enumdef(subdef)) return true;
      upb_status_seterrmsg(s, "invalid subdef type for this enum field");
      return false;
    default:
      upb_status_seterrmsg(s, "only message and enum fields can have a subdef");
      return false;
  }
}
1147 
/* Releases whatever |f| currently holds as its subdef: frees the name when
 * the subdef is symbolic, otherwise drops the ref that |f| holds on the def
 * (if any).  The fields themselves are not cleared. */
static void release_subdef(upb_fielddef *f) {
  if (f->subdef_is_symbolic) {
    free(f->sub.name);
    return;
  }
  if (f->sub.def) upb_unref2(f->sub.def, f);
}
1155 
/* Attaches |subdef| to |f|, replacing any previous subdef or symbolic name.
 * A NULL |subdef| clears the subdef.
 *
 * Returns false (reporting through |s|) if a non-NULL |subdef| fails
 * upb_subdef_typecheck(); in that case |f| is unchanged.  Asserts that |f| is
 * not frozen and is of a type that takes a subdef. */
bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
                            upb_status *s) {
  assert(!upb_fielddef_isfrozen(f));
  assert(upb_fielddef_hassubdef(f));

  if (subdef != NULL && !upb_subdef_typecheck(f, subdef, s)) {
    return false;
  }

  release_subdef(f);
  f->subdef_is_symbolic = false;
  f->sub.def = subdef;
  if (subdef != NULL) upb_ref2(subdef, f);
  return true;
}
1167 
/* Typed wrapper: upcasts |subdef| and forwards to upb_fielddef_setsubdef(). */
bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
                               upb_status *s) {
  return upb_fielddef_setsubdef(f, upb_msgdef_upcast(subdef), s);
}
1172 
/* Typed wrapper: upcasts |subdef| and forwards to upb_fielddef_setsubdef(). */
bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
                                upb_status *s) {
  return upb_fielddef_setsubdef(f, upb_enumdef_upcast(subdef), s);
}
1177 
/* Sets the subdef of |f| symbolically, by name.  Any previous subdef or name
 * is released and a copy of |name| is stored.
 *
 * Returns false (reporting through |s|) if the type of |f| does not take a
 * subdef.  Asserts that |f| is not frozen. */
bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
                                upb_status *s) {
  assert(!upb_fielddef_isfrozen(f));

  if (upb_fielddef_hassubdef(f)) {
    /* TODO: validate name (upb_isident() doesn't quite work atm because this
     * name may have a leading "."). */
    release_subdef(f);
    f->sub.name = upb_strdup(name);
    f->subdef_is_symbolic = true;
    return true;
  }

  upb_status_seterrmsg(s, "field type does not accept a subdef");
  return false;
}
1192 
/* Returns true if |f| is a message-typed field. */
bool upb_fielddef_issubmsg(const upb_fielddef *f) {
  return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
}
1196 
upb_fielddef_isstring(const upb_fielddef * f)1197 bool upb_fielddef_isstring(const upb_fielddef *f) {
1198   return upb_fielddef_type(f) == UPB_TYPE_STRING ||
1199          upb_fielddef_type(f) == UPB_TYPE_BYTES;
1200 }
1201 
/* Returns true if |f| has the REPEATED label. */
bool upb_fielddef_isseq(const upb_fielddef *f) {
  return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
}
1205 
/* Returns true if |f| is neither a string/bytes field nor a submessage. */
bool upb_fielddef_isprimitive(const upb_fielddef *f) {
  return !(upb_fielddef_isstring(f) || upb_fielddef_issubmsg(f));
}
1209 
upb_fielddef_ismap(const upb_fielddef * f)1210 bool upb_fielddef_ismap(const upb_fielddef *f) {
1211   return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
1212          upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
1213 }
1214 
upb_fielddef_hassubdef(const upb_fielddef * f)1215 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
1216   return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
1217 }
1218 
/* Returns true if |x| lies in the closed interval [low, high]. */
static bool between(int32_t x, int32_t low, int32_t high) {
  if (x < low) return false;
  if (x > high) return false;
  return true;
}
1222 
/* Returns true if |label| is within the accepted label range [1, 3]. */
bool upb_fielddef_checklabel(int32_t label) { return between(label, 1, 3); }
/* Returns true if |type| is within the accepted field type range [1, 11]. */
bool upb_fielddef_checktype(int32_t type) { return between(type, 1, 11); }
/* Returns true if |fmt| is within the accepted int format range [1, 3]. */
bool upb_fielddef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); }
1226 
/* Returns true if |type| is within the accepted descriptor type range
 * [1, 18]. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  return between(type, 1, 18);
}
1230 
1231 /* upb_msgdef *****************************************************************/
1232 
/* Refcounting visit callback for msgdefs: invokes |visit| once for every
 * field and then once for every oneof contained in the message. */
static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
                     void *closure) {
  const upb_msgdef *msg = (const upb_msgdef*)r;
  upb_msg_field_iter field_it;
  upb_msg_oneof_iter oneof_it;

  upb_msg_field_begin(&field_it, msg);
  while (!upb_msg_field_done(&field_it)) {
    upb_fielddef *field = upb_msg_iter_field(&field_it);
    visit(r, upb_fielddef_upcast2(field), closure);
    upb_msg_field_next(&field_it);
  }

  upb_msg_oneof_begin(&oneof_it, msg);
  while (!upb_msg_oneof_done(&oneof_it)) {
    upb_oneofdef *oneof = upb_msg_iter_oneof(&oneof_it);
    visit(r, upb_oneofdef_upcast2(oneof), closure);
    upb_msg_oneof_next(&oneof_it);
  }
}
1251 
/* Refcounting free callback for msgdefs: tears down the three lookup tables
 * and the base def, then frees the msgdef itself. */
static void freemsg(upb_refcounted *r) {
  upb_msgdef *m = (upb_msgdef*)r;
  upb_strtable_uninit(&m->ntoo);
  upb_strtable_uninit(&m->ntof);
  upb_inttable_uninit(&m->itof);
  upb_def_uninit(upb_msgdef_upcast_mutable(m));
  free(m);
}
1260 
upb_msgdef_new(const void * owner)1261 upb_msgdef *upb_msgdef_new(const void *owner) {
1262   static const struct upb_refcounted_vtbl vtbl = {visitmsg, freemsg};
1263   upb_msgdef *m = malloc(sizeof(*m));
1264   if (!m) return NULL;
1265   if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &vtbl, owner))
1266     goto err2;
1267   if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err3;
1268   if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err2;
1269   if (!upb_strtable_init(&m->ntoo, UPB_CTYPE_PTR)) goto err1;
1270   m->map_entry = false;
1271   return m;
1272 
1273 err1:
1274   upb_strtable_uninit(&m->ntof);
1275 err2:
1276   upb_inttable_uninit(&m->itof);
1277 err3:
1278   free(m);
1279   return NULL;
1280 }
1281 
upb_msgdef_dup(const upb_msgdef * m,const void * owner)1282 upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
1283   bool ok;
1284   upb_msg_field_iter i;
1285   upb_msg_oneof_iter o;
1286 
1287   upb_msgdef *newm = upb_msgdef_new(owner);
1288   if (!newm) return NULL;
1289   ok = upb_def_setfullname(upb_msgdef_upcast_mutable(newm),
1290                            upb_def_fullname(upb_msgdef_upcast(m)),
1291                            NULL);
1292   newm->map_entry = m->map_entry;
1293   UPB_ASSERT_VAR(ok, ok);
1294   for(upb_msg_field_begin(&i, m);
1295       !upb_msg_field_done(&i);
1296       upb_msg_field_next(&i)) {
1297     upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f);
1298     /* Fields in oneofs are dup'd below. */
1299     if (upb_fielddef_containingoneof(f)) continue;
1300     if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) {
1301       upb_msgdef_unref(newm, owner);
1302       return NULL;
1303     }
1304   }
1305   for(upb_msg_oneof_begin(&o, m);
1306       !upb_msg_oneof_done(&o);
1307       upb_msg_oneof_next(&o)) {
1308     upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f);
1309     if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) {
1310       upb_msgdef_unref(newm, owner);
1311       return NULL;
1312     }
1313   }
1314   return newm;
1315 }
1316 
/* Freezes the single msgdef |m| by passing it to upb_def_freeze() as a
 * one-element list.  Returns the result of that call. */
bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
  upb_def *defs[1];
  defs[0] = upb_msgdef_upcast_mutable(m);
  return upb_def_freeze(defs, 1, status);
}
1321 
/* Returns the full name of |m|, as stored on its base def. */
const char *upb_msgdef_fullname(const upb_msgdef *m) {
  return upb_def_fullname(upb_msgdef_upcast(m));
}
1325 
/* Sets the full name of |m| by forwarding to upb_def_setfullname() on its
 * base def; returns that call's result. */
bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
                            upb_status *s) {
  return upb_def_setfullname(upb_msgdef_upcast_mutable(m), fullname, s);
}
1330 
1331 /* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
1332  * on status |s| and return false if not. */
check_field_add(const upb_msgdef * m,const upb_fielddef * f,upb_status * s)1333 static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
1334                             upb_status *s) {
1335   if (upb_fielddef_containingtype(f) != NULL) {
1336     upb_status_seterrmsg(s, "fielddef already belongs to a message");
1337     return false;
1338   } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
1339     upb_status_seterrmsg(s, "field name or number were not set");
1340     return false;
1341   } else if (upb_msgdef_ntofz(m, upb_fielddef_name(f)) ||
1342              upb_msgdef_itof(m, upb_fielddef_number(f))) {
1343     upb_status_seterrmsg(s, "duplicate field name or number for field");
1344     return false;
1345   }
1346   return true;
1347 }
1348 
/* Unconditionally attaches field |f| to msgdef |m|; callers are expected to
 * have validated the addition (see check_field_add()).  The field is entered
 * into both the by-number and by-name tables, and the field and message take
 * refs on each other.  If |ref_donor| is non-NULL, the ref on |f| held by
 * |ref_donor| is released afterwards. */
static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
  /* Drop any previous containing-type state before pointing at |m|. */
  release_containingtype(f);
  f->msg.def = m;
  f->msg_is_symbolic = false;
  upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
  upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));
  upb_ref2(f, m);
  upb_ref2(m, f);
  /* Release the donated ref only after the message holds its own. */
  if (ref_donor) upb_fielddef_unref(f, ref_donor);
}
1359 
/* Adds field |f| to msgdef |m|.  If |ref_donor| is non-NULL, the ref it holds
 * on |f| is consumed on a successful add.
 *
 * Returns true if the field was added or already belonged to |m|.  Returns
 * false (reporting through |s|) if check_field_add() rejects the field or if
 * the field is part of a oneof. */
bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
                         upb_status *s) {
  /* TODO: extensions need to have a separate namespace, because proto2 allows a
   * top-level extension (ie. one not in any package) to have the same name as a
   * field from the message.
   *
   * This also implies that there needs to be a separate lookup-by-name method
   * for extensions.  It seems desirable for iteration to return both extensions
   * and non-extensions though.
   *
   * We also need to validate that the field number is in an extension range iff
   * it is an extension. */

  /* This method is idempotent: nothing to do if |f| is already part of this
   * msgdef. */
  if (upb_fielddef_containingtype(f) == m) return true;

  /* Check all constraints before performing any action. */
  if (!check_field_add(m, f, s)) return false;

  if (upb_fielddef_containingoneof(f) != NULL) {
    /* Fields in a oneof can only be added by adding the oneof to the msgdef. */
    upb_status_seterrmsg(s, "fielddef is part of a oneof");
    return false;
  }

  /* Constraint checks ok, perform the action. */
  add_field(m, f, ref_donor);
  return true;
}
1392 
/* Adds oneof |o|, together with all of its fields, to msgdef |m|.  If
 * |ref_donor| is non-NULL, the ref it holds on |o| is consumed on success.
 *
 * Returns false (reporting through |s|) without modifying anything if the
 * oneof already belongs to a message, has no name, clashes with an existing
 * oneof name, or if any of its fields fails check_field_add(). */
bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
                         upb_status *s) {
  upb_oneof_iter it;

  /* Check various conditions that would prevent this oneof from being added. */
  if (upb_oneofdef_containingtype(o)) {
    upb_status_seterrmsg(s, "oneofdef already belongs to a message");
    return false;
  }
  if (upb_oneofdef_name(o) == NULL) {
    upb_status_seterrmsg(s, "oneofdef name was not set");
    return false;
  }
  if (upb_msgdef_ntooz(m, upb_oneofdef_name(o))) {
    upb_status_seterrmsg(s, "duplicate oneof name");
    return false;
  }

  /* Validation pass: none of the oneof's fields may conflict with names or
   * numbers of fields already in the message. */
  upb_oneof_begin(&it, o);
  while (!upb_oneof_done(&it)) {
    const upb_fielddef *candidate = upb_oneof_iter_field(&it);
    if (!check_field_add(m, candidate, s)) return false;
    upb_oneof_next(&it);
  }

  /* Everything checks out -- commit now, starting with the oneof itself. */
  o->parent = m;
  upb_strtable_insert(&m->ntoo, upb_oneofdef_name(o), upb_value_ptr(o));
  upb_ref2(o, m);
  upb_ref2(m, o);

  /* Commit pass: add each field of the oneof directly to the msgdef. */
  upb_oneof_begin(&it, o);
  while (!upb_oneof_done(&it)) {
    upb_fielddef *member = upb_oneof_iter_field(&it);
    add_field(m, member, NULL);
    upb_oneof_next(&it);
  }

  if (ref_donor) upb_oneofdef_unref(o, ref_donor);

  return true;
}
1436 
upb_msgdef_itof(const upb_msgdef * m,uint32_t i)1437 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
1438   upb_value val;
1439   return upb_inttable_lookup32(&m->itof, i, &val) ?
1440       upb_value_getptr(val) : NULL;
1441 }
1442 
upb_msgdef_ntof(const upb_msgdef * m,const char * name,size_t len)1443 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
1444                                     size_t len) {
1445   upb_value val;
1446   return upb_strtable_lookup2(&m->ntof, name, len, &val) ?
1447       upb_value_getptr(val) : NULL;
1448 }
1449 
upb_msgdef_ntoo(const upb_msgdef * m,const char * name,size_t len)1450 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
1451                                     size_t len) {
1452   upb_value val;
1453   return upb_strtable_lookup2(&m->ntoo, name, len, &val) ?
1454       upb_value_getptr(val) : NULL;
1455 }
1456 
/* Returns the number of fields in |m|, counted from the by-name table. */
int upb_msgdef_numfields(const upb_msgdef *m) {
  return upb_strtable_count(&m->ntof);
}
1460 
/* Returns the number of oneofs in |m|, counted from the oneof name table. */
int upb_msgdef_numoneofs(const upb_msgdef *m) {
  return upb_strtable_count(&m->ntoo);
}
1464 
/* Stores the map-entry flag of |m|.  Asserts that |m| is not frozen. */
void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
  assert(!upb_msgdef_isfrozen(m));
  m->map_entry = map_entry;
}
1469 
/* Returns the map-entry flag of |m|.  |m| is dereferenced unconditionally and
 * so must not be NULL. */
bool upb_msgdef_mapentry(const upb_msgdef *m) {
  return m->map_entry;
}
1473 
/* Starts iterating over the fields of |m|; the iteration walks the by-number
 * field table. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
  upb_inttable_begin(iter, &m->itof);
}
1477 
/* Advances the field iterator to the next field. */
void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }
1479 
/* Returns true once the field iterator has moved past the last field. */
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  return upb_inttable_done(iter);
}
1483 
/* Returns the field the iterator currently points at. */
upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
  return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
}
1487 
/* Puts the field iterator into the "done" state. */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
1491 
/* Starts iterating over the oneofs of |m|; the iteration walks the oneof name
 * table. */
void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
  upb_strtable_begin(iter, &m->ntoo);
}
1495 
/* Advances the oneof iterator to the next oneof. */
void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { upb_strtable_next(iter); }
1497 
/* Returns true once the oneof iterator has moved past the last oneof. */
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  return upb_strtable_done(iter);
}
1501 
/* Returns the oneof the iterator currently points at. */
upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
  return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
}
1505 
/* Puts the oneof iterator into the "done" state. */
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
  upb_strtable_iter_setdone(iter);
}
1509 
1510 /* upb_oneofdef ***************************************************************/
1511 
/* Refcounting visit callback for oneofdefs: invokes |visit| once for every
 * field of the oneof and, if the oneof has a parent message, once for that
 * message. */
static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const upb_oneofdef *oneof = (const upb_oneofdef*)r;
  upb_oneof_iter it;

  upb_oneof_begin(&it, oneof);
  while (!upb_oneof_done(&it)) {
    const upb_fielddef *field = upb_oneof_iter_field(&it);
    visit(r, upb_fielddef_upcast2(field), closure);
    upb_oneof_next(&it);
  }

  if (oneof->parent) {
    visit(r, upb_msgdef_upcast2(oneof->parent), closure);
  }
}
1524 
/* Refcounting free callback for oneofdefs: tears down both field tables and
 * the base def, then frees the oneofdef itself. */
static void freeoneof(upb_refcounted *r) {
  upb_oneofdef *o = (upb_oneofdef*)r;
  upb_strtable_uninit(&o->ntof);
  upb_inttable_uninit(&o->itof);
  upb_def_uninit(upb_oneofdef_upcast_mutable(o));
  free(o);
}
1532 
upb_oneofdef_new(const void * owner)1533 upb_oneofdef *upb_oneofdef_new(const void *owner) {
1534   static const struct upb_refcounted_vtbl vtbl = {visitoneof, freeoneof};
1535   upb_oneofdef *o = malloc(sizeof(*o));
1536   o->parent = NULL;
1537   if (!o) return NULL;
1538   if (!upb_def_init(upb_oneofdef_upcast_mutable(o), UPB_DEF_ONEOF, &vtbl,
1539                     owner))
1540     goto err2;
1541   if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
1542   if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
1543   return o;
1544 
1545 err1:
1546   upb_inttable_uninit(&o->itof);
1547 err2:
1548   free(o);
1549   return NULL;
1550 }
1551 
upb_oneofdef_dup(const upb_oneofdef * o,const void * owner)1552 upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
1553   bool ok;
1554   upb_oneof_iter i;
1555   upb_oneofdef *newo = upb_oneofdef_new(owner);
1556   if (!newo) return NULL;
1557   ok = upb_def_setfullname(upb_oneofdef_upcast_mutable(newo),
1558                            upb_def_fullname(upb_oneofdef_upcast(o)), NULL);
1559   UPB_ASSERT_VAR(ok, ok);
1560   for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
1561     upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f);
1562     if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) {
1563       upb_oneofdef_unref(newo, owner);
1564       return NULL;
1565     }
1566   }
1567   return newo;
1568 }
1569 
/* Returns the name of |o|, which is stored as the full name of its base
 * def. */
const char *upb_oneofdef_name(const upb_oneofdef *o) {
  return upb_def_fullname(upb_oneofdef_upcast(o));
}
1573 
/* Sets the name of |o|.  Fails (returning false and reporting through |s|) if
 * the oneof has already been added to a message; otherwise returns the result
 * of upb_def_setfullname(). */
bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname,
                             upb_status *s) {
  if (upb_oneofdef_containingtype(o) == NULL) {
    return upb_def_setfullname(upb_oneofdef_upcast_mutable(o), fullname, s);
  }
  upb_status_seterrmsg(s, "oneof already added to a message");
  return false;
}
1582 
/* Returns the message |o| has been added to, or NULL if it has no parent
 * (the parent is initialized to NULL by upb_oneofdef_new()). */
const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
  return o->parent;
}
1586 
/* Returns the number of fields in |o|, counted from its by-name table. */
int upb_oneofdef_numfields(const upb_oneofdef *o) {
  return upb_strtable_count(&o->ntof);
}
1590 
/* Adds field |f| to oneof |o|.
 *
 * |o| (and its parent msgdef, if any) must not be frozen (asserted).  If
 * |ref_donor| is non-NULL, the ref on |f| owned by |ref_donor| is released
 * once the field has been added; on failure it is left untouched.
 *
 * Returns true on success, or immediately if |f| already belongs to |o|.
 * Returns false with an error recorded in |s| when:
 *   - |f| does not have the OPTIONAL label;
 *   - |f| has no name or its number is 0;
 *   - |o| already has a field with the same name or number;
 *   - |f| already belongs to another oneof;
 *   - |f| belongs to a message that is not |o|'s parent message;
 *   - adding |f| to |o|'s parent message fails. */
bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
                           const void *ref_donor,
                           upb_status *s) {
  assert(!upb_oneofdef_isfrozen(o));
  assert(!o->parent || !upb_msgdef_isfrozen(o->parent));

  /* Idempotence: nothing to do if |f| is already a member of this oneof. */
  if (upb_fielddef_containingoneof(f) == o) return true;

  /* Only OPTIONAL fields may live in a oneof. */
  if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
    upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
    return false;
  }

  /* The field needs both a name and a number so it can be indexed. */
  if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
    upb_status_seterrmsg(s, "field name or number were not set");
    return false;
  }

  /* Neither the number nor the name may collide with an existing member. */
  if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
      upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
    upb_status_seterrmsg(s, "duplicate field name or number");
    return false;
  }

  /* A field can be a member of at most one oneof. */
  if (upb_fielddef_containingoneof(f) != NULL) {
    upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
    return false;
  }

  /* A field that already lives in a message is acceptable only if this oneof
   * lives in that same message.  If the oneof has no message at all, we would
   * have to magically attach the oneof to the field's message to stay
   * consistent, which would be surprising, so that case is rejected too. */
  if (upb_fielddef_containingtype(f) != NULL) {
    if (o->parent == NULL) {
      upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
                              "oneof does not");
      return false;
    }
    if (upb_fielddef_containingtype(f) != o->parent) {
      upb_status_seterrmsg(s, "fielddef belongs to a different message "
                              "than oneof");
      return false;
    }
  }

  /* Commit phase.  Adding to the parent msgdef comes first because it is the
   * only remaining step that can fail. */
  if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL &&
      !upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
    return false;
  }

  release_containingtype(f);
  f->oneof = o;
  upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
  upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
  upb_ref2(f, o);
  upb_ref2(o, f);
  if (ref_donor) {
    upb_fielddef_unref(f, ref_donor);
  }

  return true;
}
1665 
upb_oneofdef_ntof(const upb_oneofdef * o,const char * name,size_t length)1666 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
1667                                       const char *name, size_t length) {
1668   upb_value val;
1669   return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
1670       upb_value_getptr(val) : NULL;
1671 }
1672 
upb_oneofdef_itof(const upb_oneofdef * o,uint32_t num)1673 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
1674   upb_value val;
1675   return upb_inttable_lookup32(&o->itof, num, &val) ?
1676       upb_value_getptr(val) : NULL;
1677 }
1678 
/* Positions |iter| at the start of |o|'s fields.  Iteration walks the
 * number -> field table of the oneof. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  upb_inttable_begin(
      iter, &o->itof);
}
1682 
/* Advances |iter| to the next field (delegates to upb_inttable_next()). */
void upb_oneof_next(upb_oneof_iter *iter) {
  upb_inttable_next(
      iter);
}
1686 
/* Returns whatever upb_inttable_done() reports for |iter|, i.e. whether the
 * iteration is finished. */
bool upb_oneof_done(upb_oneof_iter *iter) {
  bool finished = upb_inttable_done(iter);
  return finished;
}
1690 
upb_oneof_iter_field(const upb_oneof_iter * iter)1691 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
1692   return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
1693 }
1694 
/* Marks |iter| as finished (delegates to upb_inttable_iter_setdone()). */
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  upb_inttable_iter_setdone(
      iter);
}
1698 
1699 
1700 #include <stdlib.h>
1701 #include <stdio.h>
1702 #include <string.h>
1703 
/* One node in the singly-linked list of cleanup callbacks registered on a
 * upb_env.  Nodes are pushed by upb_env_addcleanup() and run by
 * upb_env_uninit(). */
typedef struct cleanup_ent {
  upb_cleanup_func *cleanup;  /* Callback, invoked as cleanup(ud). */
  void *ud;                   /* Argument handed to |cleanup|. */
  struct cleanup_ent *next;   /* Next node in the list, or NULL at the end. */
} cleanup_ent;
1709 
1710 static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size);
1711 
1712 /* Default allocator **********************************************************/
1713 
1714 /* Just use realloc, keeping all allocated blocks in a linked list to destroy at
1715  * the end. */
1716 
/* Header that default_alloc() places directly in front of every block it
 * returns.  It links all live blocks together so default_alloc_cleanup() can
 * free them in one pass. */
typedef struct mem_block {
  /* List is doubly-linked, because in cases where realloc() moves an existing
   * block, we need to be able to remove the old pointer from the list
   * efficiently. */
  struct mem_block *prev, *next;
#ifndef NDEBUG
  /* Debug builds only: lets default_alloc() assert that a caller's |oldsize|
   * does not exceed what was actually allocated. */
  size_t size;  /* Doesn't include mem_block structure. */
#endif
} mem_block;
1726 
/* User data for default_alloc(): tracks every block that is still allocated. */
typedef struct {
  mem_block *head;  /* Most recently allocated block; NULL when list is empty. */
} default_alloc_ud;
1730 
default_alloc(void * _ud,void * ptr,size_t oldsize,size_t size)1731 static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
1732   default_alloc_ud *ud = _ud;
1733   mem_block *from, *block;
1734   void *ret;
1735   UPB_UNUSED(oldsize);
1736 
1737   from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL;
1738 
1739 #ifndef NDEBUG
1740   if (from) {
1741     assert(oldsize <= from->size);
1742   }
1743 #endif
1744 
1745   /* TODO(haberman): we probably need to provide even better alignment here,
1746    * like 16-byte alignment of the returned data pointer. */
1747   block = realloc(from, size + sizeof(mem_block));
1748   if (!block) return NULL;
1749   ret = (char*)block + sizeof(*block);
1750 
1751 #ifndef NDEBUG
1752   block->size = size;
1753 #endif
1754 
1755   if (from) {
1756     if (block != from) {
1757       /* The block was moved, so pointers in next and prev blocks must be
1758        * updated to its new location. */
1759       if (block->next) block->next->prev = block;
1760       if (block->prev) block->prev->next = block;
1761       if (ud->head == from) ud->head = block;
1762     }
1763   } else {
1764     /* Insert at head of linked list. */
1765     block->prev = NULL;
1766     block->next = ud->head;
1767     if (block->next) block->next->prev = block;
1768     ud->head = block;
1769   }
1770 
1771   return ret;
1772 }
1773 
default_alloc_cleanup(void * _ud)1774 static void default_alloc_cleanup(void *_ud) {
1775   default_alloc_ud *ud = _ud;
1776   mem_block *block = ud->head;
1777 
1778   while (block) {
1779     void *to_free = block;
1780     block = block->next;
1781     free(to_free);
1782   }
1783 }
1784 
1785 
1786 /* Standard error functions ***************************************************/
1787 
/* Error callback installed by upb_env_init(): ignores both arguments and
 * returns false. */
static bool default_err(void *ud, const upb_status *status) {
  UPB_UNUSED(status);
  UPB_UNUSED(ud);
  return false;
}
1793 
/* Error callback installed by upb_env_reporterrorsto(): copies |status| into
 * the upb_status that |ud| points to, then returns false. */
static bool write_err_to(void *ud, const upb_status *status) {
  upb_status *destination = ud;
  upb_status_copy(destination, status);
  return false;
}
1799 
1800 
1801 /* upb_env ********************************************************************/
1802 
/* Initializes |e|: marks it ok, zeroes the byte counter, empties the cleanup
 * list and the built-in block list, and installs default_alloc / default_err
 * as the allocation and error functions. */
void upb_env_init(upb_env *e) {
  default_alloc_ud *blocks = (default_alloc_ud*)&e->default_alloc_ud;

  /* No blocks have been handed out by the built-in allocator yet. */
  blocks->head = NULL;

  e->cleanup_head = NULL;
  e->bytes_allocated = 0;
  e->ok_ = true;

  /* Defaults; callers may override either one afterwards. */
  upb_env_seterrorfunc(e, default_err, NULL);
  upb_env_setallocfunc(e, default_alloc, blocks);
}
1815 
/* Tears down |e|: runs every registered cleanup callback, starting from the
 * list head, and then, if the allocator is still default_alloc, frees all
 * memory it handed out. */
void upb_env_uninit(upb_env *e) {
  cleanup_ent *ent;

  for (ent = e->cleanup_head; ent; ent = ent->next) {
    ent->cleanup(ent->ud);
  }

  /* The block release has to come last: the cleanup entries walked above are
     themselves stored in memory obtained from the allocator. */
  if (e->alloc == default_alloc) {
    default_alloc_cleanup(e->alloc_ud);
  }
}
1830 
upb_env_setallocfunc(upb_env * e,upb_alloc_func * alloc,void * ud)1831 UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc,
1832                                           void *ud) {
1833   e->alloc = alloc;
1834   e->alloc_ud = ud;
1835 }
1836 
upb_env_seterrorfunc(upb_env * e,upb_error_func * func,void * ud)1837 UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func,
1838                                           void *ud) {
1839   e->err = func;
1840   e->err_ud = ud;
1841 }
1842 
/* Makes |e| report errors by copying them into |status|: installs
 * write_err_to as the error callback with |status| as its user data. */
void upb_env_reporterrorsto(upb_env *e, upb_status *status) {
  e->err_ud = status;
  e->err = write_err_to;
}
1847 
upb_env_ok(const upb_env * e)1848 bool upb_env_ok(const upb_env *e) {
1849   return e->ok_;
1850 }
1851 
/* Records that an error happened on |e| (clears its ok flag) and forwards
 * |status| to the installed error callback.  Returns the callback's result. */
bool upb_env_reporterror(upb_env *e, const upb_status *status) {
  upb_error_func *handler = e->err;
  e->ok_ = false;
  return handler(e->err_ud, status);
}
1856 
/* Registers |func| to be called with |ud| by upb_env_uninit().  The entry is
 * allocated from |e| itself and pushed onto the front of the cleanup list.
 * Returns false if the entry could not be allocated, true otherwise. */
bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
  cleanup_ent *node = upb_env_malloc(e, sizeof(*node));
  if (node == NULL) {
    return false;
  }

  node->next = e->cleanup_head;
  node->ud = ud;
  node->cleanup = func;
  e->cleanup_head = node;

  return true;
}
1868 
/* Allocates |size| bytes from |e| and adds |size| to its running byte count
 * (the count is bumped before the allocator is called).  Returns whatever the
 * installed allocation function returns. */
void *upb_env_malloc(upb_env *e, size_t size) {
  e->bytes_allocated += size;
  if (e->alloc != seeded_alloc) {
    return e->alloc(e->alloc_ud, NULL, 0, size);
  }
  /* Same call as above, but naming seeded_alloc directly allows it to be
   * inlined, which gives a measurable perf benefit. */
  return seeded_alloc(e->alloc_ud, NULL, 0, size);
}
1879 
/* Resizes |ptr| from |oldsize| to |size| bytes with the allocation function
 * installed on |e|.  |oldsize| must not exceed |size| (asserted).
 *
 * Returns the allocator's result, which may be NULL if the allocation fails.
 * In debug builds the bytes past |oldsize| of a successfully returned block
 * are filled with 0xff. */
void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
  char *ret;
  assert(oldsize <= size);
  ret = e->alloc(e->alloc_ud, ptr, oldsize, size);

#ifndef NDEBUG
  /* Overwrite non-preserved memory to ensure callers are passing the oldsize
   * that they truly require.  Skipped when the allocation failed: writing
   * through a NULL result would crash instead of reporting the failure. */
  if (ret) {
    memset(ret + oldsize, 0xff, size - oldsize);
  }
#endif

  return ret;
}
1893 
upb_env_bytesallocated(const upb_env * e)1894 size_t upb_env_bytesallocated(const upb_env *e) {
1895   return e->bytes_allocated;
1896 }
1897 
1898 
1899 /* upb_seededalloc ************************************************************/
1900 
/* Alignment used for seeded allocations.  16 is a conservative choice in case
 * anyone is using SSE. */
static const size_t maxalign = 16;

/* Rounds |size| up to the nearest multiple of maxalign; a |size| that is
 * already a multiple is returned unchanged. */
static size_t align_up(size_t size) {
  size_t remainder = size % maxalign;
  if (remainder == 0) {
    return size;
  }
  return size + (maxalign - remainder);
}
1907 
seeded_alloc(void * ud,void * ptr,size_t oldsize,size_t size)1908 UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
1909                                           size_t size) {
1910   upb_seededalloc *a = ud;
1911 
1912   size = align_up(size);
1913 
1914   assert(a->mem_limit >= a->mem_ptr);
1915 
1916   if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) {
1917     /* Fast path: we can satisfy from the initial allocation. */
1918     void *ret = a->mem_ptr;
1919     a->mem_ptr += size;
1920     return ret;
1921   } else {
1922     char *chptr = ptr;
1923     /* Slow path: fallback to other allocator. */
1924     a->need_cleanup = true;
1925     /* Is `ptr` part of the user-provided initial block? Don't pass it to the
1926      * default allocator if so; otherwise, it may try to realloc() the block. */
1927     if (chptr >= a->mem_base && chptr < a->mem_limit) {
1928       void *ret;
1929       assert(chptr + oldsize <= a->mem_limit);
1930       ret = a->alloc(a->alloc_ud, NULL, 0, size);
1931       if (ret) memcpy(ret, ptr, oldsize);
1932       return ret;
1933     } else {
1934       return a->alloc(a->alloc_ud, ptr, oldsize, size);
1935     }
1936   }
1937 }
1938 
/* Initializes |a| to serve allocations from the |len| bytes at |mem| first,
 * with default_alloc installed as the fallback allocator. */
void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) {
  default_alloc_ud *fallback_ud = (default_alloc_ud*)&a->default_alloc_ud;

  /* The built-in fallback has not handed out any blocks yet. */
  fallback_ud->head = NULL;

  /* Must be cleared before upb_seededalloc_setfallbackalloc(), which asserts
   * on this flag. */
  a->returned_allocfunc = false;
  a->need_cleanup = false;

  /* Seed region is [mem, mem + len); nothing has been handed out of it. */
  a->mem_limit = (char*)mem + len;
  a->mem_ptr = mem;
  a->mem_base = mem;

  upb_seededalloc_setfallbackalloc(a, default_alloc, fallback_ud);
}
1951 
/* Releases the memory obtained through the fallback allocator.  This only
 * happens when the fallback is still default_alloc and the slow path was
 * taken at least once (need_cleanup is set). */
void upb_seededalloc_uninit(upb_seededalloc *a) {
  if (!a->need_cleanup) return;
  if (a->alloc != default_alloc) return;
  default_alloc_cleanup(a->alloc_ud);
}
1957 
upb_seededalloc_setfallbackalloc(upb_seededalloc * a,upb_alloc_func * alloc,void * ud)1958 UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a,
1959                                                       upb_alloc_func *alloc,
1960                                                       void *ud) {
1961   assert(!a->returned_allocfunc);
1962   a->alloc = alloc;
1963   a->alloc_ud = ud;
1964 }
1965 
upb_seededalloc_getallocfunc(upb_seededalloc * a)1966 upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) {
1967   a->returned_allocfunc = true;
1968   return seeded_alloc;
1969 }
1970 /*
1971 ** TODO(haberman): it's unclear whether a lot of the consistency checks should
1972 ** assert() or return false.
1973 */
1974 
1975 
1976 #include <stdlib.h>
1977 #include <string.h>
1978 
1979 
1980 
1981 /* Defined for the sole purpose of having a unique pointer value for
1982  * UPB_NO_CLOSURE. */
1983 char _upb_noclosure;
1984 
/* Destroys the upb_handlers behind |r|: runs every registered cleanup
 * function on its pointer, then releases the cleanup table, the ref on the
 * msgdef, the subhandlers array and the object itself. */
static void freehandlers(upb_refcounted *r) {
  upb_handlers *h = (upb_handlers*)r;
  upb_inttable_iter it;

  /* Each entry maps a pointer (the key) to the function that frees it. */
  for (upb_inttable_begin(&it, &h->cleanup_);
       !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_handlerfree *free_func =
        upb_value_getfptr(upb_inttable_iter_value(&it));
    void *target = (void*)upb_inttable_iter_key(&it);
    free_func(target);
  }

  upb_inttable_uninit(&h->cleanup_);
  upb_msgdef_unref(h->msg, h);
  free(h->sub);
  free(h);
}
2002 
/* Visit callback for upb_handlers: calls |visit| once for every submessage
 * field of the handlers' message that has subhandlers set, passing those
 * subhandlers as the referenced object. */
static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
                          void *closure) {
  const upb_handlers *h = (const upb_handlers*)r;
  upb_msg_field_iter it;

  for (upb_msg_field_begin(&it, h->msg);
       !upb_msg_field_done(&it);
       upb_msg_field_next(&it)) {
    upb_fielddef *f = upb_msg_iter_field(&it);
    if (upb_fielddef_issubmsg(f)) {
      const upb_handlers *sub = upb_handlers_getsubhandlers(h, f);
      if (sub) {
        visit(r, upb_handlers_upcast(sub), closure);
      }
    }
  }
}
2017 
2018 static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
2019 
/* State shared by the recursive newformsg() calls made on behalf of
 * upb_handlers_newfrozen(). */
typedef struct {
  upb_inttable tab;  /* maps upb_msgdef* -> upb_handlers*. */
  upb_handlers_callback *callback;  /* Called once per new handlers object. */
  const void *closure;              /* First argument passed to |callback|. */
} dfs_state;
2025 
2026 /* TODO(haberman): discard upb_handlers* objects that do not actually have any
2027  * handlers set and cannot reach any upb_handlers* object that does.  This is
2028  * slightly tricky to do correctly. */
newformsg(const upb_msgdef * m,const void * owner,dfs_state * s)2029 static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
2030                                dfs_state *s) {
2031   upb_msg_field_iter i;
2032   upb_handlers *h = upb_handlers_new(m, owner);
2033   if (!h) return NULL;
2034   if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
2035 
2036   s->callback(s->closure, h);
2037 
2038   /* For each submessage field, get or create a handlers object and set it as
2039    * the subhandlers. */
2040   for(upb_msg_field_begin(&i, m);
2041       !upb_msg_field_done(&i);
2042       upb_msg_field_next(&i)) {
2043     upb_fielddef *f = upb_msg_iter_field(&i);
2044     const upb_msgdef *subdef;
2045     upb_value subm_ent;
2046 
2047     if (!upb_fielddef_issubmsg(f)) continue;
2048 
2049     subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
2050     if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
2051       upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
2052     } else {
2053       upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
2054       if (!sub_mh) goto oom;
2055       upb_handlers_setsubhandlers(h, f, sub_mh);
2056       upb_handlers_unref(sub_mh, &sub_mh);
2057     }
2058   }
2059   return h;
2060 
2061 oom:
2062   upb_handlers_unref(h, owner);
2063   return NULL;
2064 }
2065 
/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the
 * subhandlers for this submessage field.  The result is an lvalue, so it can
 * also be assigned to.  Arguments are parenthesized so that expressions such
 * as "&obj" or "a + b" can be passed safely. */
#define SUBH(h, selector) ((h)->sub[(selector)])

/* The selector for a submessage field is the field index. */
#define SUBH_F(h, f) SUBH(h, (f)->index_)
2072 
/* Resolves the selector for handler |type| on field |f| of handlers |h|.
 *
 * Returns the selector, or -1 with an error recorded in h's status if |f|
 * does not belong to h's message or upb_handlers_getselector() rejects the
 * (field, type) pair.  |h| must not be frozen (asserted). */
static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
                         upb_handlertype_t type) {
  upb_selector_t sel;
  assert(!upb_handlers_isfrozen(h));

  if (upb_handlers_msgdef(h) != upb_fielddef_containingtype(f)) {
    upb_status_seterrf(
        &h->status_, "type mismatch: field %s does not belong to message %s",
        upb_fielddef_name(f), upb_msgdef_fullname(upb_handlers_msgdef(h)));
    return -1;
  }

  if (upb_handlers_getselector(f, type, &sel)) {
    return sel;
  }

  upb_status_seterrf(
      &h->status_,
      "type mismatch: cannot register handler type %d for field %s",
      type, upb_fielddef_name(f));
  return -1;
}
2092 
handlers_getsel(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)2093 static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
2094                              upb_handlertype_t type) {
2095   int32_t sel = trygetsel(h, f, type);
2096   assert(sel >= 0);
2097   return sel;
2098 }
2099 
/* Returns the address of the return-closure-type slot stored in the table
 * entry for handler |type| of field |f|. */
static const void **returntype(upb_handlers *h, const upb_fielddef *f,
                               upb_handlertype_t type) {
  upb_selector_t sel = handlers_getsel(h, f, type);
  return &h->table[sel].attr.return_closure_type_;
}
2104 
/* Registers |func| (with optional attributes |attr|) in slot |sel| of |h|.
 *
 * sel:  selector of the handler; a negative value is reported as an error.
 * f:    field the handler is for, or NULL for message-level handlers.
 * type: handler type; decides which enclosing closure type applies.
 *
 * Fails (returns false, error recorded in h's status) if |sel| is negative,
 * the slot already holds a handler, the closure type in |attr| conflicts with
 * the one already established for the enclosing context, or (for STARTSEQ /
 * STARTSTR) the return closure type conflicts with the one already stored in
 * the slot.  |h| must not be frozen (asserted). */
static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
                  upb_handlertype_t type, upb_func *func,
                  upb_handlerattr *attr) {
  upb_handlerattr new_attr = UPB_HANDLERATTR_INITIALIZER;
  const void *wanted_closure;
  const void **established_closure;

  assert(!upb_handlers_isfrozen(h));

  if (sel < 0) {
    upb_status_seterrmsg(&h->status_,
                         "incorrect handler type for this field.");
    return false;
  }

  if (h->table[sel].func) {
    upb_status_seterrmsg(&h->status_,
                         "cannot change handler once it has been set.");
    return false;
  }

  if (attr) new_attr = *attr;

  /* Locate the closure type that the surrounding context has established (if
   * any): string data lives inside STARTSTR, sequence elements inside
   * STARTSEQ, everything else directly inside the message. */
  wanted_closure = upb_handlerattr_closuretype(&new_attr);

  if (type == UPB_HANDLER_STRING) {
    established_closure = returntype(h, f, UPB_HANDLER_STARTSTR);
  } else if (f && upb_fielddef_isseq(f) &&
             type != UPB_HANDLER_STARTSEQ &&
             type != UPB_HANDLER_ENDSEQ) {
    established_closure = returntype(h, f, UPB_HANDLER_STARTSEQ);
  } else {
    established_closure = &h->top_closure_type;
  }

  if (wanted_closure) {
    if (*established_closure && wanted_closure != *established_closure) {
      /* TODO(haberman): better message for debugging. */
      if (f) {
        upb_status_seterrf(&h->status_,
                           "closure type does not match for field %s",
                           upb_fielddef_name(f));
      } else {
        upb_status_seterrmsg(
            &h->status_, "closure type does not match for message-level handler");
      }
      return false;
    }
    /* Compatible (or nothing established yet): record it for the context. */
    *established_closure = wanted_closure;
  }

  /* A STARTSEQ or STARTSTR handler must agree with any return closure type
   * that inner handlers have already pinned on this slot; if the caller gave
   * none, inherit the pinned one. */
  if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
    const void *wanted_return = upb_handlerattr_returnclosuretype(&new_attr);
    const void *pinned_return =
        upb_handlerattr_returnclosuretype(&h->table[sel].attr);
    if (pinned_return) {
      if (!wanted_return) {
        upb_handlerattr_setreturnclosuretype(&new_attr, pinned_return);
      } else if (wanted_return != pinned_return) {
        upb_status_seterrmsg(&h->status_, "closure return type does not match");
        return false;
      }
    }
  }

  h->table[sel].func = func;
  h->table[sel].attr = new_attr;
  return true;
}
2180 
2181 /* Returns the effective closure type for this handler (which will propagate
2182  * from outer frames if this frame has no START* handler).  Not implemented for
2183  * UPB_HANDLER_STRING at the moment since this is not needed.  Returns NULL is
2184  * the effective closure type is unspecified (either no handler was registered
2185  * to specify it or the handler that was registered did not specify the closure
2186  * type). */
effective_closure_type(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)2187 const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
2188                                    upb_handlertype_t type) {
2189   const void *ret;
2190   upb_selector_t sel;
2191 
2192   assert(type != UPB_HANDLER_STRING);
2193   ret = h->top_closure_type;
2194 
2195   if (upb_fielddef_isseq(f) &&
2196       type != UPB_HANDLER_STARTSEQ &&
2197       type != UPB_HANDLER_ENDSEQ &&
2198       h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
2199     ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2200   }
2201 
2202   if (type == UPB_HANDLER_STRING &&
2203       h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
2204     ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2205   }
2206 
2207   /* The effective type of the submessage; not used yet.
2208    * if (type == SUBMESSAGE &&
2209    *     h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
2210    *   ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
2211    * } */
2212 
2213   return ret;
2214 }
2215 
2216 /* Checks whether the START* handler specified by f & type is missing even
2217  * though it is required to convert the established type of an outer frame
2218  * ("closure_type") into the established type of an inner frame (represented in
2219  * the return closure type of this handler's attr. */
checkstart(upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type,upb_status * status)2220 bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
2221                 upb_status *status) {
2222   const void *closure_type;
2223   const upb_handlerattr *attr;
2224   const void *return_closure_type;
2225 
2226   upb_selector_t sel = handlers_getsel(h, f, type);
2227   if (h->table[sel].func) return true;
2228   closure_type = effective_closure_type(h, f, type);
2229   attr = &h->table[sel].attr;
2230   return_closure_type = upb_handlerattr_returnclosuretype(attr);
2231   if (closure_type && return_closure_type &&
2232       closure_type != return_closure_type) {
2233     upb_status_seterrf(status,
2234                        "expected start handler to return sub type for field %f",
2235                        upb_fielddef_name(f));
2236     return false;
2237   }
2238   return true;
2239 }
2240 
2241 /* Public interface ***********************************************************/
2242 
upb_handlers_new(const upb_msgdef * md,const void * owner)2243 upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
2244   int extra;
2245   upb_handlers *h;
2246 
2247   assert(upb_msgdef_isfrozen(md));
2248 
2249   extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
2250   h = calloc(sizeof(*h) + extra, 1);
2251   if (!h) return NULL;
2252 
2253   h->msg = md;
2254   upb_msgdef_ref(h->msg, h);
2255   upb_status_clear(&h->status_);
2256   h->sub = calloc(md->submsg_field_count, sizeof(*h->sub));
2257   if (!h->sub) goto oom;
2258   if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
2259     goto oom;
2260   if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
2261 
2262   /* calloc() above initialized all handlers to NULL. */
2263   return h;
2264 
2265 oom:
2266   freehandlers(upb_handlers_upcast_mutable(h));
2267   return NULL;
2268 }
2269 
upb_handlers_newfrozen(const upb_msgdef * m,const void * owner,upb_handlers_callback * callback,const void * closure)2270 const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
2271                                            const void *owner,
2272                                            upb_handlers_callback *callback,
2273                                            const void *closure) {
2274   dfs_state state;
2275   upb_handlers *ret;
2276   bool ok;
2277   upb_refcounted *r;
2278 
2279   state.callback = callback;
2280   state.closure = closure;
2281   if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
2282 
2283   ret = newformsg(m, owner, &state);
2284 
2285   upb_inttable_uninit(&state.tab);
2286   if (!ret) return NULL;
2287 
2288   r = upb_handlers_upcast_mutable(ret);
2289   ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
2290   UPB_ASSERT_VAR(ok, ok);
2291 
2292   return ret;
2293 }
2294 
upb_handlers_status(upb_handlers * h)2295 const upb_status *upb_handlers_status(upb_handlers *h) {
2296   assert(!upb_handlers_isfrozen(h));
2297   return &h->status_;
2298 }
2299 
/* Clears the status object embedded in |h|.  |h| must not be frozen
 * (asserted). */
void upb_handlers_clearerr(upb_handlers *h) {
  upb_status *status = &h->status_;
  assert(!upb_handlers_isfrozen(h));
  upb_status_clear(status);
}
2304 
/* Defines the public function upb_handlers_set<name>() for one handler type.
 * Each generated setter looks up the selector for (f, handlertype) with
 * trygetsel() and forwards it to doset(), which validates the request and
 * stores the handler.  A failed lookup yields a negative selector, which
 * doset() reports as an error. */
#define SETTER(name, handlerctype, handlertype) \
  bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
                                handlerctype func, upb_handlerattr *attr) { \
    int32_t sel = trygetsel(h, f, handlertype); \
    return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
  }

/* One setter per field-level handler type. */
SETTER(int32,       upb_int32_handlerfunc*,       UPB_HANDLER_INT32)
SETTER(int64,       upb_int64_handlerfunc*,       UPB_HANDLER_INT64)
SETTER(uint32,      upb_uint32_handlerfunc*,      UPB_HANDLER_UINT32)
SETTER(uint64,      upb_uint64_handlerfunc*,      UPB_HANDLER_UINT64)
SETTER(float,       upb_float_handlerfunc*,       UPB_HANDLER_FLOAT)
SETTER(double,      upb_double_handlerfunc*,      UPB_HANDLER_DOUBLE)
SETTER(bool,        upb_bool_handlerfunc*,        UPB_HANDLER_BOOL)
SETTER(startstr,    upb_startstr_handlerfunc*,    UPB_HANDLER_STARTSTR)
SETTER(string,      upb_string_handlerfunc*,      UPB_HANDLER_STRING)
SETTER(endstr,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSTR)
SETTER(startseq,    upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSEQ)
SETTER(startsubmsg, upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSUBMSG)
SETTER(endsubmsg,   upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSUBMSG)
SETTER(endseq,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSEQ)

#undef SETTER
2328 
2329 bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
2330                               upb_handlerattr *attr) {
2331   return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
2332                (upb_func *)func, attr);
2333 }
2334 
/* Registers the message-level end handler of |h|.  The handler goes into the
 * fixed UPB_ENDMSG_SELECTOR slot; no field is involved, so doset() checks the
 * closure type in |attr| against the top-level closure type.  |h| must not be
 * frozen (asserted).  Returns doset()'s result. */
bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
                            upb_handlerattr *attr) {
  upb_func *generic = (upb_func *)func;
  assert(!upb_handlers_isfrozen(h));
  return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, generic,
               attr);
}
2341 
/* Sets |sub| as the handlers for submessage field |f| of |h|, taking a ref
 * from |h| to |sub|.
 *
 * Asserts that |sub| is non-NULL, |h| is not frozen and |f| is a submessage
 * field.  Returns false, changing nothing, if subhandlers were already set
 * for |f| or if |sub| is for a different message type than the field's
 * subdef. */
bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
                                 const upb_handlers *sub) {
  assert(sub);
  assert(!upb_handlers_isfrozen(h));
  assert(upb_fielddef_issubmsg(f));

  /* Subhandlers can't be reset, and their message type has to match. */
  if (SUBH_F(h, f) ||
      upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
    return false;
  }

  SUBH_F(h, f) = sub;
  upb_ref2(sub, h);
  return true;
}
2355 
/* Returns the subhandlers stored in |h| for submessage field |f|; the slot is
 * NULL until upb_handlers_setsubhandlers() succeeds for that field.  |f| must
 * be a submessage field (asserted). */
const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
                                                const upb_fielddef *f) {
  const upb_handlers *sub;
  assert(upb_fielddef_issubmsg(f));
  sub = SUBH_F(h, f);
  return sub;
}
2361 
/* Copies the attributes of the handler in slot |sel| of |h| into |*attr|.
 * Returns false, leaving |*attr| untouched, if no handler is registered for
 * |sel|. */
bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
                          upb_handlerattr *attr) {
  if (upb_handlers_gethandler(h, sel)) {
    *attr = h->table[sel].attr;
    return true;
  }
  return false;
}
2369 
/* Returns the subhandlers of |h| for the submessage field whose STARTSUBMSG
 * selector is |sel|. */
const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
                                                    upb_selector_t sel) {
  /* The STARTSUBMSG selector is the field's selector base, so subtracting the
   * number of static selectors gives the slot in the subhandlers array. */
  return SUBH(
      h, sel - UPB_STATIC_SELECTOR_COUNT);
}
2375 
upb_handlers_msgdef(const upb_handlers * h)2376 const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
2377 
/* Registers |func| to be called on |p| when |h| is destroyed (see
 * freehandlers()).
 *
 * Returns true if the cleanup was registered.  Returns false if |p| already
 * has a cleanup registered, or if the entry could not be inserted into the
 * cleanup table; in both cases the caller still owns |p|. */
bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
  if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) {
    return false;
  }
  /* Report an insertion failure instead of claiming success: otherwise the
   * caller would believe |p| is going to be freed when it never will be. */
  return upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
}
2387 
2388 
2389 /* "Static" methods ***********************************************************/
2390 
/* Validates and then freezes the "n" handlers objects in "handlers".
 *
 * For each handlers object:
 *  - a non-ok accumulated status is reported through "s" and fails the call;
 *  - sequence and string fields are checked with checkstart(), which is
 *    handed "s";
 *  - a submessage field that has STARTSUBMSG/ENDSUBMSG handlers (or, when
 *    repeated, STARTSEQ/ENDSEQ handlers) but no subhandlers gets a new, empty
 *    subhandlers object.
 *
 * Finally the whole array is passed to upb_refcounted_freeze().  Returns true
 * only if every step succeeded. */
bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
  /* TODO: verify we have a transitive closure. */
  int i;
  for (i = 0; i < n; i++) {
    upb_msg_field_iter j;
    upb_handlers *h = handlers[i];

    if (!upb_ok(&h->status_)) {
      upb_status_seterrf(s, "handlers for message %s had error status: %s",
                         upb_msgdef_fullname(upb_handlers_msgdef(h)),
                         upb_status_errmsg(&h->status_));
      return false;
    }

    /* Check that there are no closure mismatches due to missing Start* handlers
     * or subhandlers with different type-level types. */
    for(upb_msg_field_begin(&j, h->msg);
        !upb_msg_field_done(&j);
        upb_msg_field_next(&j)) {

      const upb_fielddef *f = upb_msg_iter_field(&j);
      if (upb_fielddef_isseq(f)) {
        if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s))
          return false;
      }

      if (upb_fielddef_isstring(f)) {
        if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s))
          return false;
      }

      if (upb_fielddef_issubmsg(f)) {
        bool hashandler = false;
        if (upb_handlers_gethandler(
                h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) ||
            upb_handlers_gethandler(
                h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) {
          hashandler = true;
        }

        if (upb_fielddef_isseq(f) &&
            (upb_handlers_gethandler(
                 h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) ||
             upb_handlers_gethandler(
                 h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) {
          hashandler = true;
        }

        if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
          /* For now we add an empty subhandlers in this case.  It makes the
           * decoder code generator simpler, because it only has to handle two
           * cases (submessage has handlers or not) as opposed to three
           * (submessage has handlers in enclosing message but no subhandlers).
           *
           * This makes parsing less efficient in the case that we want to
           * notice a submessage but skip its contents (like if we're testing
           * for submessage presence or counting the number of repeated
           * submessages).  In this case we will end up parsing the submessage
           * field by field and throwing away the results for each, instead of
           * skipping the whole delimited thing at once.  If this is an issue we
           * can revisit it, but do remember that this only arises when you have
           * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
           * submessage but no subhandlers.  The use cases for this are
           * limited. */
          upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
          if (!sub) {
            /* NOTE(review): assumes upb_handlers_new() reports allocation
             * failure by returning NULL -- confirm against its definition.
             * Without this check setsubhandlers() would be handed NULL. */
            upb_status_seterrmsg(s, "out of memory");
            return false;
          }
          upb_handlers_setsubhandlers(h, f, sub);
          /* "h" holds its own ref on "sub" now; drop the temporary one. */
          upb_handlers_unref(sub, &sub);
        }

        /* TODO(haberman): check type of submessage.
         * This is slightly tricky; also consider whether we should check that
         * they match at setsubhandlers time. */
      }
    }
  }

  if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s,
                             UPB_MAX_HANDLER_DEPTH)) {
    return false;
  }

  return true;
}
2474 
upb_handlers_getprimitivehandlertype(const upb_fielddef * f)2475 upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
2476   switch (upb_fielddef_type(f)) {
2477     case UPB_TYPE_INT32:
2478     case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
2479     case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
2480     case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
2481     case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
2482     case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
2483     case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
2484     case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
2485     default: assert(false); return -1;  /* Invalid input. */
2486   }
2487 }
2488 
/* Computes the selector for handler type "type" on field "f" and stores it in
 * "*s".
 *
 * Returns false (leaving "*s" untouched) when "type" does not apply to "f",
 * or when "type" is not one of the known handler types.  Most selectors are
 * offsets from the field's selector_base; STARTSUBMSG is instead derived from
 * the field index so it can double as an index into the subhandlers array. */
bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
                              upb_selector_t *s) {
  switch (type) {
    case UPB_HANDLER_INT32:
    case UPB_HANDLER_INT64:
    case UPB_HANDLER_UINT32:
    case UPB_HANDLER_UINT64:
    case UPB_HANDLER_FLOAT:
    case UPB_HANDLER_DOUBLE:
    case UPB_HANDLER_BOOL:
      if (!upb_fielddef_isprimitive(f) ||
          upb_handlers_getprimitivehandlertype(f) != type)
        return false;
      *s = f->selector_base;
      break;
    case UPB_HANDLER_STRING:
      if (upb_fielddef_isstring(f)) {
        *s = f->selector_base;
      } else if (upb_fielddef_lazy(f)) {
        *s = f->selector_base + 3;
      } else {
        return false;
      }
      break;
    case UPB_HANDLER_STARTSTR:
      if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
        *s = f->selector_base + 1;
      } else {
        return false;
      }
      break;
    case UPB_HANDLER_ENDSTR:
      if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
        *s = f->selector_base + 2;
      } else {
        return false;
      }
      break;
    case UPB_HANDLER_STARTSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = f->selector_base - 2;
      break;
    case UPB_HANDLER_ENDSEQ:
      if (!upb_fielddef_isseq(f)) return false;
      *s = f->selector_base - 1;
      break;
    case UPB_HANDLER_STARTSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      /* Selectors for STARTSUBMSG are at the beginning of the table so that the
       * selector can also be used as an index into the "sub" array of
       * subhandlers.  The indexes for the two into these two tables are the
       * same, except that in the handler table the static selectors come first. */
      *s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
      break;
    case UPB_HANDLER_ENDSUBMSG:
      if (!upb_fielddef_issubmsg(f)) return false;
      *s = f->selector_base;
      break;
    default:
      /* Unknown handler type (e.g. the -1 that
       * upb_handlers_getprimitivehandlertype() returns for invalid input).
       * Without this case "*s" would be read below without ever having been
       * written. */
      return false;
  }
  assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
  return true;
}
2551 
/* Returns 2 for sequence fields and 0 for all others. */
uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
  if (upb_fielddef_isseq(f)) {
    return 2;
  }
  return 0;
}
2555 
/* Returns how many selectors field "f" needs: one base selector, plus two for
 * sequences (STARTSEQ/ENDSEQ), plus two for strings (STARTSTR/ENDSTR), plus
 * three for lazy submessages (STARTSTR/ENDSTR/STRING).  A submessage adds
 * nothing for ENDSUBMSG on its own, and STARTSUBMSG lives at the beginning of
 * the table. */
uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
  uint32_t count = 1;

  if (upb_fielddef_isseq(f)) {
    count += 2;
  }
  if (upb_fielddef_isstring(f)) {
    count += 2;
  }
  if (upb_fielddef_issubmsg(f) && upb_fielddef_lazy(f)) {
    count += 3;
  }
  return count;
}
2570 
2571 
2572 /* upb_handlerattr ************************************************************/
2573 
/* Resets "*attr" to the values of UPB_HANDLERATTR_INITIALIZER. */
void upb_handlerattr_init(upb_handlerattr *attr) {
  const upb_handlerattr defaults = UPB_HANDLERATTR_INITIALIZER;
  memcpy(attr, &defaults, sizeof(defaults));
}
2578 
/* Counterpart of upb_handlerattr_init(); currently does nothing. */
void upb_handlerattr_uninit(upb_handlerattr *attr) {
  /* Nothing to release. */
  UPB_UNUSED(attr);
}
2582 
/* Stores "hd" as the handler data of "attr".  Always returns true. */
bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) {
  attr->handler_data_ = hd;

  return true;
}
2587 
/* Stores "type" as the closure type of "attr".  Always returns true. */
bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) {
  attr->closure_type_ = type;

  return true;
}
2592 
upb_handlerattr_closuretype(const upb_handlerattr * attr)2593 const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) {
2594   return attr->closure_type_;
2595 }
2596 
/* Stores "type" as the return closure type of "attr".  Always returns true. */
bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
                                          const void *type) {
  attr->return_closure_type_ = type;

  return true;
}
2602 
upb_handlerattr_returnclosuretype(const upb_handlerattr * attr)2603 const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) {
2604   return attr->return_closure_type_;
2605 }
2606 
/* Stores the "alwaysok" flag in "attr".  Always returns true. */
bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) {
  attr->alwaysok_ = alwaysok;

  return true;
}
2611 
upb_handlerattr_alwaysok(const upb_handlerattr * attr)2612 bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) {
2613   return attr->alwaysok_;
2614 }
2615 
2616 /* upb_bufhandle **************************************************************/
2617 
upb_bufhandle_objofs(const upb_bufhandle * h)2618 size_t upb_bufhandle_objofs(const upb_bufhandle *h) {
2619   return h->objofs_;
2620 }
2621 
2622 /* upb_byteshandler ***********************************************************/
2623 
/* Zero-fills the whole bytes handler, clearing every table entry. */
void upb_byteshandler_init(upb_byteshandler* h) {
  memset(h, 0, sizeof(upb_byteshandler));
}
2627 
2628 /* For when we support handlerfree callbacks. */
/* Counterpart of upb_byteshandler_init(); currently does nothing. */
void upb_byteshandler_uninit(upb_byteshandler* h) {
  /* Nothing to release. */
  UPB_UNUSED(h);
}
2632 
/* Installs "func" and its handler data "d" in the UPB_STARTSTR_SELECTOR slot
 * of "h".  Always returns true. */
bool upb_byteshandler_setstartstr(upb_byteshandler *h,
                                  upb_startstr_handlerfunc *func, void *d) {
  h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d;
  h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;
  return true;
}
2639 
/* Installs "func" and its handler data "d" in the UPB_STRING_SELECTOR slot of
 * "h".  Always returns true. */
bool upb_byteshandler_setstring(upb_byteshandler *h,
                                upb_string_handlerfunc *func, void *d) {
  h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d;
  h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;
  return true;
}
2646 
/* Installs "func" and its handler data "d" in the UPB_ENDSTR_SELECTOR slot of
 * "h".  Always returns true. */
bool upb_byteshandler_setendstr(upb_byteshandler *h,
                                upb_endfield_handlerfunc *func, void *d) {
  h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
  h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;
  return true;
}
2653 /*
2654 ** upb::RefCounted Implementation
2655 **
2656 ** Our key invariants are:
2657 ** 1. reference cycles never span groups
2658 ** 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
2659 **
2660 ** The previous two are how we avoid leaking cycles.  Other important
2661 ** invariants are:
2662 ** 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
2663 **    this implies group(from) == group(to).  (In practice, what we implement
2664 **    is even stronger; "from" and "to" will share a group if there has *ever*
2665 **    been a ref2(to, from), but all that is necessary for correctness is the
2666 **    weaker one).
2667 ** 4. mutable and immutable objects are never in the same group.
2668 */
2669 
2670 
2671 #include <setjmp.h>
2672 #include <stdlib.h>
2673 
2674 static void freeobj(upb_refcounted *o);
2675 
/* Its address serves as the UPB_UNTRACKED_REF sentinel below. */
const char untracked_val;
/* Sentinel owner value: in UPB_DEBUG_REFS mode track() and untrack() return
 * immediately when handed this owner. */
const void *UPB_UNTRACKED_REF = &untracked_val;
2678 
2679 /* arch-specific atomic primitives  *******************************************/
2680 
/* atomic_inc() adds one to "*a"; atomic_dec() subtracts one and returns true
 * iff the counter reached zero.  Every variant takes a uint32_t* because that
 * is what refgroup()/unrefgroup() pass in. */
#ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/

/* Plain, non-atomic arithmetic. */
static void atomic_inc(uint32_t *a) { (*a)++; }
static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }

#elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/

static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }

#elif defined(WIN32) || defined(_WIN32) /*------------------------------------*/

#include <Windows.h>

/* The Interlocked* functions operate on a 32-bit LONG, so the counter can be
 * handed over with a cast; this keeps the signatures identical to the other
 * branches (they previously took an unrelated upb_atomic_t*). */
static void atomic_inc(uint32_t *a) {
  InterlockedIncrement((volatile LONG *)a);
}
static bool atomic_dec(uint32_t *a) {
  return InterlockedDecrement((volatile LONG *)a) == 0;
}

#else
#error Atomic primitives not defined for your platform/CPU.  \
       Implement them or compile with UPB_THREAD_UNSAFE.
#endif
2704 
/* Shared refcount that every static object points to.  refgroup() and
 * unrefgroup() recognise it by address and never modify it. */
uint32_t static_refcount = UINT32_MAX;
2708 
2709 /* We can avoid atomic ops for statically-declared objects.
2710  * This is a minor optimization but nice since we can avoid degrading under
2711  * contention in this case. */
2712 
refgroup(uint32_t * group)2713 static void refgroup(uint32_t *group) {
2714   if (group != &static_refcount)
2715     atomic_inc(group);
2716 }
2717 
unrefgroup(uint32_t * group)2718 static bool unrefgroup(uint32_t *group) {
2719   if (group == &static_refcount) {
2720     return false;
2721   } else {
2722     return atomic_dec(group);
2723   }
2724 }
2725 
2726 
2727 /* Reference tracking (debug only) ********************************************/
2728 
2729 #ifdef UPB_DEBUG_REFS
2730 
2731 #ifdef UPB_THREAD_UNSAFE
2732 
/* No locking is needed in a UPB_THREAD_UNSAFE build. */
static void upb_lock() {
}
/* No locking is needed in a UPB_THREAD_UNSAFE build. */
static void upb_unlock() {
}
2735 
2736 #else
2737 
2738 /* User must define functions that lock/unlock a global mutex and link this
2739  * file against them. */
2740 void upb_lock();
2741 void upb_unlock();
2742 
2743 #endif
2744 
/* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
 * code-paths that can normally never fail, like upb_refcounted_ref().  Since
 * we have no way to propagate out-of-memory errors back to the user, and since
 * these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail.
 *
 * Wrapped in do/while(0) so that "CHECK_OOM(x);" is a single statement and is
 * safe inside an unbraced if/else. */
#define CHECK_OOM(predicate) \
  do {                       \
    if (!(predicate)) {      \
      assert(predicate);     \
      exit(1);               \
    }                        \
  } while (0)
2750 
/* Bookkeeping record for one (object, owner) pair; stored as the value in an
 * object's "refs" table. */
typedef struct {
  int count;  /* How many refs there are (duplicates only allowed for ref2). */
  bool is_ref2;  /* Whether this record was created for a ref2. */
} trackedref;
2755 
trackedref_new(bool is_ref2)2756 static trackedref *trackedref_new(bool is_ref2) {
2757   trackedref *ret = malloc(sizeof(*ret));
2758   CHECK_OOM(ret);
2759   ret->count = 1;
2760   ret->is_ref2 = is_ref2;
2761   return ret;
2762 }
2763 
track(const upb_refcounted * r,const void * owner,bool ref2)2764 static void track(const upb_refcounted *r, const void *owner, bool ref2) {
2765   upb_value v;
2766 
2767   assert(owner);
2768   if (owner == UPB_UNTRACKED_REF) return;
2769 
2770   upb_lock();
2771   if (upb_inttable_lookupptr(r->refs, owner, &v)) {
2772     trackedref *ref = upb_value_getptr(v);
2773     /* Since we allow multiple ref2's for the same to/from pair without
2774      * allocating separate memory for each one, we lose the fine-grained
2775      * tracking behavior we get with regular refs.  Since ref2s only happen
2776      * inside upb, we'll accept this limitation until/unless there is a really
2777      * difficult upb-internal bug that can't be figured out without it. */
2778     assert(ref2);
2779     assert(ref->is_ref2);
2780     ref->count++;
2781   } else {
2782     trackedref *ref = trackedref_new(ref2);
2783     bool ok = upb_inttable_insertptr(r->refs, owner, upb_value_ptr(ref));
2784     CHECK_OOM(ok);
2785     if (ref2) {
2786       /* We know this cast is safe when it is a ref2, because it's coming from
2787        * another refcounted object. */
2788       const upb_refcounted *from = owner;
2789       assert(!upb_inttable_lookupptr(from->ref2s, r, NULL));
2790       ok = upb_inttable_insertptr(from->ref2s, r, upb_value_ptr(NULL));
2791       CHECK_OOM(ok);
2792     }
2793   }
2794   upb_unlock();
2795 }
2796 
untrack(const upb_refcounted * r,const void * owner,bool ref2)2797 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
2798   upb_value v;
2799   bool found;
2800   trackedref *ref;
2801 
2802   assert(owner);
2803   if (owner == UPB_UNTRACKED_REF) return;
2804 
2805   upb_lock();
2806   found = upb_inttable_lookupptr(r->refs, owner, &v);
2807   /* This assert will fail if an owner attempts to release a ref it didn't have. */
2808   UPB_ASSERT_VAR(found, found);
2809   ref = upb_value_getptr(v);
2810   assert(ref->is_ref2 == ref2);
2811   if (--ref->count == 0) {
2812     free(ref);
2813     upb_inttable_removeptr(r->refs, owner, NULL);
2814     if (ref2) {
2815       /* We know this cast is safe when it is a ref2, because it's coming from
2816        * another refcounted object. */
2817       const upb_refcounted *from = owner;
2818       bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
2819       assert(removed);
2820     }
2821   }
2822   upb_unlock();
2823 }
2824 
checkref(const upb_refcounted * r,const void * owner,bool ref2)2825 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
2826   upb_value v;
2827   bool found;
2828   trackedref *ref;
2829 
2830   upb_lock();
2831   found = upb_inttable_lookupptr(r->refs, owner, &v);
2832   UPB_ASSERT_VAR(found, found);
2833   ref = upb_value_getptr(v);
2834   assert(ref->is_ref2 == ref2);
2835   upb_unlock();
2836 }
2837 
2838 /* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
2839  * originate from the given owner. */
/* Fills "tab" with one entry per object that "owner" holds ref2s on, keyed by
 * that object and valued with the number of such ref2s.  Runs under
 * upb_lock(). */
static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
  upb_inttable_iter it;

  upb_lock();
  for (upb_inttable_begin(&it, owner->ref2s);
       !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_value entry;
    trackedref *record;
    bool inserted;
    bool found;
    upb_refcounted *target = (upb_refcounted*)upb_inttable_iter_key(&it);

    /* The count is kept in the target's table, not in the owner's. */
    found = upb_inttable_lookupptr(target->refs, owner, &entry);
    assert(found);
    record = upb_value_getptr(entry);

    inserted = upb_inttable_insertptr(tab, target,
                                      upb_value_int32(record->count));
    CHECK_OOM(inserted);
  }
  upb_unlock();
}
2865 
/* Closure handed to visit_check() while verifying a visit() implementation. */
typedef struct {
  upb_inttable ref2;  /* Outstanding ref2 counts, filled in by getref2s(). */
  const upb_refcounted *obj;  /* The object whose children are being visited. */
} check_state;
2870 
visit_check(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)2871 static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
2872                         void *closure) {
2873   check_state *s = closure;
2874   upb_inttable *ref2 = &s->ref2;
2875   upb_value v;
2876   bool removed;
2877   int32_t newcount;
2878 
2879   assert(obj == s->obj);
2880   assert(subobj);
2881   removed = upb_inttable_removeptr(ref2, subobj, &v);
2882   /* The following assertion will fail if the visit() function visits a subobj
2883    * that it did not have a ref2 on, or visits the same subobj too many times. */
2884   assert(removed);
2885   newcount = upb_value_getint32(v) - 1;
2886   if (newcount > 0) {
2887     upb_inttable_insert(ref2, (uintptr_t)subobj, upb_value_int32(newcount));
2888   }
2889 }
2890 
/* Debug variant of visit(): first runs the object's visit function against
 * visit_check() to confirm it visits exactly the children recorded as ref2s,
 * then runs it again with the caller's callback "v" and "closure". */
static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
                  void *closure) {
  check_state expected;
  bool table_ok;

  /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
   * exactly the set of nodes that visit() should visit.  So we verify visit()'s
   * correctness here. */
  expected.obj = r;
  table_ok = upb_inttable_init(&expected.ref2, UPB_CTYPE_INT32);
  CHECK_OOM(table_ok);
  getref2s(r, &expected.ref2);

  /* This should visit any children in the ref2 table. */
  if (r->vtbl->visit) {
    r->vtbl->visit(r, visit_check, &expected);
  }

  /* This assertion will fail if the visit() function missed any children. */
  assert(upb_inttable_count(&expected.ref2) == 0);
  upb_inttable_uninit(&expected.ref2);

  /* Verification done; now perform the visit the caller asked for. */
  if (r->vtbl->visit) {
    r->vtbl->visit(r, v, closure);
  }
}
2912 
/* Allocates and initializes the "refs" and "ref2s" tracking tables of "r".
 * Returns false, after releasing whatever was set up, if any allocation or
 * table initialization fails. */
static bool trackinit(upb_refcounted *r) {
  r->refs = malloc(sizeof(*r->refs));
  r->ref2s = malloc(sizeof(*r->ref2s));

  if (r->refs && r->ref2s && upb_inttable_init(r->refs, UPB_CTYPE_PTR)) {
    if (upb_inttable_init(r->ref2s, UPB_CTYPE_PTR)) {
      return true;
    }
    /* Second table failed: undo the first one. */
    upb_inttable_uninit(r->refs);
  }

  free(r->refs);
  free(r->ref2s);
  return false;
}
2929 
trackfree(const upb_refcounted * r)2930 static void trackfree(const upb_refcounted *r) {
2931   upb_inttable_uninit(r->refs);
2932   upb_inttable_uninit(r->ref2s);
2933   free(r->refs);
2934   free(r->ref2s);
2935 }
2936 
2937 #else
2938 
/* Ref tracking is compiled out without UPB_DEBUG_REFS: does nothing. */
static void track(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
2944 
/* Ref tracking is compiled out without UPB_DEBUG_REFS: does nothing. */
static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
2950 
/* Ref tracking is compiled out without UPB_DEBUG_REFS: does nothing. */
static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
  UPB_UNUSED(ref2);
  UPB_UNUSED(owner);
  UPB_UNUSED(r);
}
2956 
/* Nothing to set up without UPB_DEBUG_REFS; always reports success. */
static bool trackinit(upb_refcounted *r) {
  UPB_UNUSED(r);

  return true;
}
2961 
/* Nothing to tear down without UPB_DEBUG_REFS. */
static void trackfree(const upb_refcounted *r) {
  /* No tracking tables exist in this build. */
  UPB_UNUSED(r);
}
2965 
/* Invokes the object's visit function, if it has one, with callback "v" and
 * "closure". */
static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
                  void *closure) {
  if (!r->vtbl->visit) {
    return;
  }
  r->vtbl->visit(r, v, closure);
}
2970 
2971 #endif  /* UPB_DEBUG_REFS */
2972 
2973 
2974 /* freeze() *******************************************************************/
2975 
2976 /* The freeze() operation is by far the most complicated part of this scheme.
2977  * We compute strongly-connected components and then mutate the graph such that
2978  * we preserve the invariants documented at the top of this file.  And we must
2979  * handle out-of-memory errors gracefully (without leaving the graph
2980  * inconsistent), which adds to the fun. */
2981 
2982 /* The state used by the freeze operation (shared across many functions). */
/* Working state of one freeze() run, threaded through all the Tarjan helper
 * functions below. */
typedef struct {
  int depth;     /* Current nesting depth; tarjan_visit() bumps it per call. */
  int maxdepth;  /* Exceeding this depth aborts the freeze with an error. */
  uint64_t index;  /* Next index to hand out in push(). */
  /* Maps upb_refcounted* -> attributes (color, etc).  attr layout varies by
   * color. */
  upb_inttable objattr;
  upb_inttable stack;   /* stack of upb_refcounted* for Tarjan's algorithm. */
  upb_inttable groups;  /* array of uint32_t*, malloc'd refcounts for new groups */
  upb_status *status;   /* Receives the error message when the freeze fails. */
  jmp_buf err;          /* longjmp() target used by err() and oom(). */
} tarjan;
2995 
2996 static void release_ref2(const upb_refcounted *obj,
2997                          const upb_refcounted *subobj,
2998                          void *closure);
2999 
3000 /* Node attributes -----------------------------------------------------------*/
3001 
3002 /* After our analysis phase all nodes will be either GRAY or WHITE. */
3003 
/* Node color; color() reads it from the low 2 bits of a node's attribute.
 * An object with no attribute entry reads as BLACK. */
typedef enum {
  BLACK = 0,  /* Object has not been seen. */
  GRAY,   /* Object has been found via a refgroup but may not be reachable. */
  GREEN,  /* Object is reachable and is currently on the Tarjan stack. */
  WHITE   /* Object is reachable and has been assigned a group (SCC). */
} color_t;
3010 
err(tarjan * t)3011 UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
oom(tarjan * t)3012 UPB_NORETURN static void oom(tarjan *t) {
3013   upb_status_seterrmsg(t->status, "out of memory");
3014   err(t);
3015 }
3016 
trygetattr(const tarjan * t,const upb_refcounted * r)3017 static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
3018   upb_value v;
3019   return upb_inttable_lookupptr(&t->objattr, r, &v) ?
3020       upb_value_getuint64(v) : 0;
3021 }
3022 
getattr(const tarjan * t,const upb_refcounted * r)3023 static uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
3024   upb_value v;
3025   bool found = upb_inttable_lookupptr(&t->objattr, r, &v);
3026   UPB_ASSERT_VAR(found, found);
3027   return upb_value_getuint64(v);
3028 }
3029 
/* Replaces the attribute stored for "r" with "attr".
 *
 * A failed insert is reported through oom(), as tarjan_newgroup() does for its
 * own table operations.  Previously the result was dropped, which left "r"
 * without any attribute, so it read as BLACK again. */
static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
  upb_inttable_removeptr(&t->objattr, r, NULL);
  if (!upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr))) {
    oom(t);
  }
}
3034 
color(tarjan * t,const upb_refcounted * r)3035 static color_t color(tarjan *t, const upb_refcounted *r) {
3036   return trygetattr(t, r) & 0x3;  /* Color is always stored in the low 2 bits. */
3037 }
3038 
/* Marks a so-far unseen (BLACK) object as GRAY. */
static void set_gray(tarjan *t, const upb_refcounted *r) {
  assert(BLACK == color(t, r));
  setattr(t, r, GRAY);
}
3043 
3044 /* Pushes an obj onto the Tarjan stack and sets it to GREEN. */
/* Pushes an obj onto the Tarjan stack and sets it to GREEN.
 *
 * Aborts the freeze through err()/oom() if the index space is exhausted or if
 * the stack push fails.  The push result used to be ignored, although
 * tarjan_newgroup() checks the same call on its own table. */
static void push(tarjan *t, const upb_refcounted *r) {
  assert(color(t, r) == BLACK || color(t, r) == GRAY);
  /* This defines the attr layout for the GREEN state.  "index" and "lowlink"
   * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
  setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
  if (++t->index == 0x80000000) {
    upb_status_seterrmsg(t->status, "too many objects to freeze");
    err(t);
  }
  if (!upb_inttable_push(&t->stack, upb_value_ptr((void*)r))) {
    oom(t);
  }
}
3056 
3057 /* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
3058  * SCC group. */
pop(tarjan * t)3059 static upb_refcounted *pop(tarjan *t) {
3060   upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack));
3061   assert(color(t, r) == GREEN);
3062   /* This defines the attr layout for nodes in the WHITE state.
3063    * Top of group stack is [group, NULL]; we point at group. */
3064   setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8);
3065   return r;
3066 }
3067 
/* Starts a new group: allocates its refcount (initially 0) and appends the
 * pair [group, NULL leader] to t->groups.  Aborts the freeze through oom() on
 * any allocation failure.
 *
 * If only the second push fails, the first entry is popped again before the
 * group is freed, so t->groups never keeps a pointer to freed memory and
 * always holds complete pairs. */
static void tarjan_newgroup(tarjan *t) {
  uint32_t *group = malloc(sizeof(*group));
  if (!group) oom(t);
  *group = 0;

  if (!upb_inttable_push(&t->groups, upb_value_ptr(group))) {
    free(group);
    oom(t);
  }

  /* Empty group leader; groupleader() fills it in later. */
  if (!upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
    upb_inttable_pop(&t->groups);
    free(group);
    oom(t);
  }
}
3079 
/* Returns the Tarjan index of a GREEN object (the 31 bits above the color). */
static uint32_t idx(tarjan *t, const upb_refcounted *r) {
  uint64_t attr;

  assert(color(t, r) == GREEN);
  attr = getattr(t, r);
  return (attr >> 2) & 0x7FFFFFFF;
}
3084 
/* Returns the lowlink of a GREEN object (attribute bits 33 and up); any other
 * color yields UINT32_MAX. */
static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
  if (color(t, r) != GREEN) {
    return UINT32_MAX;
  }
  return getattr(t, r) >> 33;
}
3092 
/* Overwrites the lowlink of a GREEN object, keeping its low 33 attribute bits
 * (color and index) as they are. */
static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
  uint64_t kept_bits;
  uint64_t shifted_lowlink;

  assert(color(t, r) == GREEN);
  shifted_lowlink = (uint64_t)lowlink << 33;
  kept_bits = getattr(t, r) & 0x1FFFFFFFF;
  setattr(t, r, shifted_lowlink | kept_bits);
}
3097 
/* Returns the refcount pointer of the group that a WHITE object was assigned
 * to, looked up through the slot number stored in its attribute. */
static uint32_t *group(tarjan *t, upb_refcounted *r) {
  upb_value entry;
  uint64_t slot;
  bool present;

  assert(color(t, r) == WHITE);
  slot = getattr(t, r) >> 8;
  present = upb_inttable_lookup(&t->groups, slot, &entry);
  UPB_ASSERT_VAR(present, present);
  return upb_value_getptr(entry);
}
3109 
3110 /* If the group leader for this object's group has not previously been set,
3111  * the given object is assigned to be its leader. */
/* Returns the leader of the group a WHITE object belongs to.  If no leader
 * has been recorded for that group yet, "r" itself becomes the leader. */
static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
  upb_value entry;
  upb_refcounted *leader;
  uint64_t slot;
  bool present;

  assert(color(t, r) == WHITE);
  /* The leader is stored in the slot right after the group's refcount. */
  slot = (getattr(t, r) >> 8) + 1;
  present = upb_inttable_lookup(&t->groups, slot, &entry);
  UPB_ASSERT_VAR(present, present);

  leader = upb_value_getptr(entry);
  if (leader) {
    return leader;
  }

  upb_inttable_remove(&t->groups, slot, NULL);
  upb_inttable_insert(&t->groups, slot, upb_value_ptr(r));
  return r;
}
3129 
3130 
3131 /* Tarjan's algorithm --------------------------------------------------------*/
3132 
3133 /* See:
3134  *   http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
3135 static void do_tarjan(const upb_refcounted *obj, tarjan *t);
3136 
tarjan_visit(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)3137 static void tarjan_visit(const upb_refcounted *obj,
3138                          const upb_refcounted *subobj,
3139                          void *closure) {
3140   tarjan *t = closure;
3141   if (++t->depth > t->maxdepth) {
3142     upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
3143     err(t);
3144   } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
3145     /* Do nothing: we don't want to visit or color already-frozen nodes,
3146      * and WHITE nodes have already been assigned a SCC. */
3147   } else if (color(t, subobj) < GREEN) {
3148     /* Subdef has not yet been visited; recurse on it. */
3149     do_tarjan(subobj, t);
3150     set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
3151   } else if (color(t, subobj) == GREEN) {
3152     /* Subdef is in the stack and hence in the current SCC. */
3153     set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
3154   }
3155   --t->depth;
3156 }
3157 
/* Runs Tarjan's algorithm starting from "obj".
 *
 * An unseen object first has its whole group (the ring linked through ->next)
 * marked GRAY.  The object is then pushed, its children are visited, and if it
 * turns out to be the root of an SCC a new group is created and the stack is
 * popped down to and including "obj". */
static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
  if (color(t, obj) == BLACK) {
    /* We haven't seen this object's group; mark the whole group GRAY. */
    const upb_refcounted *member = obj;
    do {
      set_gray(t, member);
      member = member->next;
    } while (member != obj);
  }

  push(t, obj);
  visit(obj, tarjan_visit, t);

  if (lowlink(t, obj) != idx(t, obj)) {
    return;
  }

  /* "obj" is the root of an SCC. */
  tarjan_newgroup(t);
  for (;;) {
    if (pop(t) == obj) break;
  }
}
3173 
3174 
3175 /* freeze() ------------------------------------------------------------------*/
3176 
crossref(const upb_refcounted * r,const upb_refcounted * subobj,void * _t)3177 static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
3178                      void *_t) {
3179   tarjan *t = _t;
3180   assert(color(t, r) > BLACK);
3181   if (color(t, subobj) > BLACK && r->group != subobj->group) {
3182     /* Previously this ref was not reflected in subobj->group because they
3183      * were in the same group; now that they are split a ref must be taken. */
3184     refgroup(subobj->group);
3185   }
3186 }
3187 
/* Freezes the objects reachable from the n entries of "roots".
 *
 * First runs the Tarjan traversal (do_tarjan) from every root that has not
 * been reached yet, then rewrites the group membership of the visited
 * objects in three passes (see the comments on each pass below).
 *
 * roots/n:   the objects to start from.
 * s:         status object stored in the tarjan state for error reporting.
 * maxdepth:  traversal depth limit stored in the tarjan state.
 *
 * Returns true on success.  Returns false if one of the scratch tables could
 * not be initialized or if control comes back through setjmp(t.err); in the
 * latter case the group counters recorded in t.groups are freed. */
static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
                   int maxdepth) {
  /* NOTE(review): presumably volatile because it is read after a possible
   * longjmp back to the setjmp() below -- confirm against err(). */
  volatile bool ret = false;
  int i;
  upb_inttable_iter iter;

  /* We run in two passes so that we can allocate all memory before performing
   * any mutation of the input -- this allows us to leave the input unchanged
   * in the case of memory allocation failure. */
  tarjan t;
  t.index = 0;
  t.depth = 0;
  t.maxdepth = maxdepth;
  t.status = s;
  /* Each failure jumps to the label that tears down only what was already
   * initialized. */
  if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
  if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
  if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
  if (setjmp(t.err) != 0) goto err4;


  for (i = 0; i < n; i++) {
    if (color(&t, roots[i]) < GREEN) {
      do_tarjan(roots[i], &t);
    }
  }

  /* If we've made it this far, no further errors are possible so it's safe to
   * mutate the objects without risk of leaving them in an inconsistent state. */
  ret = true;

  /* The transformation that follows requires care.  The preconditions are:
   * - all objects in attr map are WHITE or GRAY, and are in mutable groups
   *   (groups of all mutable objs)
   * - no ref2(to, from) refs have incremented count(to) if both "to" and
   *   "from" are in our attr map (this follows from invariants (2) and (3)) */

  /* Pass 1: we remove WHITE objects from their mutable groups, and add them to
   * new groups  according to the SCC's we computed.  These new groups will
   * consist of only frozen objects.  None will be immediately collectible,
   * because WHITE objects are by definition reachable from one of "roots",
   * which the caller must own refs on. */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    /* Since removal from a singly-linked list requires access to the object's
     * predecessor, we consider obj->next instead of obj for moving.  With the
     * while() loop we guarantee that we will visit every node's predecessor.
     * Proof:
     *  1. every node's predecessor is in our attr map.
     *  2. though the loop body may change a node's predecessor, it will only
     *     change it to be the node we are currently operating on, so with a
     *     while() loop we guarantee ourselves the chance to remove each node. */
    while (color(&t, obj->next) == WHITE &&
           group(&t, obj->next) != obj->next->group) {
      upb_refcounted *leader;

      /* Remove from old group. */
      upb_refcounted *move = obj->next;
      if (obj == move) {
        /* Removing the last object from a group. */
        assert(*obj->group == obj->individual_count);
        free(obj->group);
      } else {
        obj->next = move->next;
        /* This may decrease to zero; we'll collect GRAY objects (if any) that
         * remain in the group in the third pass. */
        assert(*move->group >= move->individual_count);
        *move->group -= move->individual_count;
      }

      /* Add to new group. */
      leader = groupleader(&t, move);
      if (move == leader) {
        /* First object added to new group is its leader. */
        move->group = group(&t, move);
        move->next = move;
        *move->group = move->individual_count;
      } else {
        /* Group already has at least one object in it. */
        assert(leader->group == group(&t, move));
        move->group = group(&t, move);
        move->next = leader->next;
        leader->next = move;
        *move->group += move->individual_count;
      }

      move->is_frozen = true;
    }
  }

  /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
   * increment count(to) if group(obj) != group(to) (which could now be the
   * case if "to" was just frozen). */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    visit(obj, crossref, &t);
  }

  /* Pass 3: GRAY objects are collected if their group's refcount dropped to
   * zero when we removed its white nodes.  This can happen if they had only
   * been kept alive by virtue of sharing a group with an object that was just
   * frozen.
   *
   * It is important that we do this last, since the GRAY object's free()
   * function could call unref2() on just-frozen objects, which will decrement
   * refs that were added in pass 2. */
  upb_inttable_begin(&iter, &t.objattr);
  for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
    upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
    /* A NULL group means an earlier iteration of this loop already released
     * the group this object belonged to (see the marking loop below). */
    if (obj->group == NULL || *obj->group == 0) {
      if (obj->group) {
        upb_refcounted *o;

        /* We eagerly free() the group's count (since we can't easily determine
         * the group's remaining size it's the easiest way to ensure it gets
         * done). */
        free(obj->group);

        /* Visit to release ref2's (done in a separate pass since release_ref2
         * depends on o->group being unmodified so it can test merged()). */
        o = obj;
        do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);

        /* Mark "group" fields as NULL so we know to free the objects later in
         * this loop, but also don't try to delete the group twice. */
        o = obj;
        do { o->group = NULL; } while ((o = o->next) != obj);
      }
      freeobj(obj);
    }
  }

  /* Cleanup.  The success path falls through all of these labels as well. */
err4:
  if (!ret) {
    /* On failure the values stored in t.groups were never handed to any
     * object, so they are released here. */
    upb_inttable_begin(&iter, &t.groups);
    for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
      free(upb_value_getptr(upb_inttable_iter_value(&iter)));
  }
  upb_inttable_uninit(&t.groups);
err3:
  upb_inttable_uninit(&t.stack);
err2:
  upb_inttable_uninit(&t.objattr);
err1:
  return ret;
}
3335 
3336 
3337 /* Misc internal functions  ***************************************************/
3338 
merged(const upb_refcounted * r,const upb_refcounted * r2)3339 static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
3340   return r->group == r2->group;
3341 }
3342 
/* Merges the group that "from" belongs to into r's group.  Does nothing if
 * the two objects already share a group.  Otherwise from's group count is
 * added to r's, from's counter is freed, every member of from's list is
 * pointed at r's counter and the two circular lists are joined. */
static void merge(upb_refcounted *r, upb_refcounted *from) {
  upb_refcounted *first = from;
  upb_refcounted *cur;
  upb_refcounted *r_next;

  if (merged(r, from)) return;

  *r->group += *from->group;
  free(from->group);

  /* Set all refcount pointers in the "from" chain to the merged refcount.
   *
   * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
   * if the user continuously extends a group by one object.  Prevent this by
   * using one of the techniques in this paper:
   *     ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf */
  cur = first;
  do {
    cur->group = r->group;
    cur = cur->next;
  } while (cur != first);

  /* Merge the two circularly linked lists by swapping their next pointers. */
  r_next = r->next;
  r->next = first->next;
  first->next = r_next;
}
3365 
3366 static void unref(const upb_refcounted *r);
3367 
release_ref2(const upb_refcounted * obj,const upb_refcounted * subobj,void * closure)3368 static void release_ref2(const upb_refcounted *obj,
3369                          const upb_refcounted *subobj,
3370                          void *closure) {
3371   UPB_UNUSED(closure);
3372   untrack(subobj, obj, true);
3373   if (!merged(obj, subobj)) {
3374     assert(subobj->is_frozen);
3375     unref(subobj);
3376   }
3377 }
3378 
unref(const upb_refcounted * r)3379 static void unref(const upb_refcounted *r) {
3380   if (unrefgroup(r->group)) {
3381     const upb_refcounted *o;
3382 
3383     free(r->group);
3384 
3385     /* In two passes, since release_ref2 needs a guarantee that any subobjs
3386      * are alive. */
3387     o = r;
3388     do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
3389 
3390     o = r;
3391     do {
3392       const upb_refcounted *next = o->next;
3393       assert(o->is_frozen || o->individual_count == 0);
3394       freeobj((upb_refcounted*)o);
3395       o = next;
3396     } while(o != r);
3397   }
3398 }
3399 
/* Destroys a single object: calls trackfree() on it and then invokes the
 * free function from its vtable. */
static void freeobj(upb_refcounted *o) {
  trackfree(o);
  o->vtbl->free(o);
}
3404 
3405 
3406 /* Public interface ***********************************************************/
3407 
/* Initializes r as a mutable object that forms a group of its own, using
 * "vtbl" as its vtable, and takes an initial ref on behalf of "owner".
 *
 * Returns false if the group counter cannot be allocated or trackinit()
 * fails; no ref is taken in that case. */
bool upb_refcounted_init(upb_refcounted *r,
                         const struct upb_refcounted_vtbl *vtbl,
                         const void *owner) {
#ifndef NDEBUG
  /* Endianness check.  This is unrelated to upb_refcounted, it's just a
   * convenient place to put the check that we can be assured will run for
   * basically every program using upb. */
  const int one = 1;
#ifdef UPB_BIG_ENDIAN
  assert(*(const char*)&one != 1);
#else
  assert(*(const char*)&one == 1);
#endif
#endif

  r->next = r;
  r->vtbl = vtbl;
  r->individual_count = 0;
  r->is_frozen = false;
  r->group = malloc(sizeof(*r->group));
  if (r->group == NULL) return false;
  *r->group = 0;

  if (trackinit(r)) {
    upb_refcounted_ref(r, owner);
    return true;
  }

  free(r->group);
  return false;
}
3437 
upb_refcounted_isfrozen(const upb_refcounted * r)3438 bool upb_refcounted_isfrozen(const upb_refcounted *r) {
3439   return r->is_frozen;
3440 }
3441 
upb_refcounted_ref(const upb_refcounted * r,const void * owner)3442 void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
3443   track(r, owner, false);
3444   if (!r->is_frozen)
3445     ((upb_refcounted*)r)->individual_count++;
3446   refgroup(r->group);
3447 }
3448 
upb_refcounted_unref(const upb_refcounted * r,const void * owner)3449 void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
3450   untrack(r, owner, false);
3451   if (!r->is_frozen)
3452     ((upb_refcounted*)r)->individual_count--;
3453   unref(r);
3454 }
3455 
/* Records a reference from the mutable object "from" to r.  If r is frozen
 * its group count is incremented; otherwise the groups of r and "from" are
 * merged. */
void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
  assert(!from->is_frozen);  /* Non-const pointer implies this. */
  track(r, from, true);
  if (!r->is_frozen) {
    merge((upb_refcounted*)r, from);
    return;
  }
  refgroup(r->group);
}
3465 
/* Drops a reference from the mutable object "from" to r.  A frozen r has its
 * group ref released; for a mutable r nothing is released, the function only
 * asserts that both objects share a group. */
void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
  assert(!from->is_frozen);  /* Non-const pointer implies this. */
  untrack(r, from, true);
  if (!r->is_frozen) {
    assert(merged(r, from));
    return;
  }
  unref(r);
}
3475 
/* Moves a ref on r from owner "from" to owner "to".  The new ref is taken
 * before the old one is released.  Either owner may be NULL, in which case
 * the corresponding half of the operation is skipped. */
void upb_refcounted_donateref(
    const upb_refcounted *r, const void *from, const void *to) {
  assert(from != to);
  if (to) {
    upb_refcounted_ref(r, to);
  }
  if (from) {
    upb_refcounted_unref(r, from);
  }
}
3484 
/* Public wrapper around checkref() for a regular (non-ref2) ref held by
 * "owner" on r. */
void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
  const bool is_ref2 = false;
  checkref(r, owner, is_ref2);
}
3488 
/* Public entry point for freezing: asserts that none of the n roots is
 * already frozen and then delegates to freeze(), returning its result. */
bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
                           int maxdepth) {
  int root;
  for (root = 0; root < n; ++root) {
    assert(!roots[root]->is_frozen);
  }
  return freeze(roots, n, s, maxdepth);
}
3497 
3498 
3499 #include <stdlib.h>
3500 
3501 /* Fallback implementation if the shim is not specialized by the JIT. */
3502 #define SHIM_WRITER(type, ctype)                                              \
3503   bool upb_shim_set ## type (void *c, const void *hd, ctype val) {            \
3504     uint8_t *m = c;                                                           \
3505     const upb_shim_data *d = hd;                                              \
3506     if (d->hasbit > 0)                                                        \
3507       *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8);                   \
3508     *(ctype*)&m[d->offset] = val;                                             \
3509     return true;                                                              \
3510   }                                                                           \
3511 
SHIM_WRITER(double,double)3512 SHIM_WRITER(double, double)
3513 SHIM_WRITER(float,  float)
3514 SHIM_WRITER(int32,  int32_t)
3515 SHIM_WRITER(int64,  int64_t)
3516 SHIM_WRITER(uint32, uint32_t)
3517 SHIM_WRITER(uint64, uint64_t)
3518 SHIM_WRITER(bool,   bool)
3519 #undef SHIM_WRITER
3520 
3521 bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
3522                   int32_t hasbit) {
3523   upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
3524   bool ok;
3525 
3526   upb_shim_data *d = malloc(sizeof(*d));
3527   if (!d) return false;
3528   d->offset = offset;
3529   d->hasbit = hasbit;
3530 
3531   upb_handlerattr_sethandlerdata(&attr, d);
3532   upb_handlerattr_setalwaysok(&attr, true);
3533   upb_handlers_addcleanup(h, d, free);
3534 
3535 #define TYPE(u, l) \
3536   case UPB_TYPE_##u: \
3537     ok = upb_handlers_set##l(h, f, upb_shim_set##l, &attr); break;
3538 
3539   ok = false;
3540 
3541   switch (upb_fielddef_type(f)) {
3542     TYPE(INT64,  int64);
3543     TYPE(INT32,  int32);
3544     TYPE(ENUM,   int32);
3545     TYPE(UINT64, uint64);
3546     TYPE(UINT32, uint32);
3547     TYPE(DOUBLE, double);
3548     TYPE(FLOAT,  float);
3549     TYPE(BOOL,   bool);
3550     default: assert(false); break;
3551   }
3552 #undef TYPE
3553 
3554   upb_handlerattr_uninit(&attr);
3555   return ok;
3556 }
3557 
upb_shim_getdata(const upb_handlers * h,upb_selector_t s,upb_fieldtype_t * type)3558 const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
3559                                       upb_fieldtype_t *type) {
3560   upb_func *f = upb_handlers_gethandler(h, s);
3561 
3562   if ((upb_int64_handlerfunc*)f == upb_shim_setint64) {
3563     *type = UPB_TYPE_INT64;
3564   } else if ((upb_int32_handlerfunc*)f == upb_shim_setint32) {
3565     *type = UPB_TYPE_INT32;
3566   } else if ((upb_uint64_handlerfunc*)f == upb_shim_setuint64) {
3567     *type = UPB_TYPE_UINT64;
3568   } else if ((upb_uint32_handlerfunc*)f == upb_shim_setuint32) {
3569     *type = UPB_TYPE_UINT32;
3570   } else if ((upb_double_handlerfunc*)f == upb_shim_setdouble) {
3571     *type = UPB_TYPE_DOUBLE;
3572   } else if ((upb_float_handlerfunc*)f == upb_shim_setfloat) {
3573     *type = UPB_TYPE_FLOAT;
3574   } else if ((upb_bool_handlerfunc*)f == upb_shim_setbool) {
3575     *type = UPB_TYPE_BOOL;
3576   } else {
3577     return NULL;
3578   }
3579 
3580   return (const upb_shim_data*)upb_handlers_gethandlerdata(h, s);
3581 }
3582 
3583 
3584 #include <stdlib.h>
3585 #include <string.h>
3586 
/* vtable free function for symtabs: releases the ref the symtab holds on
 * every def stored in its table, tears the table down and frees the symtab
 * itself. */
static void upb_symtab_free(upb_refcounted *r) {
  upb_symtab *symtab = (upb_symtab*)r;
  upb_strtable_iter it;

  for (upb_strtable_begin(&it, &symtab->symtab);
       !upb_strtable_done(&it);
       upb_strtable_next(&it)) {
    const upb_def *entry = upb_value_getptr(upb_strtable_iter_value(&it));
    upb_def_unref(entry, symtab);
  }

  upb_strtable_uninit(&symtab->symtab);
  free(symtab);
}
3598 
3599 
upb_symtab_new(const void * owner)3600 upb_symtab *upb_symtab_new(const void *owner) {
3601   static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free};
3602   upb_symtab *s = malloc(sizeof(*s));
3603   upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner);
3604   upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
3605   return s;
3606 }
3607 
/* Freezes the symtab s, which must not be frozen already.  The freeze is
 * expected to succeed; its result is only checked by assertion. */
void upb_symtab_freeze(upb_symtab *s) {
  upb_refcounted *root;
  bool frozen;

  assert(!upb_symtab_isfrozen(s));
  root = upb_symtab_upcast_mutable(s);

  /* The symtab does not take ref2's (see refcounted.h) on the defs, because
   * defs cannot refer back to the table and therefore cannot create cycles.  So
   * 0 will suffice for maxdepth here. */
  frozen = upb_refcounted_freeze(&root, 1, NULL, 0);
  UPB_ASSERT_VAR(frozen, frozen);
}
3620 
upb_symtab_lookup(const upb_symtab * s,const char * sym)3621 const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
3622   upb_value v;
3623   upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ?
3624       upb_value_getptr(v) : NULL;
3625   return ret;
3626 }
3627 
upb_symtab_lookupmsg(const upb_symtab * s,const char * sym)3628 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
3629   upb_value v;
3630   upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3631       upb_value_getptr(v) : NULL;
3632   return def ? upb_dyncast_msgdef(def) : NULL;
3633 }
3634 
upb_symtab_lookupenum(const upb_symtab * s,const char * sym)3635 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
3636   upb_value v;
3637   upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
3638       upb_value_getptr(v) : NULL;
3639   return def ? upb_dyncast_enumdef(def) : NULL;
3640 }
3641 
3642 /* Given a symbol and the base symbol inside which it is defined, find the
3643  * symbol's definition in t. */
upb_resolvename(const upb_strtable * t,const char * base,const char * sym)3644 static upb_def *upb_resolvename(const upb_strtable *t,
3645                                 const char *base, const char *sym) {
3646   if(strlen(sym) == 0) return NULL;
3647   if(sym[0] == '.') {
3648     /* Symbols starting with '.' are absolute, so we do a single lookup.
3649      * Slice to omit the leading '.' */
3650     upb_value v;
3651     return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
3652   } else {
3653     /* Remove components from base until we find an entry or run out.
3654      * TODO: This branch is totally broken, but currently not used. */
3655     (void)base;
3656     assert(false);
3657     return NULL;
3658   }
3659 }
3660 
upb_symtab_resolve(const upb_symtab * s,const char * base,const char * sym)3661 const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
3662                                   const char *sym) {
3663   upb_def *ret = upb_resolvename(&s->symtab, base, sym);
3664   return ret;
3665 }
3666 
3667 /* Starts a depth-first traversal at "def", recursing into any subdefs
3668  * (ie. submessage types).  Adds duplicates of existing defs to addtab
3669  * wherever necessary, so that the resulting symtab will be consistent once
3670  * addtab is added.
3671  *
3672  * More specifically, if any def D is found in the DFS that:
3673  *
3674  *   1. can reach a def that is being replaced by something in addtab, AND
3675  *
3676  *   2. is not itself being replaced already (ie. this name doesn't already
3677  *      exist in addtab)
3678  *
3679  * ...then a duplicate (new copy) of D will be added to addtab.
3680  *
3681  * Returns true if this happened for any def reachable from "def."
3682  *
3683  * It is slightly tricky to do this correctly in the presence of cycles.  If we
3684  * detect that our DFS has hit a cycle, we might not yet know if any SCCs on
3685  * our stack can reach a def in addtab or not.  Once we figure this out, that
3686  * answer needs to apply to *all* defs in these SCCs, even if we visited them
3687  * already.  So a straight up one-pass cycle-detecting DFS won't work.
3688  *
3689  * To work around this problem, we traverse each SCC (which we already
3690  * computed, since these defs are frozen) as a single node.  We first compute
3691  * whether the SCC as a whole can reach any def in addtab, then we dup (or not)
3692  * the entire SCC.  This requires breaking the encapsulation of upb_refcounted,
3693  * since that is where we get the data about what SCC we are in. */
upb_resolve_dfs(const upb_def * def,upb_strtable * addtab,const void * new_owner,upb_inttable * seen,upb_status * s)3694 static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
3695                             const void *new_owner, upb_inttable *seen,
3696                             upb_status *s) {
3697   upb_value v;
3698   bool need_dup;
3699   const upb_def *base;
3700   const void* memoize_key;
3701 
3702   /* Memoize results of this function for efficiency (since we're traversing a
3703    * DAG this is not needed to limit the depth of the search).
3704    *
3705    * We memoize by SCC instead of by individual def. */
3706   memoize_key = def->base.group;
3707 
3708   if (upb_inttable_lookupptr(seen, memoize_key, &v))
3709     return upb_value_getbool(v);
3710 
3711   /* Visit submessages for all messages in the SCC. */
3712   need_dup = false;
3713   base = def;
3714   do {
3715     upb_value v;
3716     const upb_msgdef *m;
3717 
3718     assert(upb_def_isfrozen(def));
3719     if (def->type == UPB_DEF_FIELD) continue;
3720     if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) {
3721       need_dup = true;
3722     }
3723 
3724     /* For messages, continue the recursion by visiting all subdefs, but only
3725      * ones in different SCCs. */
3726     m = upb_dyncast_msgdef(def);
3727     if (m) {
3728       upb_msg_field_iter i;
3729       for(upb_msg_field_begin(&i, m);
3730           !upb_msg_field_done(&i);
3731           upb_msg_field_next(&i)) {
3732         upb_fielddef *f = upb_msg_iter_field(&i);
3733         const upb_def *subdef;
3734 
3735         if (!upb_fielddef_hassubdef(f)) continue;
3736         subdef = upb_fielddef_subdef(f);
3737 
3738         /* Skip subdefs in this SCC. */
3739         if (def->base.group == subdef->base.group) continue;
3740 
3741         /* |= to avoid short-circuit; we need its side-effects. */
3742         need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s);
3743         if (!upb_ok(s)) return false;
3744       }
3745     }
3746   } while ((def = (upb_def*)def->base.next) != base);
3747 
3748   if (need_dup) {
3749     /* Dup all defs in this SCC that don't already have entries in addtab. */
3750     def = base;
3751     do {
3752       const char *name;
3753 
3754       if (def->type == UPB_DEF_FIELD) continue;
3755       name = upb_def_fullname(def);
3756       if (!upb_strtable_lookup(addtab, name, NULL)) {
3757         upb_def *newdef = upb_def_dup(def, new_owner);
3758         if (!newdef) goto oom;
3759         newdef->came_from_user = false;
3760         if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef)))
3761           goto oom;
3762       }
3763     } while ((def = (upb_def*)def->base.next) != base);
3764   }
3765 
3766   upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup));
3767   return need_dup;
3768 
3769 oom:
3770   upb_status_seterrmsg(s, "out of memory");
3771   return false;
3772 }
3773 
3774 /* TODO(haberman): we need a lot more testing of error conditions.
3775  * The came_from_user stuff in particular is not tested. */
upb_symtab_add(upb_symtab * s,upb_def * const * defs,int n,void * ref_donor,upb_status * status)3776 bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
3777                     upb_status *status) {
3778   int i;
3779   upb_strtable_iter iter;
3780   upb_def **add_defs = NULL;
3781   upb_strtable addtab;
3782   upb_inttable seen;
3783 
3784   assert(!upb_symtab_isfrozen(s));
3785   if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
3786     upb_status_seterrmsg(status, "out of memory");
3787     return false;
3788   }
3789 
3790   /* Add new defs to our "add" set. */
3791   for (i = 0; i < n; i++) {
3792     upb_def *def = defs[i];
3793     const char *fullname;
3794     upb_fielddef *f;
3795 
3796     if (upb_def_isfrozen(def)) {
3797       upb_status_seterrmsg(status, "added defs must be mutable");
3798       goto err;
3799     }
3800     assert(!upb_def_isfrozen(def));
3801     fullname = upb_def_fullname(def);
3802     if (!fullname) {
3803       upb_status_seterrmsg(
3804           status, "Anonymous defs cannot be added to a symtab");
3805       goto err;
3806     }
3807 
3808     f = upb_dyncast_fielddef_mutable(def);
3809 
3810     if (f) {
3811       if (!upb_fielddef_containingtypename(f)) {
3812         upb_status_seterrmsg(status,
3813                              "Standalone fielddefs must have a containing type "
3814                              "(extendee) name set");
3815         goto err;
3816       }
3817     } else {
3818       if (upb_strtable_lookup(&addtab, fullname, NULL)) {
3819         upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
3820         goto err;
3821       }
3822       /* We need this to back out properly, because if there is a failure we
3823        * need to donate the ref back to the caller. */
3824       def->came_from_user = true;
3825       upb_def_donateref(def, ref_donor, s);
3826       if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
3827         goto oom_err;
3828     }
3829   }
3830 
3831   /* Add standalone fielddefs (ie. extensions) to the appropriate messages.
3832    * If the appropriate message only exists in the existing symtab, duplicate
3833    * it so we have a mutable copy we can add the fields to. */
3834   for (i = 0; i < n; i++) {
3835     upb_def *def = defs[i];
3836     upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
3837     const char *msgname;
3838     upb_value v;
3839     upb_msgdef *m;
3840 
3841     if (!f) continue;
3842     msgname = upb_fielddef_containingtypename(f);
3843     /* We validated this earlier in this function. */
3844     assert(msgname);
3845 
3846     /* If the extendee name is absolutely qualified, move past the initial ".".
3847      * TODO(haberman): it is not obvious what it would mean if this was not
3848      * absolutely qualified. */
3849     if (msgname[0] == '.') {
3850       msgname++;
3851     }
3852 
3853     if (upb_strtable_lookup(&addtab, msgname, &v)) {
3854       /* Extendee is in the set of defs the user asked us to add. */
3855       m = upb_value_getptr(v);
3856     } else {
3857       /* Need to find and dup the extendee from the existing symtab. */
3858       const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname);
3859       if (!frozen_m) {
3860         upb_status_seterrf(status,
3861                            "Tried to extend message %s that does not exist "
3862                            "in this SymbolTable.",
3863                            msgname);
3864         goto err;
3865       }
3866       m = upb_msgdef_dup(frozen_m, s);
3867       if (!m) goto oom_err;
3868       if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) {
3869         upb_msgdef_unref(m, s);
3870         goto oom_err;
3871       }
3872     }
3873 
3874     if (!upb_msgdef_addfield(m, f, ref_donor, status)) {
3875       goto err;
3876     }
3877   }
3878 
3879   /* Add dups of any existing def that can reach a def with the same name as
3880    * anything in our "add" set. */
3881   if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err;
3882   upb_strtable_begin(&iter, &s->symtab);
3883   for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3884     upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3885     upb_resolve_dfs(def, &addtab, s, &seen, status);
3886     if (!upb_ok(status)) goto err;
3887   }
3888   upb_inttable_uninit(&seen);
3889 
3890   /* Now using the table, resolve symbolic references for subdefs. */
3891   upb_strtable_begin(&iter, &addtab);
3892   for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3893     const char *base;
3894     upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3895     upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
3896     upb_msg_field_iter j;
3897 
3898     if (!m) continue;
3899     /* Type names are resolved relative to the message in which they appear. */
3900     base = upb_msgdef_fullname(m);
3901 
3902     for(upb_msg_field_begin(&j, m);
3903         !upb_msg_field_done(&j);
3904         upb_msg_field_next(&j)) {
3905       upb_fielddef *f = upb_msg_iter_field(&j);
3906       const char *name = upb_fielddef_subdefname(f);
3907       if (name && !upb_fielddef_subdef(f)) {
3908         /* Try the lookup in the current set of to-be-added defs first. If not
3909          * there, try existing defs. */
3910         upb_def *subdef = upb_resolvename(&addtab, base, name);
3911         if (subdef == NULL) {
3912           subdef = upb_resolvename(&s->symtab, base, name);
3913         }
3914         if (subdef == NULL) {
3915           upb_status_seterrf(
3916               status, "couldn't resolve name '%s' in message '%s'", name, base);
3917           goto err;
3918         } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
3919           goto err;
3920         }
3921       }
3922     }
3923   }
3924 
3925   /* We need an array of the defs in addtab, for passing to upb_def_freeze. */
3926   add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab));
3927   if (add_defs == NULL) goto oom_err;
3928   upb_strtable_begin(&iter, &addtab);
3929   for (n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3930     add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
3931   }
3932 
3933   if (!upb_def_freeze(add_defs, n, status)) goto err;
3934 
3935   /* This must be delayed until all errors have been detected, since error
3936    * recovery code uses this table to cleanup defs. */
3937   upb_strtable_uninit(&addtab);
3938 
3939   /* TODO(haberman) we don't properly handle errors after this point (like
3940    * OOM in upb_strtable_insert() below). */
3941   for (i = 0; i < n; i++) {
3942     upb_def *def = add_defs[i];
3943     const char *name = upb_def_fullname(def);
3944     upb_value v;
3945     bool success;
3946 
3947     if (upb_strtable_remove(&s->symtab, name, &v)) {
3948       const upb_def *def = upb_value_getptr(v);
3949       upb_def_unref(def, s);
3950     }
3951     success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
3952     UPB_ASSERT_VAR(success, success == true);
3953   }
3954   free(add_defs);
3955   return true;
3956 
3957 oom_err:
3958   upb_status_seterrmsg(status, "out of memory");
3959 err: {
3960     /* For defs the user passed in, we need to donate the refs back.  For defs
3961      * we dup'd, we need to just unref them. */
3962     upb_strtable_begin(&iter, &addtab);
3963     for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
3964       upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
3965       bool came_from_user = def->came_from_user;
3966       def->came_from_user = false;
3967       if (came_from_user) {
3968         upb_def_donateref(def, s, ref_donor);
3969       } else {
3970         upb_def_unref(def, s);
3971       }
3972     }
3973   }
3974   upb_strtable_uninit(&addtab);
3975   free(add_defs);
3976   assert(!upb_ok(status));
3977   return false;
3978 }
3979 
3980 /* Iteration. */
3981 
/* Skips forward until the iterator rests on a def whose type equals the
 * filter stored in iter->type, or until the underlying table iterator is
 * done.  With UPB_DEF_ANY every def matches, so nothing is skipped. */
static void advance_to_matching(upb_symtab_iter *iter) {
  if (iter->type != UPB_DEF_ANY) {
    for (;;) {
      if (upb_strtable_done(&iter->iter)) break;
      if (upb_symtab_iter_def(iter)->type == iter->type) break;
      upb_strtable_next(&iter->iter);
    }
  }
}
3991 
/* Starts iterating over the defs stored in "s".  Only defs whose type equals
 * "type" are visited; UPB_DEF_ANY disables the filter. */
void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
                      upb_deftype_t type) {
  iter->type = type;
  upb_strtable_begin(&iter->iter, &s->symtab);
  /* The first table entry may not pass the filter. */
  advance_to_matching(iter);
}
3998 
/* Steps past the current def, then on to the next one that passes the
 * iterator's type filter (or to the end). */
void upb_symtab_next(upb_symtab_iter *iter) {
  upb_strtable_iter *inner = &iter->iter;
  upb_strtable_next(inner);
  advance_to_matching(iter);
}
4003 
upb_symtab_done(const upb_symtab_iter * iter)4004 bool upb_symtab_done(const upb_symtab_iter *iter) {
4005   return upb_strtable_done(&iter->iter);
4006 }
4007 
upb_symtab_iter_def(const upb_symtab_iter * iter)4008 const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
4009   return upb_value_getptr(upb_strtable_iter_value(&iter->iter));
4010 }
4011 /*
4012 ** upb_table Implementation
4013 **
4014 ** Implementation is heavily inspired by Lua's ltable.c.
4015 */
4016 
4017 
4018 #include <stdlib.h>
4019 #include <string.h>
4020 
/* Upper bound, expressed as a base-2 logarithm, on the value returned by
 * log2ceil() and therefore on the power-of-two array sizes considered by
 * upb_inttable_compact(): 2^16 = 64k entries. */
#define UPB_MAXARRSIZE 16  /* 64k. */

/* From Chromium.  Yields the number of elements in array "x".  The second
 * divisor evaluates to 0 (forcing a division by zero) whenever sizeof(x) is
 * not a whole multiple of the element size. */
#define ARRAY_SIZE(x) \
    ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))

/* Load factor, computed as (count + 1) / size, above which isfull() reports
 * that a hash table must grow before the next insertion. */
static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table.  This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects).  The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
4033 
/* Returns true if "v" has at most one bit set.  Note that 0 is accepted as
 * well as the true powers of two. */
bool is_pow2(uint64_t v) {
  if (v == 0) return true;
  return (v & (v - 1)) == 0;
}
4035 
log2ceil(uint64_t v)4036 int log2ceil(uint64_t v) {
4037   int ret = 0;
4038   bool pow2 = is_pow2(v);
4039   while (v >>= 1) ret++;
4040   ret = pow2 ? ret : ret + 1;  /* Ceiling. */
4041   return UPB_MIN(UPB_MAXARRSIZE, ret);
4042 }
4043 
/* Duplicates the NUL-terminated string "s" via upb_strdup2().  The result is
 * heap-allocated; NULL is returned if the copy could not be made. */
char *upb_strdup(const char *s) {
  size_t len = strlen(s);
  return upb_strdup2(s, len);
}
4047 
/* Copies the first "len" bytes of "s" into a fresh malloc()'d buffer and
 * appends a NUL byte.  Returns NULL if len == SIZE_MAX or if malloc() fails.
 * The caller releases the result with free(). */
char *upb_strdup2(const char *s, size_t len) {
  char *copy;

  /* len + 1 below would wrap around to 0. */
  if (len == SIZE_MAX) return NULL;

  /* The input may be a raw binary buffer without a terminator, so copy
   * exactly "len" bytes and add the terminator ourselves. */
  copy = malloc(len + 1);
  if (copy == NULL) return NULL;

  memcpy(copy, s, len);
  copy[len] = '\0';
  return copy;
}
4064 
/* A type to represent the lookup key of either a strtable or an inttable.
 * Only one member is meaningful at a time; which one depends on the kind of
 * table the key is used with (see strkey2() and intkey()). */
typedef union {
  uintptr_t num;      /* Integer key, as built by intkey(). */
  struct {
    const char *str;  /* Pointer to the key bytes (stored as-is, not copied). */
    size_t len;       /* Number of key bytes at "str". */
  } str;              /* String key, as built by strkey2(). */
} lookupkey_t;
4073 
strkey2(const char * str,size_t len)4074 static lookupkey_t strkey2(const char *str, size_t len) {
4075   lookupkey_t k;
4076   k.str.str = str;
4077   k.str.len = len;
4078   return k;
4079 }
4080 
intkey(uintptr_t key)4081 static lookupkey_t intkey(uintptr_t key) {
4082   lookupkey_t k;
4083   k.num = key;
4084   return k;
4085 }
4086 
/* Computes the hash of a key in the form it is stored in a table entry. */
typedef uint32_t hashfunc_t(upb_tabkey key);
/* Returns true if stored key "k1" matches lookup key "k2". */
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
4089 
4090 /* Base table (shared code) ***************************************************/
4091 
4092 /* For when we need to cast away const. */
mutable_entries(upb_table * t)4093 static upb_tabent *mutable_entries(upb_table *t) {
4094   return (upb_tabent*)t->entries;
4095 }
4096 
/* Returns true if adding one more entry would push the table's load factor
 * above MAX_LOAD. */
static bool isfull(upb_table *t) {
  double load_after_insert = (double)(t->count + 1) / upb_table_size(t);
  return load_after_insert > MAX_LOAD;
}
4100 
/* Initializes "t" as an empty table holding values of type "ctype", sized
 * according to "size_lg2" (the slot count is whatever upb_table_size()
 * reports).  All slots start zeroed; a table with zero slots gets a NULL
 * entry array.  Returns false if the entry array cannot be allocated. */
static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2) {
  size_t nbytes;

  t->count = 0;
  t->ctype = ctype;
  t->size_lg2 = size_lg2;
  t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;

  nbytes = upb_table_size(t) * sizeof(upb_tabent);
  if (nbytes == 0) {
    t->entries = NULL;
    return true;
  }

  t->entries = malloc(nbytes);
  if (t->entries == NULL) return false;
  memset(mutable_entries(t), 0, nbytes);
  return true;
}
4118 
/* Releases the entry array allocated by init(). */
static void uninit(upb_table *t) {
  upb_tabent *entries = mutable_entries(t);
  free(entries);
}
4120 
emptyent(upb_table * t)4121 static upb_tabent *emptyent(upb_table *t) {
4122   upb_tabent *e = mutable_entries(t) + upb_table_size(t);
4123   while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
4124 }
4125 
getentry_mutable(upb_table * t,uint32_t hash)4126 static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
4127   return (upb_tabent*)upb_getentry(t, hash);
4128 }
4129 
findentry(const upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)4130 static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
4131                                    uint32_t hash, eqlfunc_t *eql) {
4132   const upb_tabent *e;
4133 
4134   if (t->size_lg2 == 0) return NULL;
4135   e = upb_getentry(t, hash);
4136   if (upb_tabent_isempty(e)) return NULL;
4137   while (1) {
4138     if (eql(e->key, key)) return e;
4139     if ((e = e->next) == NULL) return NULL;
4140   }
4141 }
4142 
findentry_mutable(upb_table * t,lookupkey_t key,uint32_t hash,eqlfunc_t * eql)4143 static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
4144                                      uint32_t hash, eqlfunc_t *eql) {
4145   return (upb_tabent*)findentry(t, key, hash, eql);
4146 }
4147 
/* Looks up "key" (whose hash is "hash") in "t".  Returns true if it is
 * present; in that case the stored value is also written to *v when "v" is
 * non-NULL. */
static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
                   uint32_t hash, eqlfunc_t *eql) {
  const upb_tabent *found = findentry(t, key, hash, eql);

  if (found == NULL) return false;
  if (v != NULL) {
    _upb_value_setval(v, found->val.val, t->ctype);
  }
  return true;
}
4160 
/* Inserts a new entry into the hash table "t".
 *
 * The given key must not already exist in the table.
 *
 *   key      - lookup form of the key; only used by the assertions.
 *   tabkey   - the key in the form that is stored in the entry.
 *   val      - value to store; its ctype must equal the table's ctype.
 *   hash     - hash of the key, used to find its main position.
 *   hashfunc - hashes a stored key; used to find the main position of an
 *              entry that already occupies our main position.
 *   eql      - key comparison; only used by the assertions.
 *
 * On a collision a free slot is taken with emptyent(), so the table must
 * still have an empty slot; the callers visible in this file check isfull()
 * and grow the table before calling. */
static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
                   upb_value val, uint32_t hash,
                   hashfunc_t *hashfunc, eqlfunc_t *eql) {
  upb_tabent *mainpos_e;
  upb_tabent *our_e;

  /* key and eql are only referenced from assert(), which may compile away. */
  UPB_UNUSED(eql);
  UPB_UNUSED(key);
  assert(findentry(t, key, hash, eql) == NULL);
  assert(val.ctype == t->ctype);

  t->count++;
  mainpos_e = getentry_mutable(t, hash);
  our_e = mainpos_e;

  if (upb_tabent_isempty(mainpos_e)) {
    /* Our main position is empty; use it. */
    our_e->next = NULL;
  } else {
    /* Collision. */
    upb_tabent *new_e = emptyent(t);
    /* Head of collider's chain. */
    upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
    if (chain == mainpos_e) {
      /* Existing ent is in its main position (it has the same hash as us, and
       * is the head of our chain).  Insert to new ent and append to this chain. */
      new_e->next = mainpos_e->next;
      mainpos_e->next = new_e;
      our_e = new_e;
    } else {
      /* Existing ent is not in its main position (it is a node in some other
       * chain).  This implies that no existing ent in the table has our hash.
       * Evict it (updating its chain) and use its ent for head of our chain. */
      *new_e = *mainpos_e;  /* copies next. */
      /* Find the evicted entry's predecessor so it can be re-linked to the
       * entry's new location. */
      while (chain->next != mainpos_e) {
        chain = (upb_tabent*)chain->next;
        assert(chain);
      }
      chain->next = new_e;
      our_e = mainpos_e;
      our_e->next = NULL;
    }
  }
  /* Fill in the slot chosen above. */
  our_e->key = tabkey;
  our_e->val.val = val.val;
  assert(findentry(t, key, hash, eql) == our_e);
}
4209 
/* Removes the entry matching "key" (whose hash is "hash") from the hash
 * table "t".
 *
 *   val     - if non-NULL, receives the removed entry's value.
 *   removed - if non-NULL, receives the removed entry's stored key, so the
 *             caller can release any memory it refers to.
 *   eql     - compares a stored key with the lookup key.
 *
 * Returns true if an entry was removed and false if the key was not found. */
static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
               upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
  upb_tabent *chain;

  /* A table with no hash slots has a NULL entry array (see init()), so there
   * is nothing to probe.  findentry() applies the same guard. */
  if (t->size_lg2 == 0) return false;

  chain = getentry_mutable(t, hash);
  if (upb_tabent_isempty(chain)) return false;
  if (eql(chain->key, key)) {
    /* Element to remove is at the head of its chain. */
    t->count--;
    if (val) {
      _upb_value_setval(val, chain->val.val, t->ctype);
    }
    if (chain->next) {
      /* Pull the second element into the head slot and free its old slot.
       * The key must be reported before the head slot is overwritten. */
      upb_tabent *move = (upb_tabent*)chain->next;
      if (removed) *removed = chain->key;
      *chain = *move;
      move->key = 0;  /* Make the slot empty. */
    } else {
      if (removed) *removed = chain->key;
      chain->key = 0;  /* Make the slot empty. */
    }
    return true;
  } else {
    /* Element to remove is either in a non-head position or not in the
     * table. */
    while (chain->next && !eql(chain->next->key, key))
      chain = (upb_tabent*)chain->next;
    if (chain->next) {
      /* Found element to remove. */
      upb_tabent *rm;

      if (val) {
        _upb_value_setval(val, chain->next->val.val, t->ctype);
      }
      rm = (upb_tabent*)chain->next;
      if (removed) *removed = rm->key;
      rm->key = 0;  /* Make the slot empty. */
      chain->next = rm->next;  /* Unlink it from the chain. */
      t->count--;
      return true;
    } else {
      return false;
    }
  }
}
4253 
next(const upb_table * t,size_t i)4254 static size_t next(const upb_table *t, size_t i) {
4255   do {
4256     if (++i >= upb_table_size(t))
4257       return SIZE_MAX;
4258   } while(upb_tabent_isempty(&t->entries[i]));
4259 
4260   return i;
4261 }
4262 
/* Returns the index of the first non-empty slot, or SIZE_MAX if the table has
 * none. */
static size_t begin(const upb_table *t) {
  /* next() increments before testing, so this wraps around to index 0. */
  return next(t, (size_t)-1);
}
4266 
4267 
4268 /* upb_strtable ***************************************************************/
4269 
4270 /* A simple "subclass" of upb_table that only adds a hash function for strings. */
4271 
strcopy(lookupkey_t k2)4272 static upb_tabkey strcopy(lookupkey_t k2) {
4273   char *str = malloc(k2.str.len + sizeof(uint32_t) + 1);
4274   if (str == NULL) return 0;
4275   memcpy(str, &k2.str.len, sizeof(uint32_t));
4276   memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
4277   return (uintptr_t)str;
4278 }
4279 
/* Hash function for stored string keys: MurmurHash2 (seed 0) over the key
 * bytes. */
static uint32_t strhash(upb_tabkey key) {
  uint32_t nbytes;
  const char *bytes = upb_tabstr(key, &nbytes);
  return MurmurHash2(bytes, nbytes, 0);
}
4285 
/* Returns true if stored string key "k1" has the same length and bytes as
 * lookup key "k2". */
static bool streql(upb_tabkey k1, lookupkey_t k2) {
  uint32_t stored_len;
  const char *stored = upb_tabstr(k1, &stored_len);

  if (stored_len != k2.str.len) return false;
  return memcmp(stored, k2.str.str, stored_len) == 0;
}
4291 
/* Initializes an empty string table for values of type "ctype".  Returns
 * false if memory allocation fails. */
bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
  const uint8_t initial_size_lg2 = 2;
  return init(&t->t, ctype, initial_size_lg2);
}
4295 
/* Frees every stored key copy and then the table's entry array. */
void upb_strtable_uninit(upb_strtable *t) {
  const size_t nslots = upb_table_size(&t->t);
  size_t slot;

  for (slot = 0; slot < nslots; slot++) {
    /* Empty slots hold key 0, and free(NULL) is a no-op. */
    free((void*)t->t.entries[slot].key);
  }
  uninit(&t->t);
}
4302 
/* Rebuilds "t" as a table sized according to "size_lg2", re-inserting every
 * existing entry.
 *
 * Returns true on success.  Returns false if memory runs out; in that case
 * "t" is left exactly as it was and still owns all of its entries. */
bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
  upb_strtable new_table;
  upb_strtable_iter i;

  if (!init(&new_table.t, t->t.ctype, size_lg2))
    return false;
  upb_strtable_begin(&i, t);
  for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
    /* upb_strtable_insert2() makes its own copy of the key, so it can fail.
     * Discard the partial table rather than silently dropping entries. */
    if (!upb_strtable_insert2(
            &new_table,
            upb_strtable_iter_key(&i),
            upb_strtable_iter_keylength(&i),
            upb_strtable_iter_value(&i))) {
      upb_strtable_uninit(&new_table);
      return false;
    }
  }
  /* Every entry made it across; the old table can go. */
  upb_strtable_uninit(t);
  *t = new_table;
  return true;
}
4321 
/* Inserts value "v" under the "len"-byte key "k".  The key bytes are copied
 * into the table.  The key must not already be present (insert() asserts
 * this).  Returns false if memory allocation fails. */
bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
                          upb_value v) {
  lookupkey_t key;
  upb_tabkey stored_key;

  /* Grow first if needed: rebuild at the next larger size, which re-inserts
   * the existing entries. */
  if (isfull(&t->t) && !upb_strtable_resize(t, t->t.size_lg2 + 1)) {
    return false;
  }

  key = strkey2(k, len);
  stored_key = strcopy(key);
  if (stored_key == 0) return false;

  insert(&t->t, key, stored_key, v,
         MurmurHash2(key.str.str, key.str.len, 0), &strhash, &streql);
  return true;
}
4343 
/* Looks up the "len"-byte key "key".  Returns true if it is present; the
 * value is then also stored in *v when "v" is non-NULL. */
bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
                          upb_value *v) {
  lookupkey_t k = strkey2(key, len);
  uint32_t hash = MurmurHash2(key, len, 0);
  return lookup(&t->t, k, v, hash, &streql);
}
4349 
/* Removes the entry stored under the "len"-byte key "key" and frees the
 * table's copy of that key.  Returns true if an entry was removed; its value
 * is then also stored in *val when "val" is non-NULL. */
bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
                         upb_value *val) {
  /* Hash exactly "len" bytes, as upb_strtable_insert2() and
   * upb_strtable_lookup2() do.  Using strlen(key) would hash a different
   * span for keys that contain NUL bytes or are not NUL-terminated. */
  uint32_t hash = MurmurHash2(key, len, 0);
  upb_tabkey tabkey;
  if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
    free((void*)tabkey);
    return true;
  } else {
    return false;
  }
}
4361 
4362 /* Iteration */
4363 
str_tabent(const upb_strtable_iter * i)4364 static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
4365   return &i->t->t.entries[i->index];
4366 }
4367 
/* Points the iterator at the first occupied slot of "t" (or at the end if
 * there is none). */
void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
  size_t first = begin(&t->t);
  i->t = t;
  i->index = first;
}
4372 
/* Advances the iterator to the next occupied slot (or to the end). */
void upb_strtable_next(upb_strtable_iter *i) {
  const upb_strtable *table = i->t;
  i->index = next(&table->t, i->index);
}
4376 
upb_strtable_done(const upb_strtable_iter * i)4377 bool upb_strtable_done(const upb_strtable_iter *i) {
4378   return i->index >= upb_table_size(&i->t->t) ||
4379          upb_tabent_isempty(str_tabent(i));
4380 }
4381 
/* Returns the key of the current entry.  The iterator must not be done. */
const char *upb_strtable_iter_key(upb_strtable_iter *i) {
  const upb_tabent *e;
  assert(!upb_strtable_done(i));
  e = str_tabent(i);
  return upb_tabstr(e->key, NULL);
}
4386 
/* Returns the length in bytes of the current entry's key.  The iterator must
 * not be done. */
size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
  uint32_t nbytes;
  const upb_tabent *e;
  assert(!upb_strtable_done(i));
  e = str_tabent(i);
  upb_tabstr(e->key, &nbytes);  /* Only the length output is needed. */
  return nbytes;
}
4393 
upb_strtable_iter_value(const upb_strtable_iter * i)4394 upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
4395   assert(!upb_strtable_done(i));
4396   return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
4397 }
4398 
/* Puts the iterator into the "done" state by moving its index to SIZE_MAX,
 * the value next() returns when no occupied slot remains. */
void upb_strtable_iter_setdone(upb_strtable_iter *i) {
  const size_t end_marker = SIZE_MAX;
  i->index = end_marker;
}
4402 
upb_strtable_iter_isequal(const upb_strtable_iter * i1,const upb_strtable_iter * i2)4403 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
4404                                const upb_strtable_iter *i2) {
4405   if (upb_strtable_done(i1) && upb_strtable_done(i2))
4406     return true;
4407   return i1->t == i2->t && i1->index == i2->index;
4408 }
4409 
4410 
4411 /* upb_inttable ***************************************************************/
4412 
4413 /* For inttables we use a hybrid structure where small keys are kept in an
4414  * array and large keys are put in the hash table. */
4415 
/* hashfunc_t adapter around upb_inthash() for integer keys. */
static uint32_t inthash(upb_tabkey key) {
  return upb_inthash(key);
}
4417 
/* eqlfunc_t for integer keys: the stored key must equal the lookup number. */
static bool inteql(upb_tabkey k1, lookupkey_t k2) {
  uintptr_t wanted = k2.num;
  return k1 == wanted;
}
4421 
mutable_array(upb_inttable * t)4422 static upb_tabval *mutable_array(upb_inttable *t) {
4423   return (upb_tabval*)t->array;
4424 }
4425 
inttable_val(upb_inttable * t,uintptr_t key)4426 static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
4427   if (key < t->array_size) {
4428     return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
4429   } else {
4430     upb_tabent *e =
4431         findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
4432     return e ? &e->val : NULL;
4433   }
4434 }
4435 
inttable_val_const(const upb_inttable * t,uintptr_t key)4436 static const upb_tabval *inttable_val_const(const upb_inttable *t,
4437                                             uintptr_t key) {
4438   return inttable_val((upb_inttable*)t, key);
4439 }
4440 
upb_inttable_count(const upb_inttable * t)4441 size_t upb_inttable_count(const upb_inttable *t) {
4442   return t->t.count + t->array_count;
4443 }
4444 
/* Debug-only consistency check; does nothing unless UPB_DEBUG_TABLE is
 * defined and NDEBUG is not.  Verifies that every entry reachable by
 * iteration can be looked up and that the number of entries iterated equals
 * upb_inttable_count(). */
static void check(upb_inttable *t) {
  UPB_UNUSED(t);
#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
  {
    /* This check is very expensive (makes inserts/deletes O(N)). */
    size_t seen = 0;
    upb_inttable_iter it;
    for (upb_inttable_begin(&it, t); !upb_inttable_done(&it);
         upb_inttable_next(&it)) {
      assert(upb_inttable_lookup(t, upb_inttable_iter_key(&it), NULL));
      seen++;
    }
    assert(seen == upb_inttable_count(t));
  }
#endif
}
4460 
/* Initializes an empty inttable whose array part has "asize" slots (at least
 * one) and whose hash part is sized according to "hsize_lg2".  Returns false,
 * with nothing left allocated, if memory runs out. */
bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
                            size_t asize, int hsize_lg2) {
  size_t nbytes;

  if (!init(&t->t, ctype, hsize_lg2)) return false;

  /* Always make the array part at least 1 long, so that we know key 0
   * won't be in the hash part, which simplifies things. */
  t->array_size = UPB_MAX(1, asize);
  t->array_count = 0;

  nbytes = t->array_size * sizeof(upb_value);
  t->array = malloc(nbytes);
  if (t->array == NULL) {
    uninit(&t->t);
    return false;
  }

  /* Start with every byte of the array part set to 0xff. */
  memset(mutable_array(t), 0xff, nbytes);
  check(t);
  return true;
}
4480 
/* Initializes an empty inttable with default sizes: a minimal array part and
 * a hash part of size_lg2 4.  Returns false if memory runs out. */
bool upb_inttable_init(upb_inttable *t, upb_ctype_t ctype) {
  const size_t default_asize = 0;
  const int default_hsize_lg2 = 4;
  return upb_inttable_sizedinit(t, ctype, default_asize, default_hsize_lg2);
}
4484 
/* Frees both parts of the table: the hash entries and the array. */
void upb_inttable_uninit(upb_inttable *t) {
  upb_tabval *slots;
  uninit(&t->t);
  slots = mutable_array(t);
  free(slots);
}
4489 
/* Inserts "val" under "key", which must not already be present.  Keys below
 * array_size go into the array part; all others go into the hash part, which
 * is first rebuilt at the next larger size if it is full.  Returns false if
 * that rebuild cannot allocate memory. */
bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
  /* XXX: Table can't store value (uint64_t)-1.  Need to somehow statically
   * guarantee that this is not necessary, or fix the limitation. */
  upb_tabval tabval;
  tabval.val = val.val;
  UPB_UNUSED(tabval);
  assert(upb_arrhas(tabval));

  if (key < t->array_size) {
    /* Array part. */
    assert(!upb_arrhas(t->array[key]));
    t->array_count++;
    mutable_array(t)[key].val = val.val;
    check(t);
    return true;
  }

  if (isfull(&t->t)) {
    /* Need to resize the hash part, but we re-use the array part. */
    upb_table grown;
    size_t slot;

    if (!init(&grown, t->t.ctype, t->t.size_lg2 + 1)) return false;

    for (slot = begin(&t->t); slot < upb_table_size(&t->t);
         slot = next(&t->t, slot)) {
      const upb_tabent *old = &t->t.entries[slot];
      upb_value old_val;

      _upb_value_setval(&old_val, old->val.val, t->t.ctype);
      insert(&grown, intkey(old->key), old->key, old_val,
             upb_inthash(old->key), &inthash, &inteql);
    }

    assert(t->t.count == grown.count);

    uninit(&t->t);
    t->t = grown;
  }

  insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
  check(t);
  return true;
}
4529 
/* Looks up "key".  Returns true if it is present; the value is then also
 * stored in *v when "v" is non-NULL. */
bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
  const upb_tabval *found = inttable_val_const(t, key);

  if (found == NULL) return false;
  if (v != NULL) {
    _upb_value_setval(v, found->val, t->t.ctype);
  }
  return true;
}
4536 
/* Overwrites the value stored under an existing "key".  Returns false,
 * changing nothing, if the key is not present. */
bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
  upb_tabval *slot = inttable_val(t, key);

  if (slot == NULL) return false;
  slot->val = val.val;
  return true;
}
4543 
/* Removes the entry stored under "key".  Returns true if an entry was
 * removed; its value is then also stored in *val when "val" is non-NULL. */
bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
  bool was_present = false;

  if (key >= t->array_size) {
    /* Hash part. */
    upb_tabkey removed;
    was_present =
        rm(&t->t, intkey(key), val, &removed, upb_inthash(key), &inteql);
  } else if (upb_arrhas(t->array[key])) {
    /* Array part: hand back the value, then mark the slot empty. */
    upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
    t->array_count--;
    if (val) {
      _upb_value_setval(val, t->array[key].val, t->t.ctype);
    }
    mutable_array(t)[key] = empty;
    was_present = true;
  }

  check(t);
  return was_present;
}
4566 
/* Inserts "val" using the table's current entry count as the key. */
bool upb_inttable_push(upb_inttable *t, upb_value val) {
  uintptr_t next_key = upb_inttable_count(t);
  return upb_inttable_insert(t, next_key, val);
}
4570 
upb_inttable_pop(upb_inttable * t)4571 upb_value upb_inttable_pop(upb_inttable *t) {
4572   upb_value val;
4573   bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
4574   UPB_ASSERT_VAR(ok, ok);
4575   return val;
4576 }
4577 
/* Same as upb_inttable_insert(), using the pointer's integer value as the
 * key. */
bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) {
  uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_insert(t, int_key, val);
}
4581 
/* Same as upb_inttable_lookup(), using the pointer's integer value as the
 * key. */
bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
                            upb_value *v) {
  uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_lookup(t, int_key, v);
}
4586 
/* Same as upb_inttable_remove(), using the pointer's integer value as the
 * key. */
bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
  uintptr_t int_key = (uintptr_t)key;
  return upb_inttable_remove(t, int_key, val);
}
4590 
/* Rebuilds "t" with array and hash parts sized for its current contents.
 *
 * The array part is made large enough for every key when the table is dense
 * enough (see MIN_DENSITY); otherwise the largest power-of-two array size
 * that still meets MIN_DENSITY is chosen and the remaining keys go into the
 * hash part.
 *
 * If memory runs out while building the replacement, the function returns
 * with "t" unchanged; it still holds every entry. */
void upb_inttable_compact(upb_inttable *t) {
  /* Create a power-of-two histogram of the table keys. */
  int counts[UPB_MAXARRSIZE + 1] = {0};
  uintptr_t max_key = 0;
  upb_inttable_iter i;
  size_t arr_size;
  int arr_count;
  upb_inttable new_t;

  upb_inttable_begin(&i, t);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    uintptr_t key = upb_inttable_iter_key(&i);
    if (key > max_key) {
      max_key = key;
    }
    counts[log2ceil(key)]++;
  }

  arr_size = 1;
  arr_count = upb_inttable_count(t);

  if (upb_inttable_count(t) >= max_key * MIN_DENSITY) {
    /* We can put 100% of the entries in the array part. */
    arr_size = max_key + 1;
  } else {
    /* Find the largest power of two that satisfies the MIN_DENSITY
     * definition. */
    int size_lg2;
    for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 1; size_lg2--) {
      arr_size = 1 << size_lg2;
      arr_count -= counts[size_lg2];
      if (arr_count >= arr_size * MIN_DENSITY) {
        break;
      }
    }
  }

  /* Array part must always be at least 1 entry large to catch lookups of key
   * 0.  Key 0 must always be in the array part because "0" in the hash part
   * denotes an empty entry. */
  arr_size = UPB_MAX(arr_size, 1);

  {
    /* Insert all elements into new, perfectly-sized table. */
    int hash_count = upb_inttable_count(t) - arr_count;
    int hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    int hashsize_lg2 = log2ceil(hash_size);

    assert(hash_count >= 0);
    if (!upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2)) {
      /* Out of memory: new_t was never set up, so it must not replace the
       * existing table. */
      return;
    }
    upb_inttable_begin(&i, t);
    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      uintptr_t k = upb_inttable_iter_key(&i);
      if (!upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i))) {
        /* Out of memory part-way through: drop the incomplete copy and keep
         * the original, which still holds every entry. */
        upb_inttable_uninit(&new_t);
        return;
      }
    }
    assert(new_t.array_size == arr_size);
    assert(new_t.t.size_lg2 == hashsize_lg2);
  }
  upb_inttable_uninit(t);
  *t = new_t;
}
4652 
4653 /* Iteration. */
4654 
int_tabent(const upb_inttable_iter * i)4655 static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
4656   assert(!i->array_part);
4657   return &i->t->t.entries[i->index];
4658 }
4659 
int_arrent(const upb_inttable_iter * i)4660 static upb_tabval int_arrent(const upb_inttable_iter *i) {
4661   assert(i->array_part);
4662   return i->t->array[i->index];
4663 }
4664 
/* Points the iterator at the first entry of "t": the array part is visited
 * first, then the hash part. */
void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
  i->t = t;
  i->array_part = true;
  i->index = -1;  /* upb_inttable_next() pre-increments to slot 0. */
  upb_inttable_next(i);
}
4671 
/* Advances to the next entry.  While in the array part, unoccupied slots are
 * skipped; once the array is exhausted the iterator switches to the hash
 * part and continues from its first occupied slot. */
void upb_inttable_next(upb_inttable_iter *iter) {
  const upb_inttable *t = iter->t;

  if (!iter->array_part) {
    iter->index = next(&t->t, iter->index);
    return;
  }

  while (++iter->index < t->array_size) {
    if (upb_arrhas(int_arrent(iter))) {
      return;
    }
  }

  /* Ran off the end of the array part; move on to the hash part. */
  iter->array_part = false;
  iter->index = begin(&t->t);
}
4686 
upb_inttable_done(const upb_inttable_iter * i)4687 bool upb_inttable_done(const upb_inttable_iter *i) {
4688   if (i->array_part) {
4689     return i->index >= i->t->array_size ||
4690            !upb_arrhas(int_arrent(i));
4691   } else {
4692     return i->index >= upb_table_size(&i->t->t) ||
4693            upb_tabent_isempty(int_tabent(i));
4694   }
4695 }
4696 
upb_inttable_iter_key(const upb_inttable_iter * i)4697 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
4698   assert(!upb_inttable_done(i));
4699   return i->array_part ? i->index : int_tabent(i)->key;
4700 }
4701 
upb_inttable_iter_value(const upb_inttable_iter * i)4702 upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
4703   assert(!upb_inttable_done(i));
4704   return _upb_value_val(
4705       i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
4706       i->t->t.ctype);
4707 }
4708 
/* Puts the iterator into the "done" state: in the hash part, with its index
 * at SIZE_MAX. */
void upb_inttable_iter_setdone(upb_inttable_iter *i) {
  i->array_part = false;
  i->index = SIZE_MAX;
}
4713 
upb_inttable_iter_isequal(const upb_inttable_iter * i1,const upb_inttable_iter * i2)4714 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
4715                                           const upb_inttable_iter *i2) {
4716   if (upb_inttable_done(i1) && upb_inttable_done(i2))
4717     return true;
4718   return i1->t == i2->t && i1->index == i2->index &&
4719          i1->array_part == i2->array_part;
4720 }
4721 
4722 #ifdef UPB_UNALIGNED_READS_OK
4723 /* -----------------------------------------------------------------------------
4724  * MurmurHash2, by Austin Appleby (released as public domain).
4725  * Reformatted and C99-ified by Joshua Haberman.
4726  * Note - This code makes a few assumptions about how your machine behaves -
4727  *   1. We can read a 4-byte value from any address without crashing
 *   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t)
4729  * And it has a few limitations -
4730  *   1. It will not work incrementally.
4731  *   2. It will not produce the same results on little-endian and big-endian
4732  *      machines. */
/* Computes the 32-bit MurmurHash2 of the "len" bytes at "key", starting from
 * "seed".
 *
 * Input words are loaded with memcpy() rather than through a cast to
 * uint32_t*, so the function neither casts away const nor depends on the
 * buffer's alignment or effective type.  The words are still read in native
 * byte order, so results match the cast-based form on the same machine. */
uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
  /* 'm' and 'r' are mixing constants generated offline.
   * They're not really 'magic', they just happen to work well. */
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;

  /* Initialize the hash to a 'random' value (only the low 32 bits of the
   * length take part). */
  uint32_t h = seed ^ (uint32_t)len;

  /* Mix 4 bytes at a time into the hash */
  const uint8_t * data = (const uint8_t *)key;
  while(len >= 4) {
    uint32_t k;
    memcpy(&k, data, sizeof(k));

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  /* Handle the last few bytes of the input array.  Each case deliberately
   * falls through to the ones below it. */
  switch(len) {
    case 3: h ^= (uint32_t)data[2] << 16;  /* fallthrough */
    case 2: h ^= (uint32_t)data[1] << 8;   /* fallthrough */
    case 1: h ^= data[0]; h *= m; break;
    default: break;
  }

  /* Do a few final mixes of the hash to ensure the last few
   * bytes are well-incorporated. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
4773 
4774 #else /* !UPB_UNALIGNED_READS_OK */
4775 
4776 /* -----------------------------------------------------------------------------
4777  * MurmurHashAligned2, by Austin Appleby
4778  * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
4779  * on certain platforms.
4780  * Performance will be lower than MurmurHash2 */
4781 
/* One mixing round: scrambles the 32-bit word "k" and folds it into hash "h"
 * using multiplier "m".  Relies on a variable named "r" (the shift amount)
 * being in scope at the point of use.  Modifies both h and k. */
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

/* Computes a 32-bit hash of the "len" bytes at "key", starting from "seed".
 *
 * When "key" is not 4-byte aligned and at least 4 bytes long, the leading
 * bytes are consumed individually and each following word is loaded from a
 * 4-byte-aligned address, then recombined by shifting with the previously
 * loaded word.  Otherwise (aligned or shorter than 4 bytes) the input is
 * processed with direct word loads followed by a byte-wise tail. */
uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t * data = (const uint8_t *)key;
  uint32_t h = seed ^ len;
  /* Offset of "data" within a 4-byte word (0 means aligned). */
  uint8_t align = (uintptr_t)data & 3;

  if(align && (len >= 4)) {
    /* Pre-load the temp registers */
    uint32_t t = 0, d = 0;
    int32_t sl;
    int32_t sr;

    /* Gather the 4-align leading bytes; the cases fall through on purpose. */
    switch(align) {
      case 1: t |= data[2] << 16;
      case 2: t |= data[1] << 8;
      case 3: t |= data[0];
    }

    t <<= (8 * align);

    /* From here on "data" is 4-byte aligned. */
    data += 4-align;
    len -= 4-align;

    /* Shift amounts used to splice two aligned words into one input word. */
    sl = 8 * (4-align);
    sr = 8 * align;

    /* Mix */

    while(len >= 4) {
      uint32_t k;

      d = *(uint32_t *)data;
      t = (t >> sr) | (d << sl);

      k = t;

      MIX(h,k,m);

      t = d;

      data += 4;
      len -= 4;
    }

    /* Handle leftover data in temp registers */

    d = 0;

    if(len >= align) {
      uint32_t k;

      /* Enough bytes remain to complete one more full word; the cases fall
       * through on purpose. */
      switch(align) {
        case 3: d |= data[2] << 16;
        case 2: d |= data[1] << 8;
        case 1: d |= data[0];
      }

      k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      /* ----------
       * Handle tail bytes */

      switch(len) {
        case 3: h ^= data[2] << 16;
        case 2: h ^= data[1] << 8;
        case 1: h ^= data[0]; h *= m;
      };
    } else {
      /* Fewer than "align" bytes remain: fold them in together with what is
       * left in "t"; the cases fall through on purpose. */
      switch(len) {
        case 3: d |= data[2] << 16;
        case 2: d |= data[1] << 8;
        case 1: d |= data[0];
        case 0: h ^= (t >> sr) | (d << sl); h *= m;
      }
    }

    /* Final mixing. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  } else {
    /* This loop only runs when len >= 4, which on this branch implies that
     * "data" is 4-byte aligned. */
    while(len >= 4) {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    /* ----------
     * Handle tail bytes */

    switch(len) {
      case 3: h ^= data[2] << 16;
      case 2: h ^= data[1] << 8;
      case 1: h ^= data[0]; h *= m;
    };

    /* Final mixing. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
}
#undef MIX
4897 
4898 #endif /* UPB_UNALIGNED_READS_OK */
4899 
4900 #include <errno.h>
4901 #include <stdarg.h>
4902 #include <stddef.h>
4903 #include <stdint.h>
4904 #include <stdio.h>
4905 #include <stdlib.h>
4906 #include <string.h>
4907 
/* Writes the error message held by `status` to stderr, followed by a
 * newline.  `closure` is not used.  Always returns false. */
bool upb_dumptostderr(void *closure, const upb_status* status) {
  const char *text = upb_status_errmsg(status);
  UPB_UNUSED(closure);
  fputs(text, stderr);
  fputc('\n', stderr);
  return false;
}
4913 
4914 /* Guarantee null-termination and provide ellipsis truncation.
4915  * It may be tempting to "optimize" this by initializing these final
4916  * four bytes up-front and then being careful never to overwrite them,
4917  * this is safer and simpler. */
nullz(upb_status * status)4918 static void nullz(upb_status *status) {
4919   const char *ellipsis = "...";
4920   size_t len = strlen(ellipsis);
4921   assert(sizeof(status->msg) > len);
4922   memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
4923 }
4924 
/* Puts `status` back into the ok state: sets the ok flag, zeroes the error
 * code and empties the message.  The error space pointer is left as it was.
 * A NULL `status` is ignored. */
void upb_status_clear(upb_status *status) {
  if (status != NULL) {
    status->msg[0] = '\0';
    status->code_ = 0;
    status->ok_ = true;
  }
}
4931 
upb_ok(const upb_status * status)4932 bool upb_ok(const upb_status *status) { return status->ok_; }
4933 
upb_status_errspace(const upb_status * status)4934 upb_errorspace *upb_status_errspace(const upb_status *status) {
4935   return status->error_space_;
4936 }
4937 
upb_status_errcode(const upb_status * status)4938 int upb_status_errcode(const upb_status *status) { return status->code_; }
4939 
upb_status_errmsg(const upb_status * status)4940 const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
4941 
/* Marks `status` as failed and stores a copy of `msg` as its message.  At
 * most sizeof(status->msg) bytes are copied, after which nullz() finishes off
 * the end of the buffer.  A NULL `status` is ignored. */
void upb_status_seterrmsg(upb_status *status, const char *msg) {
  if (status != NULL) {
    strncpy(status->msg, msg, sizeof(status->msg));
    nullz(status);
    status->ok_ = false;
  }
}
4948 
/* printf-style front end for upb_status_vseterrf(): packages the variadic
 * arguments into a va_list and forwards them together with `status` and
 * `fmt`. */
void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
  va_list ap;

  va_start(ap, fmt);
  upb_status_vseterrf(status, fmt, ap);
  va_end(ap);
}
4955 
/* Marks `status` as failed and formats its message from `fmt` and `args`
 * into status->msg (bounded by the size of that buffer), after which nullz()
 * finishes off the end of the buffer.  A NULL `status` is ignored. */
void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
  if (status != NULL) {
    status->ok_ = false;
    _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
    nullz(status);
  }
}
4962 
/* Marks `status` as failed, records the error space and code, and then asks
 * the error space to fill in the message through its set_message callback.
 * A NULL `status` is ignored; `space` is dereferenced without a check. */
void upb_status_seterrcode(upb_status *status, upb_errorspace *space,
                           int code) {
  if (status != NULL) {
    status->code_ = code;
    status->error_space_ = space;
    status->ok_ = false;
    space->set_message(status, code);
  }
}
4971 
/* Copies the whole of `*from` into `*to` by structure assignment.  A NULL
 * `to` is ignored; `from` is dereferenced without a check. */
void upb_status_copy(upb_status *to, const upb_status *from) {
  if (to != NULL) {
    *to = *from;
  }
}
4976 /* This file was generated by upbc (the upb compiler).
4977  * Do not edit -- your changes will be discarded when the file is
4978  * regenerated. */
4979 
4980 
/* Forward declarations of the statically-initialized definition tables.
 * The initializers further down take the addresses of entries in other
 * tables (msgs[] entries point into arrays[], intentries[] and strentries[];
 * fields[] entries point into msgs[] and enums[]; strentries[] entries point
 * into fields[]), so every table is declared before any of them is defined. */
4981 static const upb_msgdef msgs[20];
4982 static const upb_fielddef fields[81];
4983 static const upb_enumdef enums[4];
4984 static const upb_tabent strentries[236];
4985 static const upb_tabent intentries[14];
4986 static const upb_tabval arrays[232];
4987 
/* NOTE(review): reftables is declared only when UPB_DEBUG_REFS is defined,
 * yet the initializers below name &reftables[...] unconditionally.
 * Presumably the *_INIT macros drop those arguments in other builds --
 * confirm against the macro definitions. */
4988 #ifdef UPB_DEBUG_REFS
4989 static upb_inttable reftables[212];
4990 #endif
4991 
/* Static definitions of the 20 google.protobuf descriptor message types,
 * listed in sorted order of their full names.  The position of an entry is
 * the index that fields[] uses when it refers to a message as &msgs[N].
 * Each entry carries an integer table and a string table initializer whose
 * storage is a slice of arrays[], intentries[] and strentries[], plus a pair
 * of reftables[] slots; see UPB_MSGDEF_INIT for the meaning of the remaining
 * positional arguments. */
4992 static const upb_msgdef msgs[20] = {
4993   UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 27, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 8, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[0]),&reftables[0], &reftables[1]),
4994   UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 4, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[8], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[16]),&reftables[2], &reftables[3]),
4995   UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[20]),&reftables[4], &reftables[5]),
4996   UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 7, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[0], &arrays[15], 8, 1), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[24]),&reftables[6], &reftables[7]),
4997   UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 8, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[23], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]),&reftables[8], &reftables[9]),
4998   UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[27], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[32]),&reftables[10], &reftables[11]),
4999   UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 19, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 9, 8), UPB_STRTABLE_INIT(8, 15, UPB_CTYPE_PTR, 4, &strentries[36]),&reftables[12], &reftables[13]),
5000   UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 14, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[40], 32, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[52]),&reftables[14], &reftables[15]),
5001   UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 39, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[72], 12, 11), UPB_STRTABLE_INIT(11, 15, UPB_CTYPE_PTR, 4, &strentries[68]),&reftables[16], &reftables[17]),
5002   UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[84], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[84]),&reftables[18], &reftables[19]),
5003   UPB_MSGDEF_INIT("google.protobuf.FileOptions", 21, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[86], 64, 9), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[88]),&reftables[20], &reftables[21]),
5004   UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 8, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[8], &arrays[150], 16, 2), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[104]),&reftables[22], &reftables[23]),
5005   UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 13, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[166], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[108]),&reftables[24], &reftables[25]),
5006   UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[10], &arrays[171], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[116]),&reftables[26], &reftables[27]),
5007   UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[175], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[120]),&reftables[28], &reftables[29]),
5008   UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[12], &arrays[179], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[124]),&reftables[30], &reftables[31]),
5009   UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[183], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[128]),&reftables[32], &reftables[33]),
5010   UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 14, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[185], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[132]),&reftables[34], &reftables[35]),
5011   UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 18, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[190], 9, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[140]),&reftables[36], &reftables[37]),
5012   UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 6, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[199], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[156]),&reftables[38], &reftables[39]),
5013 };
5014 
/* Static definitions of the 81 fields belonging to the messages in msgs[],
 * listed in sorted order of field name.  A name shared by several messages
 * (e.g. "name", "options", "uninterpreted_option") appears once per
 * containing message.
 *
 * Each entry gives the label and type, the field name, what appears to be
 * the field number (e.g. 999 for every "uninterpreted_option"), the
 * containing message as &msgs[N], and -- for message- and enum-typed fields
 * -- the referenced definition as a upb_def pointer into msgs[] or enums[]
 * (NULL otherwise).  See UPB_FIELDDEF_INIT for the exact meaning of the
 * remaining positional arguments.  The position of an entry is the index
 * that strentries[] uses when it refers to a field as &fields[N]. */
5015 static const upb_fielddef fields[81] = {
5016   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[18], NULL, 15, 6, {0},&reftables[40], &reftables[41]),
5017   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[3], NULL, 6, 1, {0},&reftables[42], &reftables[43]),
5018   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[10], NULL, 17, 6, {0},&reftables[44], &reftables[45]),
5019   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[7], (const upb_def*)(&enums[2]), 6, 1, {0},&reftables[46], &reftables[47]),
5020   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[6], NULL, 16, 7, {0},&reftables[48], &reftables[49]),
5021   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[8], NULL, 30, 8, {0},&reftables[50], &reftables[51]),
5022   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[7], NULL, 8, 3, {0},&reftables[52], &reftables[53]),
5023   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[18], NULL, 11, 4, {0},&reftables[54], &reftables[55]),
5024   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[56], &reftables[57]),
5025   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[2]), 16, 2, {0},&reftables[58], &reftables[59]),
5026   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[8], (const upb_def*)(&msgs[2]), 13, 1, {0},&reftables[60], &reftables[61]),
5027   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "experimental_map_key", 9, &msgs[7], NULL, 10, 5, {0},&reftables[62], &reftables[63]),
5028   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[6], NULL, 7, 2, {0},&reftables[64], &reftables[65]),
5029   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[8], (const upb_def*)(&msgs[6]), 19, 3, {0},&reftables[66], &reftables[67]),
5030   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[6]), 22, 4, {0},&reftables[68], &reftables[69]),
5031   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 19, 3, {0},&reftables[70], &reftables[71]),
5032   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], (const upb_def*)(&msgs[6]), 10, 0, {0},&reftables[72], &reftables[73]),
5033   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[9], (const upb_def*)(&msgs[8]), 5, 0, {0},&reftables[74], &reftables[75]),
5034   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[10], NULL, 14, 5, {0},&reftables[76], &reftables[77]),
5035   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[18], NULL, 6, 1, {0},&reftables[78], &reftables[79]),
5036   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[12], NULL, 7, 2, {0},&reftables[80], &reftables[81]),
5037   UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[19], NULL, 5, 1, {0},&reftables[82], &reftables[83]),
5038   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[10], NULL, 20, 9, {0},&reftables[84], &reftables[85]),
5039   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[10], NULL, 18, 7, {0},&reftables[86], &reftables[87]),
5040   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[10], NULL, 13, 4, {0},&reftables[88], &reftables[89]),
5041   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[10], NULL, 9, 2, {0},&reftables[90], &reftables[91]),
5042   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[10], NULL, 6, 1, {0},&reftables[92], &reftables[93]),
5043   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[6], (const upb_def*)(&enums[0]), 11, 4, {0},&reftables[94], &reftables[95]),
5044   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[7], NULL, 9, 4, {0},&reftables[96], &reftables[97]),
5045   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[17], NULL, 8, 2, {0},&reftables[98], &reftables[99]),
5046   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[16], (const upb_def*)(&msgs[17]), 5, 0, {0},&reftables[100], &reftables[101]),
5047   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[11], NULL, 6, 1, {0},&reftables[102], &reftables[103]),
5048   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[8], (const upb_def*)(&msgs[0]), 10, 0, {0},&reftables[104], &reftables[105]),
5049   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[14], (const upb_def*)(&msgs[12]), 6, 0, {0},&reftables[106], &reftables[107]),
5050   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[8], NULL, 22, 6, {0},&reftables[108], &reftables[109]),
5051   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[14], NULL, 8, 2, {0},&reftables[110], &reftables[111]),
5052   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[18], (const upb_def*)(&msgs[19]), 5, 0, {0},&reftables[112], &reftables[113]),
5053   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[4], NULL, 4, 1, {0},&reftables[114], &reftables[115]),
5054   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 24, 6, {0},&reftables[116], &reftables[117]),
5055   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[12], NULL, 4, 1, {0},&reftables[118], &reftables[119]),
5056   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[2], NULL, 8, 2, {0},&reftables[120], &reftables[121]),
5057   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[6], NULL, 4, 1, {0},&reftables[122], &reftables[123]),
5058   UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[19], NULL, 2, 0, {0},&reftables[124], &reftables[125]),
5059   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[18], NULL, 10, 3, {0},&reftables[126], &reftables[127]),
5060   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 13, 1, {0},&reftables[128], &reftables[129]),
5061   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[11], NULL, 7, 2, {0},&reftables[130], &reftables[131]),
5062   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[6], NULL, 10, 3, {0},&reftables[132], &reftables[133]),
5063   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[4], NULL, 7, 2, {0},&reftables[134], &reftables[135]),
5064   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[10], (const upb_def*)(&enums[3]), 12, 3, {0},&reftables[136], &reftables[137]),
5065   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], (const upb_def*)(&msgs[11]), 23, 5, {0},&reftables[138], &reftables[139]),
5066   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[2], (const upb_def*)(&msgs[3]), 7, 1, {0},&reftables[140], &reftables[141]),
5067   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[6], (const upb_def*)(&msgs[7]), 3, 0, {0},&reftables[142], &reftables[143]),
5068   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[4], (const upb_def*)(&msgs[5]), 3, 0, {0},&reftables[144], &reftables[145]),
5069   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[8], (const upb_def*)(&msgs[10]), 20, 4, {0},&reftables[146], &reftables[147]),
5070   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[14], (const upb_def*)(&msgs[15]), 7, 1, {0},&reftables[148], &reftables[149]),
5071   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[12], (const upb_def*)(&msgs[13]), 3, 0, {0},&reftables[150], &reftables[151]),
5072   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[12], NULL, 10, 3, {0},&reftables[152], &reftables[153]),
5073   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[8], NULL, 25, 7, {0},&reftables[154], &reftables[155]),
5074   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[7], NULL, 7, 2, {0},&reftables[156], &reftables[157]),
5075   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[17], NULL, 4, 0, {0},&reftables[158], &reftables[159]),
5076   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[18], NULL, 9, 2, {0},&reftables[160], &reftables[161]),
5077   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[8], NULL, 35, 9, {0},&reftables[162], &reftables[163]),
5078   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[10], NULL, 19, 8, {0},&reftables[164], &reftables[165]),
5079   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[8], (const upb_def*)(&msgs[14]), 16, 2, {0},&reftables[166], &reftables[167]),
5080   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[8], (const upb_def*)(&msgs[16]), 21, 5, {0},&reftables[168], &reftables[169]),
5081   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[17], NULL, 7, 1, {0},&reftables[170], &reftables[171]),
5082   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[172], &reftables[173]),
5083   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[18], NULL, 12, 5, {0},&reftables[174], &reftables[175]),
5084   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[17], NULL, 11, 3, {0},&reftables[176], &reftables[177]),
5085   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[6], (const upb_def*)(&enums[1]), 12, 5, {0},&reftables[178], &reftables[179]),
5086   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[6], NULL, 13, 6, {0},&reftables[180], &reftables[181]),
5087   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[5], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[182], &reftables[183]),
5088   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[15], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[184], &reftables[185]),
5089   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[3], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[186], &reftables[187]),
5090   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[13], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[188], &reftables[189]),
5091   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[10], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[190], &reftables[191]),
5092   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[192], &reftables[193]),
5093   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[7], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[194], &reftables[195]),
5094   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[2], (const upb_def*)(&msgs[4]), 6, 0, {0},&reftables[196], &reftables[197]),
5095   UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[7], NULL, 13, 6, {0},&reftables[198], &reftables[199]),
5096   UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[8], NULL, 38, 10, {0},&reftables[200], &reftables[201]),
5097 };
5098 
/* Static definitions of the 4 google.protobuf descriptor enum types, listed
 * in sorted order of their full names.  The position of an entry is the
 * index that fields[] uses when it refers to an enum as &enums[N].
 * Each entry carries a string table holding UPB_CTYPE_INT32 values and an
 * integer table holding UPB_CTYPE_CSTR values -- presumably the name->number
 * and number->name mappings respectively; confirm against
 * UPB_ENUMDEF_INIT.  Their storage is a slice of strentries[] and arrays[]. */
5099 static const upb_enumdef enums[4] = {
5100   UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[160]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[202], 4, 3), 0, &reftables[202], &reftables[203]),
5101   UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_INT32, 5, &strentries[164]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[206], 19, 18), 0, &reftables[204], &reftables[205]),
5102   UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[196]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[225], 3, 3), 0, &reftables[206], &reftables[207]),
5103   UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[200]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[228], 4, 3), 0, &reftables[208], &reftables[209]),
5104 };
5105 
5106 static const upb_tabent strentries[236] = {
5107   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},
5108   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5109   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5110   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[38]), NULL},
5111   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5112   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5113   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5114   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INIT(&fields[16]), NULL},
5115   {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
5116   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5117   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_PTR_INIT(&fields[44]), NULL},
5118   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5119   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5120   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5121   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[49]), NULL},
5122   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[9]), &strentries[14]},
5123   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[66]), NULL},
5124   {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[8]), NULL},
5125   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5126   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5127   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5128   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INIT(&fields[78]), NULL},
5129   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[50]), NULL},
5130   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[40]), &strentries[22]},
5131   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
5132   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5133   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_PTR_INIT(&fields[1]), NULL},
5134   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5135   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[47]), NULL},
5136   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5137   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[52]), NULL},
5138   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[37]), &strentries[30]},
5139   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
5140   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5141   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5142   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5143   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5144   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INIT(&fields[27]), NULL},
5145   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5146   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[41]), NULL},
5147   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5148   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5149   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5150   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5151   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[46]), &strentries[49]},
5152   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5153   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5154   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR_INIT(&fields[70]), NULL},
5155   {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_INIT(&fields[12]), NULL},
5156   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT(&fields[69]), &strentries[48]},
5157   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "default_value"), UPB_TABVALUE_PTR_INIT(&fields[4]), NULL},
5158   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[51]), NULL},
5159   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "experimental_map_key"), UPB_TABVALUE_PTR_INIT(&fields[11]), &strentries[67]},
5160   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5161   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT(&fields[79]), NULL},
5162   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5163   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5164   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5165   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5166   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_INIT(&fields[58]), NULL},
5167   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT(&fields[28]), NULL},
5168   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5169   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INIT(&fields[3]), NULL},
5170   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5171   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5172   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[6]), NULL},
5173   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5174   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
5175   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL},
5176   {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVALUE_PTR_INIT(&fields[80]), NULL},
5177   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5178   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[34]), NULL},
5179   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_INIT(&fields[63]), NULL},
5180   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5181   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVALUE_PTR_INIT(&fields[64]), NULL},
5182   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5183   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5184   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5185   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PTR_INIT(&fields[5]), NULL},
5186   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_PTR_INIT(&fields[32]), NULL},
5187   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_INIT(&fields[57]), NULL},
5188   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[53]), &strentries[82]},
5189   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
5190   {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABVALUE_PTR_INIT(&fields[61]), &strentries[81]},
5191   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5192   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT(&fields[17]), NULL},
5193   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5194   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5195   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
5196   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5197   {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[2]), NULL},
5198   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5199   {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TABVALUE_PTR_INIT(&fields[24]), NULL},
5200   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5201   {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[23]), &strentries[102]},
5202   {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash"), UPB_TABVALUE_PTR_INIT(&fields[22]), NULL},
5203   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5204   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5205   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5206   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PTR_INIT(&fields[18]), NULL},
5207   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_PTR_INIT(&fields[26]), NULL},
5208   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_PTR_INIT(&fields[48]), NULL},
5209   {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[62]), NULL},
5210   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_TABVALUE_PTR_INIT(&fields[25]), NULL},
5211   {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UPB_TABVALUE_PTR_INIT(&fields[31]), &strentries[106]},
5212   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5213   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
5214   {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_accessor"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL},
5215   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5216   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5217   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5218   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[39]), NULL},
5219   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PTR_INIT(&fields[20]), NULL},
5220   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5221   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_PTR_INIT(&fields[56]), NULL},
5222   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[55]), NULL},
5223   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
5224   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5225   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5226   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5227   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5228   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[54]), &strentries[122]},
5229   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_INIT(&fields[33]), NULL},
5230   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[35]), &strentries[121]},
5231   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL},
5232   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5233   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5234   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5235   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5236   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5237   {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_INIT(&fields[30]), NULL},
5238   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5239   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5240   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5241   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5242   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT(&fields[65]), &strentries[139]},
5243   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5244   {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABVALUE_PTR_INIT(&fields[68]), NULL},
5245   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVALUE_PTR_INIT(&fields[29]), &strentries[137]},
5246   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT(&fields[59]), NULL},
5247   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_PTR_INIT(&fields[7]), NULL},
5248   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5249   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5250   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[36]), NULL},
5251   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5252   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5253   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5254   {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TABVALUE_PTR_INIT(&fields[43]), NULL},
5255   {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVALUE_PTR_INIT(&fields[0]), NULL},
5256   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5257   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5258   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5259   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5260   {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TABVALUE_PTR_INIT(&fields[60]), NULL},
5261   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVALUE_PTR_INIT(&fields[19]), NULL},
5262   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_PTR_INIT(&fields[67]), &strentries[154]},
5263   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5264   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5265   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_PTR_INIT(&fields[21]), NULL},
5266   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR_INIT(&fields[42]), NULL},
5267   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALUE_INT_INIT(2), &strentries[162]},
5268   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5269   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALUE_INT_INIT(3), NULL},
5270   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALUE_INT_INIT(1), NULL},
5271   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_INT_INIT(6), NULL},
5272   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5273   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5274   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5275   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5276   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_INT_INIT(9), NULL},
5277   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_INT_INIT(2), &strentries[193]},
5278   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_INT_INIT(1), NULL},
5279   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5280   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_INT_INIT(5), NULL},
5281   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE_INT_INIT(15), NULL},
5282   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_INT_INIT(7), NULL},
5283   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5284   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_INT_INIT(11), &strentries[194]},
5285   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5286   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5287   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_INT_INIT(3), &strentries[191]},
5288   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5289   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5290   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5291   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5292   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT_INIT(14), NULL},
5293   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_INT_INIT(13), NULL},
5294   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5295   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_INT_INIT(4), &strentries[190]},
5296   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5297   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), UPB_TABVALUE_INT_INIT(16), NULL},
5298   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_INT_INIT(12), NULL},
5299   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_INT_INIT(18), NULL},
5300   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT_INIT(8), NULL},
5301   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_INT_INIT(10), NULL},
5302   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_INT_INIT(17), NULL},
5303   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5304   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT(1), NULL},
5305   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_INIT(0), &strentries[197]},
5306   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_INT_INIT(2), NULL},
5307   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT_INIT(2), NULL},
5308   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INIT(1), &strentries[203]},
5309   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5310   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_INT_INIT(3), NULL},
5311   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5312   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5313   {UPB_TABKEY_STR("\047", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo.Location"), UPB_TABVALUE_PTR_INIT(&msgs[17]), NULL},
5314   {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.UninterpretedOption"), UPB_TABVALUE_PTR_INIT(&msgs[18]), NULL},
5315   {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.FileDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[8]), NULL},
5316   {UPB_TABKEY_STR("\045", "\000", "\000", "\000", "google.protobuf.MethodDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[12]), NULL},
5317   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5318   {UPB_TABKEY_STR("\040", "\000", "\000", "\000", "google.protobuf.EnumValueOptions"), UPB_TABVALUE_PTR_INIT(&msgs[5]), NULL},
5319   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5320   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5321   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5322   {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "google.protobuf.DescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[0]), &strentries[228]},
5323   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5324   {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo"), UPB_TABVALUE_PTR_INIT(&msgs[16]), NULL},
5325   {UPB_TABKEY_STR("\051", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Type"), UPB_TABVALUE_PTR_INIT(&enums[1]), NULL},
5326   {UPB_TABKEY_STR("\056", "\000", "\000", "\000", "google.protobuf.DescriptorProto.ExtensionRange"), UPB_TABVALUE_PTR_INIT(&msgs[1]), NULL},
5327   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
5328   {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.EnumValueDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[4]), NULL},
5329   {UPB_TABKEY_STR("\034", "\000", "\000", "\000", "google.protobuf.FieldOptions"), UPB_TABVALUE_PTR_INIT(&msgs[7]), NULL},
5330   {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.FileOptions"), UPB_TABVALUE_PTR_INIT(&msgs[10]), NULL},
5331   {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.EnumDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[2]), &strentries[233]},
5332   {UPB_TABKEY_STR("\052", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Label"), UPB_TABVALUE_PTR_INIT(&enums[0]), NULL},
5333   {UPB_TABKEY_STR("\046", "\000", "\000", "\000", "google.protobuf.ServiceDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[14]), NULL},
5334   {UPB_TABKEY_STR("\042", "\000", "\000", "\000", "google.protobuf.FieldOptions.CType"), UPB_TABVALUE_PTR_INIT(&enums[2]), &strentries[229]},
5335   {UPB_TABKEY_STR("\041", "\000", "\000", "\000", "google.protobuf.FileDescriptorSet"), UPB_TABVALUE_PTR_INIT(&msgs[9]), &strentries[235]},
5336   {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.EnumOptions"), UPB_TABVALUE_PTR_INIT(&msgs[3]), NULL},
5337   {UPB_TABKEY_STR("\044", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[6]), NULL},
5338   {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.FileOptions.OptimizeMode"), UPB_TABVALUE_PTR_INIT(&enums[3]), &strentries[221]},
5339   {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.ServiceOptions"), UPB_TABVALUE_PTR_INIT(&msgs[15]), NULL},
5340   {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.MessageOptions"), UPB_TABVALUE_PTR_INIT(&msgs[11]), NULL},
5341   {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "google.protobuf.MethodOptions"), UPB_TABVALUE_PTR_INIT(&msgs[13]), &strentries[226]},
5342   {UPB_TABKEY_STR("\054", "\000", "\000", "\000", "google.protobuf.UninterpretedOption.NamePart"), UPB_TABVALUE_PTR_INIT(&msgs[19]), NULL},
5343 };
5344 
/* Statically-initialized hash entries for integer-keyed tables.
 *
 * The 14 slots alternate between an empty slot and an entry keyed by the
 * number 999 whose value points at a field def; no entry has a collision
 * chain (every "next" pointer is NULL).  fields[72], [74], [75], [76] and
 * [77] are the same defs that strentries registers under the name
 * "uninterpreted_option".
 *
 * NOTE(review): this looks like seven two-slot hash parts, one per message
 * that declares field number 999 -- confirm against the msgs[] initializers,
 * which are the presumed consumers of these slices. */
static const upb_tabent intentries[14] = {
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
  {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
  {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL},
};
5361 
/* Statically-initialized table values addressed by position.
 *
 * The 232 values are either empty, a pointer to a field def, or (in the
 * final runs) a pointer to an enum value name.  Order and count are
 * significant: do not reorder, insert or remove entries by hand.
 *
 * NOTE(review): these are presumably the dense "array parts" of the integer
 * tables owned by the message and enum defs, each def using a contiguous
 * slice indexed by field/enum number -- confirm against the msgs[] and
 * enums[] initializers, which are outside this excerpt. */
static const upb_tabval arrays[232] = {
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[38]),
  UPB_TABVALUE_PTR_INIT(&fields[16]),
  UPB_TABVALUE_PTR_INIT(&fields[44]),
  UPB_TABVALUE_PTR_INIT(&fields[9]),
  UPB_TABVALUE_PTR_INIT(&fields[15]),
  UPB_TABVALUE_PTR_INIT(&fields[14]),
  UPB_TABVALUE_PTR_INIT(&fields[49]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[66]),
  UPB_TABVALUE_PTR_INIT(&fields[8]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[40]),
  UPB_TABVALUE_PTR_INIT(&fields[78]),
  UPB_TABVALUE_PTR_INIT(&fields[50]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[1]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[37]),
  UPB_TABVALUE_PTR_INIT(&fields[47]),
  UPB_TABVALUE_PTR_INIT(&fields[52]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[41]),
  UPB_TABVALUE_PTR_INIT(&fields[12]),
  UPB_TABVALUE_PTR_INIT(&fields[46]),
  UPB_TABVALUE_PTR_INIT(&fields[27]),
  UPB_TABVALUE_PTR_INIT(&fields[69]),
  UPB_TABVALUE_PTR_INIT(&fields[70]),
  UPB_TABVALUE_PTR_INIT(&fields[4]),
  UPB_TABVALUE_PTR_INIT(&fields[51]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[3]),
  UPB_TABVALUE_PTR_INIT(&fields[58]),
  UPB_TABVALUE_PTR_INIT(&fields[6]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[28]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[11]),
  UPB_TABVALUE_PTR_INIT(&fields[79]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[34]),
  UPB_TABVALUE_PTR_INIT(&fields[57]),
  UPB_TABVALUE_PTR_INIT(&fields[5]),
  UPB_TABVALUE_PTR_INIT(&fields[32]),
  UPB_TABVALUE_PTR_INIT(&fields[10]),
  UPB_TABVALUE_PTR_INIT(&fields[63]),
  UPB_TABVALUE_PTR_INIT(&fields[13]),
  UPB_TABVALUE_PTR_INIT(&fields[53]),
  UPB_TABVALUE_PTR_INIT(&fields[64]),
  UPB_TABVALUE_PTR_INIT(&fields[61]),
  UPB_TABVALUE_PTR_INIT(&fields[80]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[17]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[26]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[25]),
  UPB_TABVALUE_PTR_INIT(&fields[48]),
  UPB_TABVALUE_PTR_INIT(&fields[24]),
  UPB_TABVALUE_PTR_INIT(&fields[18]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[2]),
  UPB_TABVALUE_PTR_INIT(&fields[23]),
  UPB_TABVALUE_PTR_INIT(&fields[62]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[22]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[31]),
  UPB_TABVALUE_PTR_INIT(&fields[45]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[39]),
  UPB_TABVALUE_PTR_INIT(&fields[20]),
  UPB_TABVALUE_PTR_INIT(&fields[56]),
  UPB_TABVALUE_PTR_INIT(&fields[55]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[35]),
  UPB_TABVALUE_PTR_INIT(&fields[33]),
  UPB_TABVALUE_PTR_INIT(&fields[54]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[30]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[59]),
  UPB_TABVALUE_PTR_INIT(&fields[65]),
  UPB_TABVALUE_PTR_INIT(&fields[29]),
  UPB_TABVALUE_PTR_INIT(&fields[68]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[36]),
  UPB_TABVALUE_PTR_INIT(&fields[19]),
  UPB_TABVALUE_PTR_INIT(&fields[60]),
  UPB_TABVALUE_PTR_INIT(&fields[43]),
  UPB_TABVALUE_PTR_INIT(&fields[7]),
  UPB_TABVALUE_PTR_INIT(&fields[67]),
  UPB_TABVALUE_PTR_INIT(&fields[0]),
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT(&fields[42]),
  UPB_TABVALUE_PTR_INIT(&fields[21]),
  /* LABEL_* names.  Counting the empty slot below as offset 0, each name sits
   * at the offset equal to the number strentries maps it to (OPTIONAL=1,
   * REQUIRED=2, REPEATED=3). */
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"),
  UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"),
  UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"),
  /* TYPE_* names, same layout: empty slot at offset 0, then TYPE_DOUBLE=1
   * through TYPE_SINT64=18, matching the numbers in strentries. */
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"),
  UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"),
  UPB_TABVALUE_PTR_INIT("TYPE_INT64"),
  UPB_TABVALUE_PTR_INIT("TYPE_UINT64"),
  UPB_TABVALUE_PTR_INIT("TYPE_INT32"),
  UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"),
  UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"),
  UPB_TABVALUE_PTR_INIT("TYPE_BOOL"),
  UPB_TABVALUE_PTR_INIT("TYPE_STRING"),
  UPB_TABVALUE_PTR_INIT("TYPE_GROUP"),
  UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"),
  UPB_TABVALUE_PTR_INIT("TYPE_BYTES"),
  UPB_TABVALUE_PTR_INIT("TYPE_UINT32"),
  UPB_TABVALUE_PTR_INIT("TYPE_ENUM"),
  UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"),
  UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"),
  UPB_TABVALUE_PTR_INIT("TYPE_SINT32"),
  UPB_TABVALUE_PTR_INIT("TYPE_SINT64"),
  /* STRING / CORD / STRING_PIECE names.  This run has no leading empty slot
   * because strentries numbers these values from 0 (STRING=0, CORD=1,
   * STRING_PIECE=2). */
  UPB_TABVALUE_PTR_INIT("STRING"),
  UPB_TABVALUE_PTR_INIT("CORD"),
  UPB_TABVALUE_PTR_INIT("STRING_PIECE"),
  /* SPEED / CODE_SIZE / LITE_RUNTIME names: empty slot at offset 0, then
   * SPEED=1, CODE_SIZE=2, LITE_RUNTIME=3 as numbered in strentries. */
  UPB_TABVALUE_EMPTY_INIT,
  UPB_TABVALUE_PTR_INIT("SPEED"),
  UPB_TABVALUE_PTR_INIT("CODE_SIZE"),
  UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"),
};
5596 
/* The statically-initialized symbol table handed out by
 * upbdefs_google_protobuf_descriptor().  Its string table is typed
 * UPB_CTYPE_PTR and uses the entries starting at strentries[204]; the last
 * two arguments point at reftables[210] and reftables[211].
 *
 * NOTE(review): the numeric arguments 24, 31 and 5 are presumably the entry
 * count, the slot mask and log2 of the slot count (32 slots) -- confirm
 * against the UPB_STRTABLE_INIT definition in upb.h. */
static const upb_symtab symtab = UPB_SYMTAB_INIT(UPB_STRTABLE_INIT(24, 31, UPB_CTYPE_PTR, 5, &strentries[204]), &reftables[210], &reftables[211]);
5598 
upbdefs_google_protobuf_descriptor(const void * owner)5599 const upb_symtab *upbdefs_google_protobuf_descriptor(const void *owner) {
5600   upb_symtab_ref(&symtab, owner);
5601   return &symtab;
5602 }
5603 
5604 #ifdef UPB_DEBUG_REFS
5605 static upb_inttable reftables[212] = {
5606   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5607   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5608   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5609   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5610   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5611   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5612   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5613   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5614   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5615   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5616   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5617   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5618   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5619   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5620   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5621   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5622   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5623   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5624   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5625   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5626   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5627   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5628   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5629   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5630   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5631   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5632   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5633   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5634   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5635   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5636   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5637   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5638   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5639   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5640   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5641   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5642   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5643   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5644   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5645   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5646   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5647   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5648   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5649   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5650   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5651   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5652   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5653   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5654   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5655   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5656   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5657   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5658   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5659   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5660   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5661   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5662   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5663   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5664   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5665   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5666   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5667   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5668   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5669   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5670   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5671   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5672   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5673   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5674   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5675   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5676   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5677   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5678   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5679   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5680   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5681   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5682   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5683   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5684   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5685   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5686   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5687   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5688   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5689   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5690   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5691   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5692   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5693   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5694   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5695   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5696   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5697   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5698   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5699   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5700   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5701   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5702   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5703   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5704   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5705   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5706   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5707   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5708   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5709   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5710   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5711   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5712   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5713   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5714   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5715   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5716   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5717   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5718   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5719   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5720   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5721   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5722   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5723   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5724   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5725   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5726   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5727   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5728   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5729   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5730   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5731   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5732   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5733   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5734   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5735   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5736   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5737   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5738   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5739   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5740   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5741   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5742   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5743   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5744   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5745   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5746   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5747   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5748   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5749   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5750   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5751   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5752   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5753   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5754   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5755   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5756   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5757   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5758   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5759   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5760   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5761   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5762   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5763   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5764   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5765   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5766   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5767   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5768   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5769   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5770   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5771   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5772   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5773   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5774   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5775   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5776   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5777   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5778   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5779   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5780   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5781   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5782   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5783   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5784   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5785   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5786   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5787   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5788   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5789   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5790   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5791   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5792   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5793   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5794   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5795   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5796   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5797   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5798   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5799   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5800   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5801   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5802   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5803   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5804   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5805   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5806   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5807   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5808   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5809   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5810   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5811   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5812   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5813   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5814   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5815   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5816   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5817   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
5818 };
5819 #endif
5820 
5821 /*
5822 ** XXX: The routines in this file that consume a string do not currently
5823 ** support having the string span buffers.  In the future, as upb_sink and
5824 ** its buffering/sharing functionality evolve there should be an easy and
5825 ** idiomatic way of correctly handling this case.  For now, we accept this
5826 ** limitation since we currently only parse descriptors from single strings.
5827 */
5828 
5829 
5830 #include <errno.h>
5831 #include <stdlib.h>
5832 #include <string.h>
5833 
/* upb_deflist is an internal-only dynamic array for storing a growing list of
 * upb_defs. */
typedef struct {
  /* Heap-allocated array of def pointers; NULL until the first push. */
  upb_def **defs;
  /* Number of entries currently stored in "defs". */
  size_t len;
  /* Allocated capacity of "defs", in entries. */
  size_t size;
  /* True while the list still holds refs on its defs; cleared once the refs
   * have been donated to another owner. */
  bool owned;
} upb_deflist;
5842 
/* We keep a stack of all the message scopes we are currently in, as well as
 * the top-level file scope.  This is necessary to correctly qualify the
 * definitions that are contained inside.  "name" tracks the name of the
 * message or package (a bare name -- not qualified by any enclosing scopes). */
typedef struct {
  /* Heap-allocated scope name owned by the frame, or NULL if no name has been
   * set for this scope yet. */
  char *name;
  /* Index of the first def that is under this scope.  For msgdefs, the
   * msgdef itself is at start-1. */
  int start;
} upb_descreader_frame;
5853 
5854 /* The maximum number of nested declarations that are allowed, ie.
5855  * message Foo {
5856  *   message Bar {
5857  *     message Baz {
5858  *     }
5859  *   }
5860  * }
5861  *
5862  * This is a resource limit that affects how big our runtime stack can grow.
5863  * TODO: make this a runtime-settable property of the Reader instance. */
5864 #define UPB_MAX_MESSAGE_NESTING 64
5865 
/* State of a descriptor reader.  The handlers in this file receive a pointer
 * to this struct as their closure and accumulate the defs they build in
 * "defs". */
struct upb_descreader {
  /* Input sink, exposed to callers through upb_descreader_input(). */
  upb_sink sink;
  /* All defs created so far. */
  upb_deflist defs;
  /* Stack of enclosing scopes; the first stack_len entries are in use. */
  upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
  int stack_len;

  /* Number and name of the enum value currently being read, plus flags that
   * record whether each has been seen.  "name" is heap-allocated. */
  uint32_t number;
  char *name;
  bool saw_number;
  bool saw_name;

  /* Text of the current field's default value, saved until the end of the
   * field; heap-allocated, or NULL when there is none. */
  char *default_string;

  /* The field currently being built. */
  upb_fielddef *f;
};
5881 
/* Copies the first "n" bytes of "buf" into a freshly malloc()ed buffer and
 * appends a terminating NUL.  Returns NULL if the allocation fails; otherwise
 * the caller owns the result and must free() it. */
static char *upb_strndup(const char *buf, size_t n) {
  char *copy = malloc(n + 1);
  if (copy != NULL) {
    memcpy(copy, buf, n);
    copy[n] = '\0';
  }
  return copy;
}
5889 
/* Returns a newly allocated string that joins the input strings together with
 * a '.' separator, for example:
 *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
 *   join("", "Baz") -> "Baz"
 *   join(NULL, "Baz") -> "Baz"
 * The result is allocated with malloc(); the caller owns it and must free()
 * it.  Returns NULL if "name" is NULL or if memory cannot be allocated. */
static char *upb_join(const char *base, const char *name) {
  size_t base_len;
  size_t name_len;
  char *ret;
  char *p;

  if (!name) return NULL;
  base_len = base ? strlen(base) : 0;
  name_len = strlen(name);

  /* Room for base, an optional '.' separator, name and the trailing NUL. */
  ret = malloc(base_len + name_len + 2);
  if (!ret) return NULL;

  p = ret;
  if (base_len > 0) {
    memcpy(p, base, base_len);
    p += base_len;
    *p++ = '.';
  }
  /* Copy name together with its terminating NUL. */
  memcpy(p, name, name_len + 1);
  return ret;
}
5907 
5908 
5909 /* upb_deflist ****************************************************************/
5910 
/* Puts "l" into the empty state: no storage, no entries, and marked as owning
 * the refs of whatever defs are pushed later. */
void upb_deflist_init(upb_deflist *l) {
  l->defs = NULL;
  l->len = 0;
  l->size = 0;
  l->owned = true;
}
5917 
/* Releases the list's storage.  If the list still owns its refs, every stored
 * def is unref'd (with the list as owner) first. */
void upb_deflist_uninit(upb_deflist *l) {
  if (l->owned) {
    size_t idx = 0;
    while (idx < l->len) {
      upb_def_unref(l->defs[idx], l);
      idx++;
    }
  }
  free(l->defs);
}
5925 
/* Appends "d" to the list, growing the storage when needed.
 *
 * Returns true on success.  Returns false if the storage could not be grown;
 * in that case the list is left exactly as it was (same array, same length),
 * so it can still be used and uninitialized safely. */
bool upb_deflist_push(upb_deflist *l, upb_def *d) {
  /* Same growth policy as before: grow as soon as the new length would reach
   * the current capacity. */
  if (l->len + 1 >= l->size) {
    size_t new_size = UPB_MAX(l->size, 4) * 2;
    /* Keep the old pointer until realloc() succeeds, so a failure neither
     * leaks the existing array nor leaves l->defs NULL. */
    upb_def **new_defs = realloc(l->defs, new_size * sizeof(*new_defs));
    if (!new_defs) return false;
    l->defs = new_defs;
    l->size = new_size;
  }
  l->defs[l->len++] = d;
  return true;
}
5937 
/* Transfers the ref the list holds on each def to "owner" and marks the list
 * as no longer owning them.  The list must currently own its refs. */
void upb_deflist_donaterefs(upb_deflist *l, void *owner) {
  size_t idx = 0;
  assert(l->owned);
  while (idx < l->len) {
    upb_def_donateref(l->defs[idx], l, owner);
    idx++;
  }
  l->owned = false;
}
5945 
upb_deflist_last(upb_deflist * l)5946 static upb_def *upb_deflist_last(upb_deflist *l) {
5947   return l->defs[l->len-1];
5948 }
5949 
5950 /* Qualify the defname for all defs starting with offset "start" with "str". */
upb_deflist_qualify(upb_deflist * l,char * str,int32_t start)5951 static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
5952   uint32_t i;
5953   for (i = start; i < l->len; i++) {
5954     upb_def *def = l->defs[i];
5955     char *name = upb_join(str, upb_def_fullname(def));
5956     upb_def_setfullname(def, name, NULL);
5957     free(name);
5958   }
5959 }
5960 
5961 
5962 /* upb_descreader  ************************************************************/
5963 
upb_descreader_top(upb_descreader * r)5964 static upb_msgdef *upb_descreader_top(upb_descreader *r) {
5965   int index;
5966   assert(r->stack_len > 1);
5967   index = r->stack[r->stack_len-1].start - 1;
5968   assert(index >= 0);
5969   return upb_downcast_msgdef_mutable(r->defs.defs[index]);
5970 }
5971 
upb_descreader_last(upb_descreader * r)5972 static upb_def *upb_descreader_last(upb_descreader *r) {
5973   return upb_deflist_last(&r->defs);
5974 }
5975 
/* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
 * entities that have names and can contain sub-definitions). */
/* Pushes a new, unnamed scope frame whose defs start at the current end of
 * the def list.
 *
 * NOTE(review): the stack has a fixed capacity of UPB_MAX_MESSAGE_NESTING.
 * This function cannot report failure, so exceeding the limit is only caught
 * by the assert below in debug builds; input nested more deeply than that
 * must be rejected before reaching this point. */
void upb_descreader_startcontainer(upb_descreader *r) {
  upb_descreader_frame *f;

  assert(r->stack_len < UPB_MAX_MESSAGE_NESTING);
  f = &r->stack[r->stack_len++];
  f->start = r->defs.len;
  f->name = NULL;
}
5983 
/* Pops the innermost scope frame, qualifies every def created inside it with
 * the frame's name, and releases that name. */
void upb_descreader_endcontainer(upb_descreader *r) {
  upb_descreader_frame *frame;

  r->stack_len--;
  frame = &r->stack[r->stack_len];
  upb_deflist_qualify(&r->defs, frame->name, frame->start);
  free(frame->name);
  frame->name = NULL;
}
5990 
/* Replaces the name of the innermost scope with "str".  The previous name is
 * freed and the frame takes ownership of "str". */
void upb_descreader_setscopename(upb_descreader *r, char *str) {
  char **slot = &r->stack[r->stack_len - 1].name;
  free(*slot);
  *slot = str;
}
5996 
5997 /* Handlers for google.protobuf.FileDescriptorProto. */
file_startmsg(void * r,const void * hd)5998 static bool file_startmsg(void *r, const void *hd) {
5999   UPB_UNUSED(hd);
6000   upb_descreader_startcontainer(r);
6001   return true;
6002 }
6003 
/* End-of-message handler: closes the file-level scope, which qualifies the
 * defs created inside it.  Always succeeds; "status" is not used. */
static bool file_endmsg(void *closure, const void *hd, upb_status *status) {
  UPB_UNUSED(hd);
  UPB_UNUSED(status);
  upb_descreader_endcontainer((upb_descreader *)closure);
  return true;
}
6011 
file_onpackage(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6012 static size_t file_onpackage(void *closure, const void *hd, const char *buf,
6013                              size_t n, const upb_bufhandle *handle) {
6014   upb_descreader *r = closure;
6015   UPB_UNUSED(hd);
6016   UPB_UNUSED(handle);
6017   /* XXX: see comment at the top of the file. */
6018   upb_descreader_setscopename(r, upb_strndup(buf, n));
6019   return n;
6020 }
6021 
6022 /* Handlers for google.protobuf.EnumValueDescriptorProto. */
enumval_startmsg(void * closure,const void * hd)6023 static bool enumval_startmsg(void *closure, const void *hd) {
6024   upb_descreader *r = closure;
6025   UPB_UNUSED(hd);
6026   r->saw_number = false;
6027   r->saw_name = false;
6028   return true;
6029 }
6030 
enumval_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6031 static size_t enumval_onname(void *closure, const void *hd, const char *buf,
6032                              size_t n, const upb_bufhandle *handle) {
6033   upb_descreader *r = closure;
6034   UPB_UNUSED(hd);
6035   UPB_UNUSED(handle);
6036   /* XXX: see comment at the top of the file. */
6037   free(r->name);
6038   r->name = upb_strndup(buf, n);
6039   r->saw_name = true;
6040   return n;
6041 }
6042 
enumval_onnumber(void * closure,const void * hd,int32_t val)6043 static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
6044   upb_descreader *r = closure;
6045   UPB_UNUSED(hd);
6046   r->number = val;
6047   r->saw_number = true;
6048   return true;
6049 }
6050 
/* End-of-message handler: adds the collected name/number pair to the enum
 * that is currently last in the def list.
 *
 * Returns false, with "status" describing the problem, if the value lacks a
 * name or a number or if the enum rejects the value.  The saved name is
 * released once an add has been attempted. */
static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *r = closure;
  upb_enumdef *e;
  bool ok;
  UPB_UNUSED(hd);

  if(!r->saw_number || !r->saw_name) {
    upb_status_seterrmsg(status, "Enum value missing name or number.");
    return false;
  }
  e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
  /* Propagate the result so a rejected value stops the parse instead of being
   * silently reported as success. */
  ok = upb_enumdef_addval(e, r->name, r->number, status);
  free(r->name);
  r->name = NULL;
  return ok;
}
6066 
6067 
6068 /* Handlers for google.protobuf.EnumDescriptorProto. */
enum_startmsg(void * closure,const void * hd)6069 static bool enum_startmsg(void *closure, const void *hd) {
6070   upb_descreader *r = closure;
6071   UPB_UNUSED(hd);
6072   upb_deflist_push(&r->defs,
6073                    upb_enumdef_upcast_mutable(upb_enumdef_new(&r->defs)));
6074   return true;
6075 }
6076 
/* End-of-message handler: checks that the enum just read has a name and at
 * least one value.  On failure sets "status" and returns false. */
static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *r = closure;
  upb_def *last = upb_descreader_last(r);
  upb_enumdef *e = upb_downcast_enumdef_mutable(last);
  UPB_UNUSED(hd);

  if (upb_def_fullname(last) == NULL) {
    upb_status_seterrmsg(status, "Enum had no name.");
    return false;
  }
  if (upb_enumdef_numvals(e) == 0) {
    upb_status_seterrmsg(status, "Enum had no values.");
    return false;
  }
  return true;
}
6093 
enum_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6094 static size_t enum_onname(void *closure, const void *hd, const char *buf,
6095                           size_t n, const upb_bufhandle *handle) {
6096   upb_descreader *r = closure;
6097   char *fullname = upb_strndup(buf, n);
6098   UPB_UNUSED(hd);
6099   UPB_UNUSED(handle);
6100   /* XXX: see comment at the top of the file. */
6101   upb_def_setfullname(upb_descreader_last(r), fullname, NULL);
6102   free(fullname);
6103   return n;
6104 }
6105 
6106 /* Handlers for google.protobuf.FieldDescriptorProto */
field_startmsg(void * closure,const void * hd)6107 static bool field_startmsg(void *closure, const void *hd) {
6108   upb_descreader *r = closure;
6109   UPB_UNUSED(hd);
6110   r->f = upb_fielddef_new(&r->defs);
6111   free(r->default_string);
6112   r->default_string = NULL;
6113 
6114   /* fielddefs default to packed, but descriptors default to non-packed. */
6115   upb_fielddef_setpacked(r->f, false);
6116   return true;
6117 }
6118 
6119 /* Converts the default value in string "str" into "d".  Passes a ref on str.
6120  * Returns true on success. */
parse_default(char * str,upb_fielddef * f)6121 static bool parse_default(char *str, upb_fielddef *f) {
6122   bool success = true;
6123   char *end;
6124   switch (upb_fielddef_type(f)) {
6125     case UPB_TYPE_INT32: {
6126       long val = strtol(str, &end, 0);
6127       if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
6128         success = false;
6129       else
6130         upb_fielddef_setdefaultint32(f, val);
6131       break;
6132     }
6133     case UPB_TYPE_INT64: {
6134       /* XXX: Need to write our own strtoll, since it's not available in c89. */
6135       long long val = strtol(str, &end, 0);
6136       if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end)
6137         success = false;
6138       else
6139         upb_fielddef_setdefaultint64(f, val);
6140       break;
6141     }
6142     case UPB_TYPE_UINT32: {
6143       unsigned long val = strtoul(str, &end, 0);
6144       if (val > UINT32_MAX || errno == ERANGE || *end)
6145         success = false;
6146       else
6147         upb_fielddef_setdefaultuint32(f, val);
6148       break;
6149     }
6150     case UPB_TYPE_UINT64: {
6151       /* XXX: Need to write our own strtoull, since it's not available in c89. */
6152       unsigned long long val = strtoul(str, &end, 0);
6153       if (val > UINT64_MAX || errno == ERANGE || *end)
6154         success = false;
6155       else
6156         upb_fielddef_setdefaultuint64(f, val);
6157       break;
6158     }
6159     case UPB_TYPE_DOUBLE: {
6160       double val = strtod(str, &end);
6161       if (errno == ERANGE || *end)
6162         success = false;
6163       else
6164         upb_fielddef_setdefaultdouble(f, val);
6165       break;
6166     }
6167     case UPB_TYPE_FLOAT: {
6168       /* XXX: Need to write our own strtof, since it's not available in c89. */
6169       float val = strtod(str, &end);
6170       if (errno == ERANGE || *end)
6171         success = false;
6172       else
6173         upb_fielddef_setdefaultfloat(f, val);
6174       break;
6175     }
6176     case UPB_TYPE_BOOL: {
6177       if (strcmp(str, "false") == 0)
6178         upb_fielddef_setdefaultbool(f, false);
6179       else if (strcmp(str, "true") == 0)
6180         upb_fielddef_setdefaultbool(f, true);
6181       else
6182         success = false;
6183       break;
6184     }
6185     default: abort();
6186   }
6187   return success;
6188 }
6189 
/* End-of-message handler: applies the default-value text saved for the field,
 * if any, now that the field's type is known.  Returns false and sets
 * "status" if the field is a submessage or the text cannot be converted. */
static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *r = closure;
  upb_fielddef *f = r->f;
  UPB_UNUSED(hd);

  /* TODO: verify that all required fields were present. */
  assert(upb_fielddef_number(f) != 0);
  assert(upb_fielddef_name(f) != NULL);
  assert((upb_fielddef_subdefname(f) != NULL) == upb_fielddef_hassubdef(f));

  /* No default was given: nothing left to do. */
  if (!r->default_string) return true;

  if (upb_fielddef_issubmsg(f)) {
    upb_status_seterrmsg(status, "Submessages cannot have defaults.");
    return false;
  }

  /* String and enum defaults are stored as text, without conversion. */
  if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM) {
    upb_fielddef_setdefaultcstr(f, r->default_string, NULL);
    return true;
  }

  if (!parse_default(r->default_string, f)) {
    /* We don't worry too much about giving a great error message since the
     * compiler should have ensured this was correct. */
    upb_status_seterrmsg(status, "Error converting default value.");
    return false;
  }
  return true;
}
6218 
field_onlazy(void * closure,const void * hd,bool val)6219 static bool field_onlazy(void *closure, const void *hd, bool val) {
6220   upb_descreader *r = closure;
6221   UPB_UNUSED(hd);
6222 
6223   upb_fielddef_setlazy(r->f, val);
6224   return true;
6225 }
6226 
field_onpacked(void * closure,const void * hd,bool val)6227 static bool field_onpacked(void *closure, const void *hd, bool val) {
6228   upb_descreader *r = closure;
6229   UPB_UNUSED(hd);
6230 
6231   upb_fielddef_setpacked(r->f, val);
6232   return true;
6233 }
6234 
field_ontype(void * closure,const void * hd,int32_t val)6235 static bool field_ontype(void *closure, const void *hd, int32_t val) {
6236   upb_descreader *r = closure;
6237   UPB_UNUSED(hd);
6238 
6239   upb_fielddef_setdescriptortype(r->f, val);
6240   return true;
6241 }
6242 
field_onlabel(void * closure,const void * hd,int32_t val)6243 static bool field_onlabel(void *closure, const void *hd, int32_t val) {
6244   upb_descreader *r = closure;
6245   UPB_UNUSED(hd);
6246 
6247   upb_fielddef_setlabel(r->f, val);
6248   return true;
6249 }
6250 
field_onnumber(void * closure,const void * hd,int32_t val)6251 static bool field_onnumber(void *closure, const void *hd, int32_t val) {
6252   upb_descreader *r = closure;
6253   bool ok = upb_fielddef_setnumber(r->f, val, NULL);
6254   UPB_UNUSED(hd);
6255 
6256   UPB_ASSERT_VAR(ok, ok);
6257   return true;
6258 }
6259 
field_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6260 static size_t field_onname(void *closure, const void *hd, const char *buf,
6261                            size_t n, const upb_bufhandle *handle) {
6262   upb_descreader *r = closure;
6263   char *name = upb_strndup(buf, n);
6264   UPB_UNUSED(hd);
6265   UPB_UNUSED(handle);
6266 
6267   /* XXX: see comment at the top of the file. */
6268   upb_fielddef_setname(r->f, name, NULL);
6269   free(name);
6270   return n;
6271 }
6272 
field_ontypename(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6273 static size_t field_ontypename(void *closure, const void *hd, const char *buf,
6274                                size_t n, const upb_bufhandle *handle) {
6275   upb_descreader *r = closure;
6276   char *name = upb_strndup(buf, n);
6277   UPB_UNUSED(hd);
6278   UPB_UNUSED(handle);
6279 
6280   /* XXX: see comment at the top of the file. */
6281   upb_fielddef_setsubdefname(r->f, name, NULL);
6282   free(name);
6283   return n;
6284 }
6285 
field_onextendee(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6286 static size_t field_onextendee(void *closure, const void *hd, const char *buf,
6287                                size_t n, const upb_bufhandle *handle) {
6288   upb_descreader *r = closure;
6289   char *name = upb_strndup(buf, n);
6290   UPB_UNUSED(hd);
6291   UPB_UNUSED(handle);
6292 
6293   /* XXX: see comment at the top of the file. */
6294   upb_fielddef_setcontainingtypename(r->f, name, NULL);
6295   free(name);
6296   return n;
6297 }
6298 
field_ondefaultval(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6299 static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
6300                                  size_t n, const upb_bufhandle *handle) {
6301   upb_descreader *r = closure;
6302   UPB_UNUSED(hd);
6303   UPB_UNUSED(handle);
6304 
6305   /* Have to convert from string to the correct type, but we might not know the
6306    * type yet, so we save it as a string until the end of the field.
6307    * XXX: see comment at the top of the file. */
6308   free(r->default_string);
6309   r->default_string = upb_strndup(buf, n);
6310   return n;
6311 }
6312 
6313 /* Handlers for google.protobuf.DescriptorProto (representing a message). */
msg_startmsg(void * closure,const void * hd)6314 static bool msg_startmsg(void *closure, const void *hd) {
6315   upb_descreader *r = closure;
6316   UPB_UNUSED(hd);
6317 
6318   upb_deflist_push(&r->defs,
6319                    upb_msgdef_upcast_mutable(upb_msgdef_new(&r->defs)));
6320   upb_descreader_startcontainer(r);
6321   return true;
6322 }
6323 
/* End-of-message handler: closes the message's scope.  If the message never
 * received a name, sets "status" and returns false, leaving the scope open. */
static bool msg_endmsg(void *closure, const void *hd, upb_status *status) {
  upb_descreader *reader = closure;
  upb_msgdef *msg = upb_descreader_top(reader);
  UPB_UNUSED(hd);

  if (upb_def_fullname(upb_msgdef_upcast_mutable(msg))) {
    upb_descreader_endcontainer(reader);
    return true;
  }
  upb_status_seterrmsg(status, "Encountered message with no name.");
  return false;
}
6336 
msg_onname(void * closure,const void * hd,const char * buf,size_t n,const upb_bufhandle * handle)6337 static size_t msg_onname(void *closure, const void *hd, const char *buf,
6338                          size_t n, const upb_bufhandle *handle) {
6339   upb_descreader *r = closure;
6340   upb_msgdef *m = upb_descreader_top(r);
6341   /* XXX: see comment at the top of the file. */
6342   char *name = upb_strndup(buf, n);
6343   UPB_UNUSED(hd);
6344   UPB_UNUSED(handle);
6345 
6346   upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL);
6347   upb_descreader_setscopename(r, name);  /* Passes ownership of name. */
6348   return n;
6349 }
6350 
msg_onendfield(void * closure,const void * hd)6351 static bool msg_onendfield(void *closure, const void *hd) {
6352   upb_descreader *r = closure;
6353   upb_msgdef *m = upb_descreader_top(r);
6354   UPB_UNUSED(hd);
6355 
6356   upb_msgdef_addfield(m, r->f, &r->defs, NULL);
6357   r->f = NULL;
6358   return true;
6359 }
6360 
pushextension(void * closure,const void * hd)6361 static bool pushextension(void *closure, const void *hd) {
6362   upb_descreader *r = closure;
6363   UPB_UNUSED(hd);
6364 
6365   assert(upb_fielddef_containingtypename(r->f));
6366   upb_fielddef_setisextension(r->f, true);
6367   upb_deflist_push(&r->defs, upb_fielddef_upcast_mutable(r->f));
6368   r->f = NULL;
6369   return true;
6370 }
6371 
6372 #define D(name) upbdefs_google_protobuf_ ## name(s)
6373 
/* Handler-registration callback: installs this file's handlers on "h"
 * according to which descriptor.proto message "h" is for.  "closure" is the
 * symtab the D() lookups are resolved against.  Messages other than the ones
 * listed below get no handlers. */
static void reghandlers(const void *closure, upb_handlers *h) {
  const upb_symtab *s = closure;
  const upb_msgdef *md = upb_handlers_msgdef(h);

  if (md == D(DescriptorProto)) {
    upb_handlers_setstartmsg(h, &msg_startmsg, NULL);
    upb_handlers_setendmsg(h, &msg_endmsg, NULL);
    upb_handlers_setstring(h, D(DescriptorProto_name), &msg_onname, NULL);
    upb_handlers_setendsubmsg(h, D(DescriptorProto_field), &msg_onendfield,
                              NULL);
    upb_handlers_setendsubmsg(h, D(DescriptorProto_extension), &pushextension,
                              NULL);
    return;
  }

  if (md == D(FileDescriptorProto)) {
    upb_handlers_setstartmsg(h, &file_startmsg, NULL);
    upb_handlers_setendmsg(h, &file_endmsg, NULL);
    upb_handlers_setstring(h, D(FileDescriptorProto_package), &file_onpackage,
                           NULL);
    upb_handlers_setendsubmsg(h, D(FileDescriptorProto_extension),
                              &pushextension, NULL);
    return;
  }

  if (md == D(EnumValueDescriptorProto)) {
    upb_handlers_setstartmsg(h, &enumval_startmsg, NULL);
    upb_handlers_setendmsg(h, &enumval_endmsg, NULL);
    upb_handlers_setstring(h, D(EnumValueDescriptorProto_name),
                           &enumval_onname, NULL);
    upb_handlers_setint32(h, D(EnumValueDescriptorProto_number),
                          &enumval_onnumber, NULL);
    return;
  }

  if (md == D(EnumDescriptorProto)) {
    upb_handlers_setstartmsg(h, &enum_startmsg, NULL);
    upb_handlers_setendmsg(h, &enum_endmsg, NULL);
    upb_handlers_setstring(h, D(EnumDescriptorProto_name), &enum_onname, NULL);
    return;
  }

  if (md == D(FieldDescriptorProto)) {
    upb_handlers_setstartmsg(h, &field_startmsg, NULL);
    upb_handlers_setendmsg(h, &field_endmsg, NULL);
    upb_handlers_setint32(h, D(FieldDescriptorProto_type), &field_ontype,
                          NULL);
    upb_handlers_setint32(h, D(FieldDescriptorProto_label), &field_onlabel,
                          NULL);
    upb_handlers_setint32(h, D(FieldDescriptorProto_number), &field_onnumber,
                          NULL);
    upb_handlers_setstring(h, D(FieldDescriptorProto_name), &field_onname,
                           NULL);
    upb_handlers_setstring(h, D(FieldDescriptorProto_type_name),
                           &field_ontypename, NULL);
    upb_handlers_setstring(h, D(FieldDescriptorProto_extendee),
                           &field_onextendee, NULL);
    upb_handlers_setstring(h, D(FieldDescriptorProto_default_value),
                           &field_ondefaultval, NULL);
    return;
  }

  if (md == D(FieldOptions)) {
    upb_handlers_setbool(h, D(FieldOptions_lazy), &field_onlazy, NULL);
    upb_handlers_setbool(h, D(FieldOptions_packed), &field_onpacked, NULL);
  }
}
6425 
6426 #undef D
6427 
descreader_cleanup(void * _r)6428 void descreader_cleanup(void *_r) {
6429   upb_descreader *r = _r;
6430   free(r->name);
6431   upb_deflist_uninit(&r->defs);
6432   free(r->default_string);
6433   while (r->stack_len > 0) {
6434     upb_descreader_frame *f = &r->stack[--r->stack_len];
6435     free(f->name);
6436   }
6437 }
6438 
6439 
6440 /* Public API  ****************************************************************/
6441 
upb_descreader_create(upb_env * e,const upb_handlers * h)6442 upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
6443   upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
6444   if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
6445     return NULL;
6446   }
6447 
6448   upb_deflist_init(&r->defs);
6449   upb_sink_reset(upb_descreader_input(r), h, r);
6450   r->stack_len = 0;
6451   r->name = NULL;
6452   r->default_string = NULL;
6453 
6454   return r;
6455 }
6456 
upb_descreader_getdefs(upb_descreader * r,void * owner,int * n)6457 upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
6458   *n = r->defs.len;
6459   upb_deflist_donaterefs(&r->defs, owner);
6460   return r->defs.defs;
6461 }
6462 
upb_descreader_input(upb_descreader * r)6463 upb_sink *upb_descreader_input(upb_descreader *r) {
6464   return &r->sink;
6465 }
6466 
upb_descreader_newhandlers(const void * owner)6467 const upb_handlers *upb_descreader_newhandlers(const void *owner) {
6468   const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s);
6469   const upb_handlers *h = upb_handlers_newfrozen(
6470       upbdefs_google_protobuf_FileDescriptorSet(s), owner, reghandlers, s);
6471   upb_symtab_unref(s, &s);
6472   return h;
6473 }
6474 /*
6475 ** protobuf decoder bytecode compiler
6476 **
6477 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
6478 ** according to that specific schema and destination handlers.
6479 **
6480 ** Compiling to bytecode is always the first step.  If we are using the
6481 ** interpreted decoder we leave it as bytecode and interpret that.  If we are
6482 ** using a JIT decoder we use a code generator to turn the bytecode into native
6483 ** code, LLVM IR, etc.
6484 **
6485 ** Bytecode definition is in decoder.int.h.
6486 */
6487 
6488 #include <stdarg.h>
6489 
6490 #ifdef UPB_DUMP_BYTECODE
6491 #include <stdio.h>
6492 #endif
6493 
6494 #define MAXLABEL 5
6495 #define EMPTYLABEL -1
6496 
6497 /* mgroup *********************************************************************/
6498 
/* Free callback for an mgroup: releases the method table, any JIT state (when
 * built with UPB_USE_JIT_X64), the bytecode buffer and the group itself. */
static void freegroup(upb_refcounted *r) {
  mgroup *group = (mgroup*)r;

  upb_inttable_uninit(&group->methods);
#ifdef UPB_USE_JIT_X64
  upb_pbdecoder_freejit(group);
#endif
  free(group->bytecode);
  free(group);
}
6508 
/* Visit callback for an mgroup: reports every method stored in the group's
 * method table to "visit". */
static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
                       void *closure) {
  const mgroup *group = (const mgroup*)r;
  upb_inttable_iter it;

  upb_inttable_begin(&it, &group->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    visit(r, upb_pbdecodermethod_upcast(method), closure);
    upb_inttable_next(&it);
  }
}
6519 
newgroup(const void * owner)6520 mgroup *newgroup(const void *owner) {
6521   mgroup *g = malloc(sizeof(*g));
6522   static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
6523   upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
6524   upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
6525   g->bytecode = NULL;
6526   g->bytecode_end = NULL;
6527   return g;
6528 }
6529 
6530 
6531 /* upb_pbdecodermethod ********************************************************/
6532 
/* Free callback for a decoder method: drops the ref on the destination
 * handlers (if any), releases the dispatch table and frees the method. */
static void freemethod(upb_refcounted *r) {
  upb_pbdecodermethod *m = (upb_pbdecodermethod*)r;
  const upb_handlers *dest = m->dest_handlers_;

  if (dest) {
    upb_handlers_unref(dest, m);
  }

  upb_inttable_uninit(&m->dispatch);
  free(m);
}
6543 
/* Visit callback for a decoder method: its only outgoing reference is the
 * group it belongs to. */
static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
                        void *closure) {
  const upb_pbdecodermethod *method = (const upb_pbdecodermethod*)r;
  const upb_refcounted *group = method->group;
  visit(r, group, closure);
}
6549 
newmethod(const upb_handlers * dest_handlers,mgroup * group)6550 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
6551                                       mgroup *group) {
6552   static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
6553   upb_pbdecodermethod *ret = malloc(sizeof(*ret));
6554   upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
6555   upb_byteshandler_init(&ret->input_handler_);
6556 
6557   /* The method references the group and vice-versa, in a circular reference. */
6558   upb_ref2(ret, group);
6559   upb_ref2(group, ret);
6560   upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
6561   upb_pbdecodermethod_unref(ret, &ret);
6562 
6563   ret->group = mgroup_upcast_mutable(group);
6564   ret->dest_handlers_ = dest_handlers;
6565   ret->is_native_ = false;  /* If we JIT, it will update this later. */
6566   upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
6567 
6568   if (ret->dest_handlers_) {
6569     upb_handlers_ref(ret->dest_handlers_, ret);
6570   }
6571   return ret;
6572 }
6573 
upb_pbdecodermethod_desthandlers(const upb_pbdecodermethod * m)6574 const upb_handlers *upb_pbdecodermethod_desthandlers(
6575     const upb_pbdecodermethod *m) {
6576   return m->dest_handlers_;
6577 }
6578 
upb_pbdecodermethod_inputhandler(const upb_pbdecodermethod * m)6579 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
6580     const upb_pbdecodermethod *m) {
6581   return &m->input_handler_;
6582 }
6583 
upb_pbdecodermethod_isnative(const upb_pbdecodermethod * m)6584 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
6585   return m->is_native_;
6586 }
6587 
upb_pbdecodermethod_new(const upb_pbdecodermethodopts * opts,const void * owner)6588 const upb_pbdecodermethod *upb_pbdecodermethod_new(
6589     const upb_pbdecodermethodopts *opts, const void *owner) {
6590   const upb_pbdecodermethod *ret;
6591   upb_pbcodecache cache;
6592 
6593   upb_pbcodecache_init(&cache);
6594   ret = upb_pbcodecache_getdecodermethod(&cache, opts);
6595   upb_pbdecodermethod_ref(ret, owner);
6596   upb_pbcodecache_uninit(&cache);
6597   return ret;
6598 }
6599 
6600 
6601 /* bytecode compiler **********************************************************/
6602 
/* Data used only at compilation time. */
typedef struct {
  /* Group whose bytecode buffer and method table are being populated. */
  mgroup *group;

  /* Next bytecode word to be written, inside group->bytecode. */
  uint32_t *pc;
  /* Per label: bytecode offset of the most recent instruction holding an
   * unresolved forward reference (head of a linked list threaded through the
   * instructions' offset fields), or EMPTYLABEL if there is none. */
  int fwd_labels[MAXLABEL];
  /* Per label: bytecode offset at which the label was last defined, or
   * EMPTYLABEL if it has not been defined yet. */
  int back_labels[MAXLABEL];

  /* For fields marked "lazy", parse them lazily or eagerly? */
  bool lazy;
} compiler;
6614 
newcompiler(mgroup * group,bool lazy)6615 static compiler *newcompiler(mgroup *group, bool lazy) {
6616   compiler *ret = malloc(sizeof(*ret));
6617   int i;
6618 
6619   ret->group = group;
6620   ret->lazy = lazy;
6621   for (i = 0; i < MAXLABEL; i++) {
6622     ret->fwd_labels[i] = EMPTYLABEL;
6623     ret->back_labels[i] = EMPTYLABEL;
6624   }
6625   return ret;
6626 }
6627 
/* Releases a compiler created by newcompiler().  Only the compiler struct is
 * freed; the group it points at is left untouched. */
static void freecompiler(compiler *c) {
  free(c);
}
6631 
/* Number of 32-bit bytecode words occupied by one pointer operand. */
const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
6633 
6634 /* How many words an instruction is. */
instruction_len(uint32_t instr)6635 static int instruction_len(uint32_t instr) {
6636   switch (getop(instr)) {
6637     case OP_SETDISPATCH: return 1 + ptr_words;
6638     case OP_TAGN: return 3;
6639     case OP_SETBIGGROUPNUM: return 2;
6640     default: return 1;
6641   }
6642 }
6643 
op_has_longofs(int32_t instruction)6644 bool op_has_longofs(int32_t instruction) {
6645   switch (getop(instruction)) {
6646     case OP_CALL:
6647     case OP_BRANCH:
6648     case OP_CHECKDELIM:
6649       return true;
6650     /* The "tag" instructions only have 8 bytes available for the jump target,
6651      * but that is ok because these opcodes only require short jumps. */
6652     case OP_TAG1:
6653     case OP_TAG2:
6654     case OP_TAGN:
6655       return false;
6656     default:
6657       assert(false);
6658       return false;
6659   }
6660 }
6661 
/* Extracts the signed jump offset stored in an instruction: the upper 24 bits
 * for long-offset opcodes, otherwise the second byte sign-extended. */
static int32_t getofs(uint32_t instruction) {
  return op_has_longofs(instruction)
             ? (int32_t)instruction >> 8
             : (int8_t)(instruction >> 8);
}
6669 
/* Stores jump offset "ofs" into "*instruction", keeping the opcode (and, for
 * short-offset opcodes, every bit outside the offset byte) intact.
 *
 * Long-offset opcodes keep the offset in bits 8..31; short-offset opcodes
 * keep it in bits 8..15.  The trailing assert catches offsets that do not fit
 * in the available field. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  /* Shift in the unsigned domain: "ofs" is negative for backward branches and
   * left-shifting a negative signed value is undefined behavior in C. */
  const uint32_t uofs = (uint32_t)ofs;

  if (op_has_longofs(*instruction)) {
    *instruction = getop(*instruction) | (uofs << 8);
  } else {
    *instruction = (*instruction & ~(uint32_t)0xff00) | ((uofs & 0xff) << 8);
  }
  assert(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
}
6678 
/* Current write position, as a word offset from the start of the group's
 * bytecode buffer. */
static uint32_t pcofs(compiler *c) {
  const uint32_t *base = c->group->bytecode;
  return c->pc - base;
}
6680 
6681 /* Defines a local label at the current PC location.  All previous forward
6682  * references are updated to point to this location.  The location is noted
6683  * for any future backward references. */
label(compiler * c,unsigned int label)6684 static void label(compiler *c, unsigned int label) {
6685   int val;
6686   uint32_t *codep;
6687 
6688   assert(label < MAXLABEL);
6689   val = c->fwd_labels[label];
6690   codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
6691   while (codep) {
6692     int ofs = getofs(*codep);
6693     setofs(codep, c->pc - codep - instruction_len(*codep));
6694     codep = ofs ? codep + ofs : NULL;
6695   }
6696   c->fwd_labels[label] = EMPTYLABEL;
6697   c->back_labels[label] = pcofs(c);
6698 }
6699 
6700 /* Creates a reference to a numbered label; either a forward reference
6701  * (positive arg) or backward reference (negative arg).  For forward references
6702  * the value returned now is actually a "next" pointer into a linked list of all
6703  * instructions that use this label and will be patched later when the label is
6704  * defined with label().
6705  *
6706  * The returned value is the offset that should be written into the instruction.
6707  */
labelref(compiler * c,int label)6708 static int32_t labelref(compiler *c, int label) {
6709   assert(label < MAXLABEL);
6710   if (label == LABEL_DISPATCH) {
6711     /* No resolving required. */
6712     return 0;
6713   } else if (label < 0) {
6714     /* Backward local label.  Relative to the next instruction. */
6715     uint32_t from = (c->pc + 1) - c->group->bytecode;
6716     return c->back_labels[-label] - from;
6717   } else {
6718     /* Forward local label: prepend to (possibly-empty) linked list. */
6719     int *lptr = &c->fwd_labels[label];
6720     int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
6721     *lptr = pcofs(c);
6722     return ret;
6723   }
6724 }
6725 
/* Appends one 32-bit word at the current PC, growing the group's bytecode
 * buffer first when the PC has reached its end. */
static void put32(compiler *c, uint32_t v) {
  mgroup *group = c->group;

  if (c->pc == group->bytecode_end) {
    /* Remember the PC as an index: realloc() may move the buffer. */
    int pc_index = pcofs(c);
    size_t cur_words = group->bytecode_end - group->bytecode;
    /* Double the capacity, with a floor of 64 words. */
    size_t grown_words = UPB_MAX(cur_words * 2, 64);

    /* TODO(haberman): handle OOM. */
    group->bytecode = realloc(group->bytecode, grown_words * sizeof(uint32_t));
    group->bytecode_end = group->bytecode + grown_words;
    c->pc = group->bytecode + pc_index;
  }

  *c->pc = v;
  c->pc++;
}
6739 
/* Emits one complete instruction for opcode "op" at the current PC.
 *
 * The variadic arguments depend on the opcode and are read with va_arg()
 * using exactly these types:
 *   OP_SETDISPATCH            void*  (stored in the following word(s))
 *   OP_PARSE_*, OP_STARTSEQ, OP_ENDSEQ, OP_STARTSUBMSG, OP_ENDSUBMSG,
 *   OP_STARTSTR, OP_STRING, OP_ENDSTR, OP_PUSHTAGDELIM
 *                             upb_selector_t  (stored in bits 8 and up)
 *   OP_SETBIGGROUPNUM         int  (stored in the following word)
 *   OP_CALL                   upb_pbdecodermethod*  (call target)
 *   OP_CHECKDELIM, OP_BRANCH  int label
 *   OP_TAG1, OP_TAG2, OP_TAGN int label, uint64_t encoded tag
 * The remaining opcodes listed below take no arguments. */
static void putop(compiler *c, opcode op, ...) {
  va_list ap;
  va_start(ap, op);

  switch (op) {
    case OP_SETDISPATCH: {
      /* The pointer is written low word first; the high word is only emitted
       * when pointers are wider than 32 bits. */
      uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
      put32(c, OP_SETDISPATCH);
      put32(c, ptr);
      if (sizeof(uintptr_t) > sizeof(uint32_t))
        put32(c, (uint64_t)ptr >> 32);
      break;
    }
    /* Opcodes without operands: a single word holding just the opcode. */
    case OP_STARTMSG:
    case OP_ENDMSG:
    case OP_PUSHLENDELIM:
    case OP_POP:
    case OP_SETDELIM:
    case OP_HALT:
    case OP_RET:
    case OP_DISPATCH:
      put32(c, op);
      break;
    /* Opcodes carrying a handler selector in the bits above the opcode. */
    case OP_PARSE_DOUBLE:
    case OP_PARSE_FLOAT:
    case OP_PARSE_INT64:
    case OP_PARSE_UINT64:
    case OP_PARSE_INT32:
    case OP_PARSE_FIXED64:
    case OP_PARSE_FIXED32:
    case OP_PARSE_BOOL:
    case OP_PARSE_UINT32:
    case OP_PARSE_SFIXED32:
    case OP_PARSE_SFIXED64:
    case OP_PARSE_SINT32:
    case OP_PARSE_SINT64:
    case OP_STARTSEQ:
    case OP_ENDSEQ:
    case OP_STARTSUBMSG:
    case OP_ENDSUBMSG:
    case OP_STARTSTR:
    case OP_STRING:
    case OP_ENDSTR:
    case OP_PUSHTAGDELIM:
      put32(c, op | va_arg(ap, upb_selector_t) << 8);
      break;
    case OP_SETBIGGROUPNUM:
      /* Two words: the opcode, then the number as a full 32-bit operand. */
      put32(c, op);
      put32(c, va_arg(ap, int));
      break;
    case OP_CALL: {
      /* The offset is relative to the instruction following the call, and is
       * computed from the target method's code_base.ofs. */
      const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
      put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
      break;
    }
    case OP_CHECKDELIM:
    case OP_BRANCH: {
      /* The label is resolved (or queued for later patching) by labelref(). */
      uint32_t instruction = op;
      int label = va_arg(ap, int);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAG1:
    case OP_TAG2: {
      /* Tag is packed into the upper 16 bits; the offset goes in byte 1. */
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = op | (tag << 16);
      assert(tag <= 0xffff);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAGN: {
      /* Three words: opcode word (with the tag's size in the upper 16 bits
       * and the offset in byte 1), then the tag's low and high halves. */
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = op | (upb_value_size(tag) << 16);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      put32(c, tag);
      put32(c, tag >> 32);
      break;
    }
  }

  va_end(ap);
}
6827 
6828 #if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
6829 
/* Returns a string naming opcode "op" for diagnostic output, or
 * "<unknown op>" when "op" matches none of the listed opcodes.
 *
 * The switch is generated with helper macros: OP(x) expands to a case for
 * OP_x that returns the stringified, macro-expanded form of OP_x, and T(x) is
 * shorthand for OP(PARSE_x).
 *
 * NOTE: only OP and T are #undef'd at the end; QUOTE, EXPAND_AND_QUOTE and
 * OPNAME remain defined after this function. */
const char *upb_pbdecoder_getopname(unsigned int op) {
#define QUOTE(x) #x
#define EXPAND_AND_QUOTE(x) QUOTE(x)
#define OPNAME(x) OP_##x
#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
#define T(x) OP(PARSE_##x)
  /* Keep in sync with list in decoder.int.h. */
  switch ((opcode)op) {
    T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
    T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
    OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
    OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
    OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
    OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
    OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
  }
  return "<unknown op>";
#undef OP
#undef T
}
6850 
6851 #endif
6852 
6853 #ifdef UPB_DUMP_BYTECODE
6854 
dumpbc(uint32_t * p,uint32_t * end,FILE * f)6855 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
6856 
6857   uint32_t *begin = p;
6858 
6859   while (p < end) {
6860     fprintf(f, "%p  %8tx", p, p - begin);
6861     uint32_t instr = *p++;
6862     uint8_t op = getop(instr);
6863     fprintf(f, " %s", upb_pbdecoder_getopname(op));
6864     switch ((opcode)op) {
6865       case OP_SETDISPATCH: {
6866         const upb_inttable *dispatch;
6867         memcpy(&dispatch, p, sizeof(void*));
6868         p += ptr_words;
6869         const upb_pbdecodermethod *method =
6870             (void *)((char *)dispatch -
6871                      offsetof(upb_pbdecodermethod, dispatch));
6872         fprintf(f, " %s", upb_msgdef_fullname(
6873                               upb_handlers_msgdef(method->dest_handlers_)));
6874         break;
6875       }
6876       case OP_DISPATCH:
6877       case OP_STARTMSG:
6878       case OP_ENDMSG:
6879       case OP_PUSHLENDELIM:
6880       case OP_POP:
6881       case OP_SETDELIM:
6882       case OP_HALT:
6883       case OP_RET:
6884         break;
6885       case OP_PARSE_DOUBLE:
6886       case OP_PARSE_FLOAT:
6887       case OP_PARSE_INT64:
6888       case OP_PARSE_UINT64:
6889       case OP_PARSE_INT32:
6890       case OP_PARSE_FIXED64:
6891       case OP_PARSE_FIXED32:
6892       case OP_PARSE_BOOL:
6893       case OP_PARSE_UINT32:
6894       case OP_PARSE_SFIXED32:
6895       case OP_PARSE_SFIXED64:
6896       case OP_PARSE_SINT32:
6897       case OP_PARSE_SINT64:
6898       case OP_STARTSEQ:
6899       case OP_ENDSEQ:
6900       case OP_STARTSUBMSG:
6901       case OP_ENDSUBMSG:
6902       case OP_STARTSTR:
6903       case OP_STRING:
6904       case OP_ENDSTR:
6905       case OP_PUSHTAGDELIM:
6906         fprintf(f, " %d", instr >> 8);
6907         break;
6908       case OP_SETBIGGROUPNUM:
6909         fprintf(f, " %d", *p++);
6910         break;
6911       case OP_CHECKDELIM:
6912       case OP_CALL:
6913       case OP_BRANCH:
6914         fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6915         break;
6916       case OP_TAG1:
6917       case OP_TAG2: {
6918         fprintf(f, " tag:0x%x", instr >> 16);
6919         if (getofs(instr)) {
6920           fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6921         }
6922         break;
6923       }
6924       case OP_TAGN: {
6925         uint64_t tag = *p++;
6926         tag |= (uint64_t)*p++ << 32;
6927         fprintf(f, " tag:0x%llx", (long long)tag);
6928         fprintf(f, " n:%d", instr >> 16);
6929         if (getofs(instr)) {
6930           fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6931         }
6932         break;
6933       }
6934     }
6935     fputs("\n", f);
6936   }
6937 }
6938 
6939 #endif
6940 
/* Builds the tag for field "f" with the given wire type (field number in the
 * upper bits, wire type in the low three) and returns the result of passing
 * it through upb_vencode32(). */
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  uint32_t raw_tag = (upb_fielddef_number(f) << 3) | wire_type;
  uint64_t encoded = upb_vencode32(raw_tag);

  /* No tag should be greater than 5 bytes. */
  assert(encoded <= 0xffffffffff);
  return encoded;
}
6948 
/* Emits the tag-check instruction for field "f" / "wire_type" with jump
 * label "dest".  The opcode is chosen from upb_value_size() of the encoded
 * tag: OP_TAG1 for 1, OP_TAG2 for 2, OP_TAGN otherwise. */
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  uint64_t tag = get_encoded_tag(f, wire_type);
  int tag_size = upb_value_size(tag);
  opcode op = OP_TAGN;

  if (tag_size == 1) {
    op = OP_TAG1;
  } else if (tag_size == 2) {
    op = OP_TAG2;
  }
  putop(c, op, dest, tag);
}
6964 
getsel(const upb_fielddef * f,upb_handlertype_t type)6965 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
6966   upb_selector_t selector;
6967   bool ok = upb_handlers_getselector(f, type, &selector);
6968   UPB_ASSERT_VAR(ok, ok);
6969   return selector;
6970 }
6971 
6972 /* Takes an existing, primary dispatch table entry and repacks it with a
6973  * different alternate wire type.  Called when we are inserting a secondary
6974  * dispatch table entry for an alternate wire type. */
repack(uint64_t dispatch,int new_wt2)6975 static uint64_t repack(uint64_t dispatch, int new_wt2) {
6976   uint64_t ofs;
6977   uint8_t wt1;
6978   uint8_t old_wt2;
6979   upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
6980   assert(old_wt2 == NO_WIRE_TYPE);  /* wt2 should not be set yet. */
6981   return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
6982 }
6983 
6984 /* Marks the current bytecode position as the dispatch target for this message,
6985  * field, and wire type. */
dispatchtarget(compiler * c,upb_pbdecodermethod * method,const upb_fielddef * f,int wire_type)6986 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
6987                            const upb_fielddef *f, int wire_type) {
6988   /* Offset is relative to msg base. */
6989   uint64_t ofs = pcofs(c) - method->code_base.ofs;
6990   uint32_t fn = upb_fielddef_number(f);
6991   upb_inttable *d = &method->dispatch;
6992   upb_value v;
6993   if (upb_inttable_remove(d, fn, &v)) {
6994     /* TODO: prioritize based on packed setting in .proto file. */
6995     uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
6996     upb_inttable_insert(d, fn, upb_value_uint64(repacked));
6997     upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
6998   } else {
6999     uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
7000     upb_inttable_insert(d, fn, upb_value_uint64(val));
7001   }
7002 }
7003 
/* Emits the "push" instruction(s) that precede a submessage of field "f":
 * OP_PUSHLENDELIM for MESSAGE-typed fields, otherwise OP_PUSHTAGDELIM carrying
 * the field number.  Field numbers that do not fit in the instruction's 24-bit
 * operand are emitted through a separate OP_SETBIGGROUPNUM instruction. */
static void putpush(compiler *c, const upb_fielddef *f) {
  uint32_t fieldnum;

  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  fieldnum = upb_fielddef_number(f);
  if (fieldnum >= 1 << 24) {
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_SETBIGGROUPNUM, fieldnum);
    return;
  }
  putop(c, OP_PUSHTAGDELIM, fieldnum);
}
7017 
find_submethod(const compiler * c,const upb_pbdecodermethod * method,const upb_fielddef * f)7018 static upb_pbdecodermethod *find_submethod(const compiler *c,
7019                                            const upb_pbdecodermethod *method,
7020                                            const upb_fielddef *f) {
7021   const upb_handlers *sub =
7022       upb_handlers_getsubhandlers(method->dest_handlers_, f);
7023   upb_value v;
7024   return upb_inttable_lookupptr(&c->group->methods, sub, &v)
7025              ? upb_value_getptr(v)
7026              : NULL;
7027 }
7028 
/* Emits "op" with selector "sel", but only when handlers "h" actually has a
 * handler registered for that selector; otherwise emits nothing. */
static void putsel(compiler *c, opcode op, upb_selector_t sel,
                   const upb_handlers *h) {
  if (!upb_handlers_gethandler(h, sel)) {
    return;
  }
  putop(c, op, sel);
}
7035 
7036 /* Puts an opcode to call a callback, but only if a callback actually exists for
7037  * this field and handler type. */
maybeput(compiler * c,opcode op,const upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)7038 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
7039                      const upb_fielddef *f, upb_handlertype_t type) {
7040   putsel(c, op, getsel(f, type), h);
7041 }
7042 
haslazyhandlers(const upb_handlers * h,const upb_fielddef * f)7043 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
7044   if (!upb_fielddef_lazy(f))
7045     return false;
7046 
7047   return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) ||
7048          upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) ||
7049          upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
7050 }
7051 
7052 
7053 /* bytecode compiler code generation ******************************************/
7054 
/* Symbolic names for our local labels.  These index the compiler's
 * fwd_labels/back_labels arrays; a negated value passed to an emitted branch
 * denotes a backward reference (see labelref()). */
#define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
#define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
#define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
#define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
7060 
/* Generates bytecode to parse a single non-lazy message field.
 *
 * Nothing is emitted when the group has no decoder method for the field's
 * sub-handlers.  Otherwise the field's entry point is registered in the
 * method's dispatch table, and the code pushes a delimiter, starts the
 * submessage, calls the sub-method and pops again; repeated fields wrap that
 * sequence in a STARTSEQ ... ENDSEQ loop.
 *
 * The one-column-outdented label()/dispatchtarget() calls do not emit
 * instructions; they mark positions in the emitted code. */
static void generate_msgfield(compiler *c, const upb_fielddef *f,
                              upb_pbdecodermethod *method) {
  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
  const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
  int wire_type;

  if (!sub_m) {
    /* Don't emit any code for this field at all; it will be parsed as an
     * unknown field. */
    return;
  }

  label(c, LABEL_FIELD);

  /* MESSAGE-typed fields are length-delimited; anything else reaching this
   * function is treated as a group (START_GROUP wire type). */
  wire_type =
      (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
          ? UPB_WIRE_TYPE_DELIMITED
          : UPB_WIRE_TYPE_START_GROUP;

  if (upb_fielddef_isseq(f)) {
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, wire_type, LABEL_DISPATCH);
   dispatchtarget(c, method, f, wire_type);
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
   label(c, LABEL_LOOPSTART);
    putpush(c, f);
    putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
    putop(c, OP_CALL, sub_m);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
    if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
      putop(c, OP_SETDELIM);
    }
    /* Leave the loop at a delimiter or when the next tag is not this field;
     * otherwise go around again. */
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
   label(c, LABEL_LOOPBREAK);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
  } else {
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, wire_type, LABEL_DISPATCH);
   dispatchtarget(c, method, f, wire_type);
    putpush(c, f);
    putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
    putop(c, OP_CALL, sub_m);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
    if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
      putop(c, OP_SETDELIM);
    }
  }
}
7116 
/* Generates bytecode to parse a single string or lazy submessage field.
 *
 * The field is always handled with the DELIMITED wire type.  Its entry point
 * is registered in the method's dispatch table, and the emitted code pushes a
 * length delimiter and runs STARTSTR / STRING / (optional) ENDSTR; repeated
 * fields wrap that sequence in a STARTSEQ ... ENDSEQ loop.
 *
 * The one-column-outdented label()/dispatchtarget() calls do not emit
 * instructions; they mark positions in the emitted code. */
static void generate_delimfield(compiler *c, const upb_fielddef *f,
                                upb_pbdecodermethod *method) {
  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);

  label(c, LABEL_FIELD);
  if (upb_fielddef_isseq(f)) {
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
   dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
   label(c, LABEL_LOOPSTART);
    putop(c, OP_PUSHLENDELIM);
    putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
    /* Need to emit even if no handler to skip past the string. */
    putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
    putop(c, OP_POP);
    maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
    putop(c, OP_SETDELIM);
    /* Leave the loop at a delimiter or when the next tag is not this field;
     * otherwise go around again. */
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
   label(c, LABEL_LOOPBREAK);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
  } else {
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
   dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
    putop(c, OP_PUSHLENDELIM);
    putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
    /* As in the repeated case, OP_STRING is emitted unconditionally. */
    putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
    putop(c, OP_POP);
    maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
    putop(c, OP_SETDELIM);
  }
}
7155 
/* Generates bytecode to parse a single primitive field.
 *
 * For a repeated field two entry points are registered in the dispatch table:
 * one for the packed (DELIMITED) encoding and one for the field's native wire
 * type.  Both loops share the trailing POP / ENDSEQ / SETDELIM sequence.  A
 * non-repeated field gets a single entry point followed by one parse
 * instruction.
 *
 * The one-column-outdented label()/dispatchtarget() calls do not emit
 * instructions; they mark positions in the emitted code. */
static void generate_primitivefield(compiler *c, const upb_fielddef *f,
                                    upb_pbdecodermethod *method) {
  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
  upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
  opcode parse_type;
  upb_selector_t sel;
  int wire_type;

  label(c, LABEL_FIELD);

  /* From a decoding perspective, ENUM is the same as INT32. */
  if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
    descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;

  /* The descriptor type value is used directly as the parse opcode; the
   * assert below only checks that it lies within the opcode range. */
  parse_type = (opcode)descriptor_type;

  /* TODO(haberman): generate packed or non-packed first depending on "packed"
   * setting in the fielddef.  This will favor (in speed) whichever was
   * specified. */

  assert((int)parse_type >= 0 && parse_type <= OP_MAX);
  sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
  /* Note: indexed by the field's real descriptor type (ENUM is not remapped
   * here). */
  wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
  if (upb_fielddef_isseq(f)) {
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
   dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
    putop(c, OP_PUSHLENDELIM);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Packed */
   label(c, LABEL_LOOPSTART);
    putop(c, parse_type, sel);
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
    /* LABEL_LOOPSTART is redefined below for the non-packed loop; the branch
     * above has already been resolved against the packed loop's start. */
   dispatchtarget(c, method, f, wire_type);
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Non-packed */
   label(c, LABEL_LOOPSTART);
    putop(c, parse_type, sel);
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
   label(c, LABEL_LOOPBREAK);
    putop(c, OP_POP);  /* Packed and non-packed join. */
    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
    putop(c, OP_SETDELIM);  /* Could remove for non-packed by dup ENDSEQ. */
  } else {
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, wire_type, LABEL_DISPATCH);
   dispatchtarget(c, method, f, wire_type);
    putop(c, parse_type, sel);
  }
}
7209 
/* Adds bytecode for parsing the given message to the compiler's group,
 * while adding all dispatch targets to this message's dispatch table.
 *
 * The method's dispatch table is rebuilt from scratch on every call, and
 * method->code_base.ofs is set to the offset at which this method's code
 * starts.  The emitted layout is:
 *   SETDISPATCH, [STARTMSG], per-field code..., BRANCH back to the most
 *   recent field, then (at LABEL_ENDMSG) [ENDMSG], RET
 * where bracketed instructions are only emitted when a handler exists. */
static void compile_method(compiler *c, upb_pbdecodermethod *method) {
  const upb_handlers *h;
  const upb_msgdef *md;
  uint32_t* start_pc;
  upb_msg_field_iter i;
  upb_value val;

  assert(method);

  /* Clear all entries in the dispatch table. */
  upb_inttable_uninit(&method->dispatch);
  upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);

  h = upb_pbdecodermethod_desthandlers(method);
  md = upb_handlers_msgdef(h);

 method->code_base.ofs = pcofs(c);
  putop(c, OP_SETDISPATCH, &method->dispatch);
  putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
 label(c, LABEL_FIELD);
  /* Remember where field code begins so we can tell below whether any was
   * generated. */
  start_pc = c->pc;
  for(upb_msg_field_begin(&i, md);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);
    upb_fieldtype_t type = upb_fielddef_type(f);

    /* Message fields are compiled as submessages unless lazy parsing is
     * enabled and the field has lazy (string) handlers, in which case they
     * are handled like strings. */
    if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
      generate_msgfield(c, f, method);
    } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
               type == UPB_TYPE_MESSAGE) {
      generate_delimfield(c, f, method);
    } else {
      generate_primitivefield(c, f, method);
    }
  }

  /* If there were no fields, or if no handlers were defined, we need to
   * generate a non-empty loop body so that we can at least dispatch for unknown
   * fields and check for the end of the message. */
  if (c->pc == start_pc) {
    /* Check for end-of-message. */
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    /* Unconditionally dispatch. */
    putop(c, OP_DISPATCH, 0);
  }

  /* For now we just loop back to the last field of the message (or if none,
   * the DISPATCH opcode for the message). */
  putop(c, OP_BRANCH, -LABEL_FIELD);

  /* Insert both a label and a dispatch table entry for this end-of-msg. */
 label(c, LABEL_ENDMSG);
  val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
  upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);

  putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
  putop(c, OP_RET);

  upb_inttable_compact(&method->dispatch);
}
7273 
7274 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
7275  * Returns the method for these handlers.
7276  *
7277  * Generates a new method for every destination handlers reachable from "h". */
find_methods(compiler * c,const upb_handlers * h)7278 static void find_methods(compiler *c, const upb_handlers *h) {
7279   upb_value v;
7280   upb_msg_field_iter i;
7281   const upb_msgdef *md;
7282 
7283   if (upb_inttable_lookupptr(&c->group->methods, h, &v))
7284     return;
7285   newmethod(h, c->group);
7286 
7287   /* Find submethods. */
7288   md = upb_handlers_msgdef(h);
7289   for(upb_msg_field_begin(&i, md);
7290       !upb_msg_field_done(&i);
7291       upb_msg_field_next(&i)) {
7292     const upb_fielddef *f = upb_msg_iter_field(&i);
7293     const upb_handlers *sub_h;
7294     if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
7295         (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
7296       /* We only generate a decoder method for submessages with handlers.
7297        * Others will be parsed as unknown fields. */
7298       find_methods(c, sub_h);
7299     }
7300   }
7301 }
7302 
7303 /* (Re-)compile bytecode for all messages in "msgs."
7304  * Overwrites any existing bytecode in "c". */
compile_methods(compiler * c)7305 static void compile_methods(compiler *c) {
7306   upb_inttable_iter i;
7307 
7308   /* Start over at the beginning of the bytecode. */
7309   c->pc = c->group->bytecode;
7310 
7311   upb_inttable_begin(&i, &c->group->methods);
7312   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
7313     upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
7314     compile_method(c, method);
7315   }
7316 }
7317 
/* Points every method in the group at the bytecode interpreter: converts the
 * method's code offset into an absolute pointer into the group's bytecode and
 * installs the startstr/string/endstr callbacks on its input handler. */
static void set_bytecode_handlers(mgroup *g) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &g->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&it));
    upb_byteshandler *input = &method->input_handler_;

    /* code_base holds an offset until now; from here on it holds a pointer. */
    method->code_base.ptr = g->bytecode + method->code_base.ofs;

    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
                                 method->code_base.ptr);
    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
    upb_byteshandler_setendstr(input, upb_pbdecoder_end, method);

    upb_inttable_next(&it);
  }
}
7332 
7333 
7334 /* JIT setup. *****************************************************************/
7335 
7336 #ifdef UPB_USE_JIT_X64
7337 
/* JIT-enabled build: installs the input handlers for every method in "g".
 * With "allowjit" set the work is delegated to upb_pbdecoder_jit(); otherwise
 * the bytecode interpreter handlers are used.  jit_code is reset to NULL in
 * either case before anything else happens. */
static void sethandlers(mgroup *g, bool allowjit) {
  g->jit_code = NULL;

  if (!allowjit) {
    set_bytecode_handlers(g);
    return;
  }

  /* Compile byte-code into machine code, create handlers. */
  upb_pbdecoder_jit(g);
}
7347 
7348 #else  /* UPB_USE_JIT_X64 */
7349 
/* Build without JIT support: "allowjit" is ignored and the bytecode
 * interpreter handlers are always installed. */
static void sethandlers(mgroup *g, bool allowjit) {
  UPB_UNUSED(allowjit);

  set_bytecode_handlers(g);
}
7355 
7356 #endif  /* UPB_USE_JIT_X64 */
7357 
7358 
7359 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
7360  * handlers and other mgroups (but verify we have a transitive closure). */
mgroup_new(const upb_handlers * dest,bool allowjit,bool lazy,const void * owner)7361 const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
7362                          const void *owner) {
7363   mgroup *g;
7364   compiler *c;
7365 
7366   UPB_UNUSED(allowjit);
7367   assert(upb_handlers_isfrozen(dest));
7368 
7369   g = newgroup(owner);
7370   c = newcompiler(g, lazy);
7371   find_methods(c, dest);
7372 
7373   /* We compile in two passes:
7374    * 1. all messages are assigned relative offsets from the beginning of the
7375    *    bytecode (saved in method->code_base).
7376    * 2. forwards OP_CALL instructions can be correctly linked since message
7377    *    offsets have been previously assigned.
7378    *
7379    * Could avoid the second pass by linking OP_CALL instructions somehow. */
7380   compile_methods(c);
7381   compile_methods(c);
7382   g->bytecode_end = c->pc;
7383   freecompiler(c);
7384 
7385 #ifdef UPB_DUMP_BYTECODE
7386   {
7387     FILE *f = fopen("/tmp/upb-bytecode", "wb");
7388     assert(f);
7389     dumpbc(g->bytecode, g->bytecode_end, stderr);
7390     dumpbc(g->bytecode, g->bytecode_end, f);
7391     fclose(f);
7392   }
7393 #endif
7394 
7395   sethandlers(g, allowjit);
7396   return g;
7397 }
7398 
7399 
7400 /* upb_pbcodecache ************************************************************/
7401 
/* Initializes a code cache: JIT use is allowed by default and the table of
 * method groups starts out freshly initialized. */
void upb_pbcodecache_init(upb_pbcodecache *c) {
  c->allow_jit_ = true;
  upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
}
7406 
/* Releases the cache's reference on every method group it holds (the cache
 * itself is the owner passed to mgroup_unref()), then tears down the table. */
void upb_pbcodecache_uninit(upb_pbcodecache *c) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &c->groups);
  while (!upb_inttable_done(&it)) {
    const mgroup *g = upb_value_getconstptr(upb_inttable_iter_value(&it));
    mgroup_unref(g, c);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&c->groups);
}
7416 
upb_pbcodecache_allowjit(const upb_pbcodecache * c)7417 bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
7418   return c->allow_jit_;
7419 }
7420 
/* Changes the "allow JIT" setting.  The change is refused (returns false,
 * setting untouched) once the cache holds at least one method group;
 * otherwise the setting is stored and true is returned. */
bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
  const bool has_groups = upb_inttable_count(&c->groups) > 0;

  if (!has_groups) {
    c->allow_jit_ = allow;
  }
  return !has_groups;
}
7427 
upb_pbcodecache_getdecodermethod(upb_pbcodecache * c,const upb_pbdecodermethodopts * opts)7428 const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
7429     upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
7430   upb_value v;
7431   bool ok;
7432 
7433   /* Right now we build a new DecoderMethod every time.
7434    * TODO(haberman): properly cache methods by their true key. */
7435   const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
7436   upb_inttable_push(&c->groups, upb_value_constptr(g));
7437 
7438   ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
7439   UPB_ASSERT_VAR(ok, ok);
7440   return upb_value_getptr(v);
7441 }
7442 
7443 
7444 /* upb_pbdecodermethodopts ****************************************************/
7445 
/* Initializes decoder-method options for the handlers "h"; lazy mode starts
 * out disabled. */
void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
                                  const upb_handlers *h) {
  opts->lazy = false;
  opts->handlers = h;
}
7451 
/* Stores the "lazy" flag in the options; it is later forwarded to the
 * compiler by upb_pbcodecache_getdecodermethod() via mgroup_new(). */
void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
  opts->lazy = lazy;
}
7455 /*
7456 ** upb::Decoder (Bytecode Decoder VM)
7457 **
7458 ** Bytecode must previously have been generated using the bytecode compiler in
7459 ** compile_decoder.c.  This decoder then walks through the bytecode op-by-op to
7460 ** parse the input.
7461 **
7462 ** Decoding is fully resumable; we just keep a pointer to the current bytecode
7463 ** instruction and resume from there.  A fair amount of the logic here is to
** handle the fact that values can span buffer seams and we have to be
** capable of suspending/resuming from any byte in the stream.  This
7466 ** sometimes requires keeping a few trailing bytes from the last buffer around
7467 ** in the "residual" buffer.
7468 */
7469 
7470 #include <inttypes.h>
7471 #include <stddef.h>
7472 
7473 #ifdef UPB_DUMP_BYTECODE
7474 #include <stdio.h>
7475 #endif
7476 
/* Evaluates "x"; if it is false, suspends the decoder and returns the result
 * of upb_pbdecoder_suspend() from the enclosing function.  Requires a decoder
 * variable named "d" in scope.  Note that this expands to a bare "if"
 * statement that already ends in a semicolon, so take care when using it
 * inside an unbraced if/else. */
#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);

/* Error messages that are shared between the bytecode and JIT decoders. */
const char *kPbDecoderStackOverflow = "Nesting too deep.";
const char *kPbDecoderSubmessageTooLong =
    "Submessage end extends past enclosing submessage.";

/* Error messages shared within this file. */
static const char *kUnterminatedVarint = "Unterminated varint.";

/* upb_pbdecoder **************************************************************/

/* A standalone OP_HALT instruction.
 * NOTE(review): not referenced in this part of the file; presumably used as a
 * sentinel program counter elsewhere -- confirm. */
static opcode halt = OP_HALT;
7490 
7491 /* Whether an op consumes any of the input buffer. */
consumes_input(opcode op)7492 static bool consumes_input(opcode op) {
7493   switch (op) {
7494     case OP_SETDISPATCH:
7495     case OP_STARTMSG:
7496     case OP_ENDMSG:
7497     case OP_STARTSEQ:
7498     case OP_ENDSEQ:
7499     case OP_STARTSUBMSG:
7500     case OP_ENDSUBMSG:
7501     case OP_STARTSTR:
7502     case OP_ENDSTR:
7503     case OP_PUSHTAGDELIM:
7504     case OP_POP:
7505     case OP_SETDELIM:
7506     case OP_SETBIGGROUPNUM:
7507     case OP_CHECKDELIM:
7508     case OP_CALL:
7509     case OP_RET:
7510     case OP_BRANCH:
7511       return false;
7512     default:
7513       return true;
7514   }
7515 }
7516 
/* Number of bytes needed for a decoder stack of "entries" frames.  The
 * decoder argument is not used. */
static size_t stacksize(upb_pbdecoder *d, size_t entries) {
  const size_t frame_bytes = sizeof(upb_pbdecoder_frame);
  UPB_UNUSED(d);
  return entries * frame_bytes;
}
7521 
/* Number of bytes needed for a call stack of "entries" entries.  Without the
 * JIT (or for non-native methods) each entry is one bytecode pointer. */
static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
  UPB_UNUSED(d);

#ifdef UPB_USE_JIT_X64
  if (d->method_->is_native_) {
    /* Each native stack frame needs two pointers, plus we need a few frames
     * (ten pointer-sized slots) for the enter/exit trampolines. */
    return (entries * sizeof(void*) * 2) + (sizeof(void*) * 10);
  }
#endif

  return entries * sizeof(uint32_t*);
}
7537 
7538 
7539 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
7540 
7541 /* It's unfortunate that we have to micro-manage the compiler with
7542  * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
7543  * specific to one hardware configuration.  But empirically on a Core i7,
7544  * performance increases 30-50% with these annotations.  Every instance where
7545  * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
7546  * benchmarks. */
7547 
/* Reports "msg" as an error through the decoder's environment, using a
 * temporary status object to carry the message. */
static void seterr(upb_pbdecoder *d, const char *msg) {
  upb_status err = UPB_STATUS_INIT;

  upb_status_seterrmsg(&err, msg);
  upb_env_reporterror(d->env, &err);
}
7553 
/* Externally visible wrapper around the file-local seterr(): reports "msg" as
 * a decoder error. */
void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
  seterr(d, msg);
}
7557 
7558 
7559 /* Buffering ******************************************************************/
7560 
7561 /* We operate on one buffer at a time, which is either the user's buffer passed
7562  * to our "decode" callback or some residual bytes from the previous buffer. */
7563 
7564 /* How many bytes can be safely read from d->ptr without reading past end-of-buf
7565  * or past the current delimited end. */
curbufleft(const upb_pbdecoder * d)7566 static size_t curbufleft(const upb_pbdecoder *d) {
7567   assert(d->data_end >= d->ptr);
7568   return d->data_end - d->ptr;
7569 }
7570 
7571 /* How many bytes are available before end-of-buffer. */
bufleft(const upb_pbdecoder * d)7572 static size_t bufleft(const upb_pbdecoder *d) {
7573   return d->end - d->ptr;
7574 }
7575 
7576 /* Overall stream offset of d->ptr. */
offset(const upb_pbdecoder * d)7577 uint64_t offset(const upb_pbdecoder *d) {
7578   return d->bufstart_ofs + (d->ptr - d->buf);
7579 }
7580 
7581 /* How many bytes are available before the end of this delimited region. */
delim_remaining(const upb_pbdecoder * d)7582 size_t delim_remaining(const upb_pbdecoder *d) {
7583   return d->top->end_ofs - offset(d);
7584 }
7585 
7586 /* Advances d->ptr. */
advance(upb_pbdecoder * d,size_t len)7587 static void advance(upb_pbdecoder *d, size_t len) {
7588   assert(curbufleft(d) >= len);
7589   d->ptr += len;
7590 }
7591 
/* Returns true if "p" lies within [buf, end].  Note that the upper bound is
 * inclusive: a pointer equal to "end" counts as inside the buffer. */
static bool in_buf(const char *p, const char *buf, const char *end) {
  if (p < buf) {
    return false;
  }
  return !(p > end);
}
7595 
in_residual_buf(const upb_pbdecoder * d,const char * p)7596 static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
7597   return in_buf(p, d->residual, d->residual_end);
7598 }
7599 
7600 /* Calculates the delim_end value, which is affected by both the current buffer
7601  * and the parsing stack, so must be called whenever either is updated. */
set_delim_end(upb_pbdecoder * d)7602 static void set_delim_end(upb_pbdecoder *d) {
7603   size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
7604   if (delim_ofs <= (size_t)(d->end - d->buf)) {
7605     d->delim_end = d->buf + delim_ofs;
7606     d->data_end = d->delim_end;
7607   } else {
7608     d->data_end = d->end;
7609     d->delim_end = NULL;
7610   }
7611 }
7612 
/* Makes [buf, end) the current buffer, positions d->ptr at its start, and
 * recomputes the delimited end for the new buffer. */
static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
  d->buf = buf;
  d->end = end;
  d->ptr = buf;
  set_delim_end(d);
}
7619 
/* Moves on from a fully consumed current buffer to the buffer [buf, buf+len):
 * the stream offset of the buffer start is advanced by the length of the old
 * buffer before switching. */
static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
  const char *new_end = buf + len;

  assert(curbufleft(d) == 0);
  d->bufstart_ofs += (d->end - d->buf);
  switchtobuf(d, buf, new_end);
}
7625 
/* Records the current read position as the point to fall back to when the
 * decoder is suspended. */
static void checkpoint(upb_pbdecoder *d) {
  /* This assertion guards efficiency rather than correctness: it catches
   * callers that checkpoint() again without having consumed anything, which
   * means we are checkpointing more often than necessary. */
  assert(d->ptr != d->checkpoint);
  d->checkpoint = d->ptr;
}
7633 
/* Skips "bytes" bytes in the stream, which may be more than available.  If we
 * skip more bytes than are available, we return a long read count to the caller
 * indicating how many bytes can be skipped over before passing actual data
 * again.  Skipped bytes can pass a NULL buffer and the decoder guarantees they
 * won't actually be read.
 *
 * Returns:
 *  - the result of upb_pbdecoder_suspend() (after reporting an error) if
 *    "bytes" exceeds what remains of the enclosing delimited region;
 *  - DECODE_OK if the current buffer holds strictly more than "bytes" bytes;
 *  - otherwise d->size_param + d->skip, where d->skip is the number of bytes
 *    that still have to be skipped in later buffers.
 */
static int32_t skip(upb_pbdecoder *d, size_t bytes) {
  assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
  assert(d->skip == 0);
  if (bytes > delim_remaining(d)) {
    seterr(d, "Skipped value extended beyond enclosing submessage.");
    return upb_pbdecoder_suspend(d);
  } else if (bufleft(d) > bytes) {
    /* Skipped data is all in current buffer, and more is still available. */
    advance(d, bytes);
    d->skip = 0;
    return DECODE_OK;
  } else {
    /* Skipped data extends beyond currently available buffers. */
    d->pc = d->last;  /* Rewind to the instruction recorded in d->last. */
    /* Must be computed before the buffer is switched below. */
    d->skip = bytes - curbufleft(d);
    /* The whole current buffer is accounted for as consumed. */
    d->bufstart_ofs += (d->end - d->buf);
    /* Empty the residual buffer and make it the current buffer. */
    d->residual_end = d->residual;
    switchtobuf(d, d->residual, d->residual_end);
    return d->size_param + d->skip;
  }
}
7661 
7662 
/* Resumes the decoder from an initial state or from a previous suspend.
 *
 * Records the caller's buffer ("buf"/"size") and "handle" on the decoder,
 * selects the current buffer (the residual buffer if it still holds bytes,
 * otherwise the caller's buffer), then completes any skip left pending in
 * d->skip and continues skipping an unknown group if one is still open
 * (d->top->groupnum < 0).
 *
 * Returns DECODE_OK when decoding may proceed from d->ptr.  Any other value
 * originates from skip(), upb_pbdecoder_skipunknown() or
 * upb_pbdecoder_suspend().
 * NOTE(review): CHECK_RETURN is defined outside this part of the file; it
 * presumably returns its argument from this function when it is >= 0 --
 * confirm. */
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                             size_t size, const upb_bufhandle *handle) {
  UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */

  d->buf_param = buf;
  d->size_param = size;
  d->handle = handle;

  if (d->residual_end > d->residual) {
    /* We have residual bytes from the last buffer. */
    assert(d->ptr == d->residual);
  } else {
    switchtobuf(d, buf, buf + size);
  }

  d->checkpoint = d->ptr;

  if (d->skip) {
    /* Finish a skip that ran past the end of the previous buffer.  d->skip is
     * cleared first because skip() asserts that it is zero on entry. */
    size_t skip_bytes = d->skip;
    d->skip = 0;
    CHECK_RETURN(skip(d, skip_bytes));
    d->checkpoint = d->ptr;
  }

  if (!buf) {
    /* NULL buf is ok if its entire span is covered by the "skip" above, but
     * by this point we know that "skip" doesn't cover the buffer. */
    seterr(d, "Passed NULL buffer over non-skippable region.");
    return upb_pbdecoder_suspend(d);
  }

  if (d->top->groupnum < 0) {
    /* We were suspended inside an unknown group; keep skipping its fields.
     * A negative fieldnum tells skipunknown() to decode the next tag itself. */
    CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
    d->checkpoint = d->ptr;
  }

  return DECODE_OK;
}
7702 
7703 /* Suspends the decoder at the last checkpoint, without saving any residual
7704  * bytes.  If there are any unconsumed bytes, returns a short byte count. */
upb_pbdecoder_suspend(upb_pbdecoder * d)7705 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
7706   d->pc = d->last;
7707   if (d->checkpoint == d->residual) {
7708     /* Checkpoint was in residual buf; no user bytes were consumed. */
7709     d->ptr = d->residual;
7710     return 0;
7711   } else {
7712     size_t consumed;
7713     assert(!in_residual_buf(d, d->checkpoint));
7714     assert(d->buf == d->buf_param);
7715 
7716     consumed = d->checkpoint - d->buf;
7717     d->bufstart_ofs += consumed;
7718     d->residual_end = d->residual;
7719     switchtobuf(d, d->residual, d->residual_end);
7720     return consumed;
7721   }
7722 }
7723 
/* Suspends the decoder at the last checkpoint, and saves any unconsumed
 * bytes in our residual buffer.  This is necessary if we need more user
 * bytes to form a complete value, which might not be contiguous in the
 * user's buffers.  Always consumes all user bytes.
 *
 * Returns d->size_param, i.e. reports the entire user buffer as consumed.  On
 * return the residual buffer is the current buffer and d->ptr points at its
 * start. */
static size_t suspend_save(upb_pbdecoder *d) {
  /* We hit end-of-buffer before we could parse a full value.
   * Save any unconsumed bytes (if any) to the residual buffer. */
  d->pc = d->last;

  if (d->checkpoint == d->residual) {
    /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
    assert((d->residual_end - d->residual) + d->size_param <=
           sizeof(d->residual));
    if (!in_residual_buf(d, d->ptr)) {
      /* d->ptr has already moved on to the user's buffer, which (see
       * advancetobuf()) bumped bufstart_ofs by the residual length.  Undo
       * that so bufstart_ofs again refers to the start of the residual
       * bytes. */
      d->bufstart_ofs -= (d->residual_end - d->residual);
    }
    memcpy(d->residual_end, d->buf_param, d->size_param);
    d->residual_end += d->size_param;
  } else {
    /* Checkpoint was in user buf; old residual bytes not needed. */
    size_t save;
    assert(!in_residual_buf(d, d->checkpoint));

    /* Rewind to the checkpoint and keep everything from there up to
     * d->data_end. */
    d->ptr = d->checkpoint;
    save = curbufleft(d);
    assert(save <= sizeof(d->residual));
    memcpy(d->residual, d->ptr, save);
    d->residual_end = d->residual + save;
    /* The residual buffer now starts at the checkpoint's stream offset. */
    d->bufstart_ofs = offset(d);
  }

  switchtobuf(d, d->residual, d->residual_end);
  return d->size_param;
}
7758 
7759 /* Copies the next "bytes" bytes into "buf" and advances the stream.
7760  * Requires that this many bytes are available in the current buffer. */
consumebytes(upb_pbdecoder * d,void * buf,size_t bytes)7761 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
7762                                          size_t bytes) {
7763   assert(bytes <= curbufleft(d));
7764   memcpy(buf, d->ptr, bytes);
7765   advance(d, bytes);
7766 }
7767 
/* Slow path for getting the next "bytes" bytes, regardless of whether they are
 * available in the current buffer or not.  Returns a status code as described
 * in decoder.int.h.
 *
 * Whatever is left in the current buffer is copied first.  If that buffer was
 * the residual buffer, we then move on to the user's buffer and try to satisfy
 * the remainder from there.  Returns DECODE_OK on success.  If the data runs
 * out exactly at the end of the delimited region, an error is reported and the
 * result of upb_pbdecoder_suspend() is returned; otherwise the partial data is
 * stashed via suspend_save() and its result is returned. */
UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
                                          size_t bytes) {
  const size_t avail = curbufleft(d);
  consumebytes(d, buf, avail);
  bytes -= avail;
  /* getbytes() only takes this path when the buffer was short. */
  assert(bytes > 0);
  if (in_residual_buf(d, d->ptr)) {
    /* Residual bytes are exhausted; continue in the user's buffer. */
    advancetobuf(d, d->buf_param, d->size_param);
  }
  if (curbufleft(d) >= bytes) {
    consumebytes(d, (char *)buf + avail, bytes);
    return DECODE_OK;
  } else if (d->data_end == d->delim_end) {
    /* The readable data ends at the delimited end, so more input cannot
     * complete this value. */
    seterr(d, "Submessage ended in the middle of a value or group");
    return upb_pbdecoder_suspend(d);
  } else {
    return suspend_save(d);
  }
}
7790 
7791 /* Gets the next "bytes" bytes, regardless of whether they are available in the
7792  * current buffer or not.  Returns a status code as described in decoder.int.h.
7793  */
getbytes(upb_pbdecoder * d,void * buf,size_t bytes)7794 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
7795                                         size_t bytes) {
7796   if (curbufleft(d) >= bytes) {
7797     /* Buffer has enough data to satisfy. */
7798     consumebytes(d, buf, bytes);
7799     return DECODE_OK;
7800   } else {
7801     return getbytes_slow(d, buf, bytes);
7802   }
7803 }
7804 
peekbytes_slow(upb_pbdecoder * d,void * buf,size_t bytes)7805 UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
7806                                           size_t bytes) {
7807   size_t ret = curbufleft(d);
7808   memcpy(buf, d->ptr, ret);
7809   if (in_residual_buf(d, d->ptr)) {
7810     size_t copy = UPB_MIN(bytes - ret, d->size_param);
7811     memcpy((char *)buf + ret, d->buf_param, copy);
7812     ret += copy;
7813   }
7814   return ret;
7815 }
7816 
peekbytes(upb_pbdecoder * d,void * buf,size_t bytes)7817 UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
7818                                         size_t bytes) {
7819   if (curbufleft(d) >= bytes) {
7820     memcpy(buf, d->ptr, bytes);
7821     return bytes;
7822   } else {
7823     return peekbytes_slow(d, buf, bytes);
7824   }
7825 }
7826 
7827 
7828 /* Decoding of wire types *****************************************************/
7829 
7830 /* Slow path for decoding a varint from the current buffer position.
7831  * Returns a status code as described in decoder.int.h. */
upb_pbdecoder_decode_varint_slow(upb_pbdecoder * d,uint64_t * u64)7832 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7833                                                       uint64_t *u64) {
7834   uint8_t byte = 0x80;
7835   int bitpos;
7836   *u64 = 0;
7837   for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
7838     int32_t ret = getbytes(d, &byte, 1);
7839     if (ret >= 0) return ret;
7840     *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
7841   }
7842   if(bitpos == 70 && (byte & 0x80)) {
7843     seterr(d, kUnterminatedVarint);
7844     return upb_pbdecoder_suspend(d);
7845   }
7846   return DECODE_OK;
7847 }
7848 
7849 /* Decodes a varint from the current buffer position.
7850  * Returns a status code as described in decoder.int.h. */
decode_varint(upb_pbdecoder * d,uint64_t * u64)7851 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7852   if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
7853     *u64 = *d->ptr;
7854     advance(d, 1);
7855     return DECODE_OK;
7856   } else if (curbufleft(d) >= 10) {
7857     /* Fast case. */
7858     upb_decoderet r = upb_vdecode_fast(d->ptr);
7859     if (r.p == NULL) {
7860       seterr(d, kUnterminatedVarint);
7861       return upb_pbdecoder_suspend(d);
7862     }
7863     advance(d, r.p - d->ptr);
7864     *u64 = r.val;
7865     return DECODE_OK;
7866   } else {
7867     /* Slow case -- varint spans buffer seam. */
7868     return upb_pbdecoder_decode_varint_slow(d, u64);
7869   }
7870 }
7871 
7872 /* Decodes a 32-bit varint from the current buffer position.
7873  * Returns a status code as described in decoder.int.h. */
decode_v32(upb_pbdecoder * d,uint32_t * u32)7874 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7875   uint64_t u64;
7876   int32_t ret = decode_varint(d, &u64);
7877   if (ret >= 0) return ret;
7878   if (u64 > UINT32_MAX) {
7879     seterr(d, "Unterminated 32-bit varint");
7880     /* TODO(haberman) guarantee that this function return is >= 0 somehow,
7881      * so we know this path will always be treated as error by our caller.
7882      * Right now the size_t -> int32_t can overflow and produce negative values.
7883      */
7884     *u32 = 0;
7885     return upb_pbdecoder_suspend(d);
7886   }
7887   *u32 = u64;
7888   return DECODE_OK;
7889 }
7890 
7891 /* Decodes a fixed32 from the current buffer position.
7892  * Returns a status code as described in decoder.int.h.
7893  * TODO: proper byte swapping for big-endian machines. */
decode_fixed32(upb_pbdecoder * d,uint32_t * u32)7894 UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
7895   return getbytes(d, u32, 4);
7896 }
7897 
7898 /* Decodes a fixed64 from the current buffer position.
7899  * Returns a status code as described in decoder.int.h.
7900  * TODO: proper byte swapping for big-endian machines. */
decode_fixed64(upb_pbdecoder * d,uint64_t * u64)7901 UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
7902   return getbytes(d, u64, 8);
7903 }
7904 
/* Non-static versions of the above functions.
 * These are called by the JIT for fallback paths. */

/* Externally visible wrapper around decode_fixed32(); passes its status code
 * through unchanged. */
int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
  return decode_fixed32(d, u32);
}
7910 
/* Externally visible wrapper around decode_fixed64(); passes its status code
 * through unchanged. */
int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
  return decode_fixed64(d, u64);
}
7914 
/* Reinterprets the 8 bytes of "n" as a double (bit copy, no numeric
 * conversion). */
static double as_double(uint64_t n) {
  double result;
  memcpy(&result, &n, 8);
  return result;
}
/* Reinterprets the 4 bytes of "n" as a float (bit copy, no numeric
 * conversion). */
static float as_float(uint32_t n) {
  float result;
  memcpy(&result, &n, 4);
  return result;
}
7917 
7918 /* Pushes a frame onto the decoder stack. */
decoder_push(upb_pbdecoder * d,uint64_t end)7919 static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
7920   upb_pbdecoder_frame *fr = d->top;
7921 
7922   if (end > fr->end_ofs) {
7923     seterr(d, kPbDecoderSubmessageTooLong);
7924     return false;
7925   } else if (fr == d->limit) {
7926     seterr(d, kPbDecoderStackOverflow);
7927     return false;
7928   }
7929 
7930   fr++;
7931   fr->end_ofs = end;
7932   fr->dispatch = NULL;
7933   fr->groupnum = 0;
7934   d->top = fr;
7935   return true;
7936 }
7937 
/* Pushes a frame for a tag-delimited region and stores "arg" as the new
 * frame's group number.  Returns false if the push fails. */
static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
  /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
   * field number) prior to hitting any enclosing submessage end, pushing our
   * existing delim end prevents us from continuing to parse values from a
   * corrupt proto that doesn't give us an END tag in time. */
  const bool pushed = decoder_push(d, d->top->end_ofs);

  if (pushed) {
    d->top->groupnum = arg;
  }
  return pushed;
}
7948 
7949 /* Pops a frame from the decoder stack. */
decoder_pop(upb_pbdecoder * d)7950 static void decoder_pop(upb_pbdecoder *d) { d->top--; }
7951 
upb_pbdecoder_checktag_slow(upb_pbdecoder * d,uint64_t expected)7952 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
7953                                                  uint64_t expected) {
7954   uint64_t data = 0;
7955   size_t bytes = upb_value_size(expected);
7956   size_t read = peekbytes(d, &data, bytes);
7957   if (read == bytes && data == expected) {
7958     /* Advance past matched bytes. */
7959     int32_t ok = getbytes(d, &data, read);
7960     UPB_ASSERT_VAR(ok, ok < 0);
7961     return DECODE_OK;
7962   } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
7963     return suspend_save(d);
7964   } else {
7965     return DECODE_MISMATCH;
7966   }
7967 }
7968 
/* Skips over unknown field data.
 *
 * If "fieldnum" is >= 0 the caller has already decoded a tag, and
 * "fieldnum"/"wire_type" describe the first field to skip.  If "fieldnum" is
 * negative, a tag is decoded from the input first and the "wire_type"
 * argument is overwritten.
 *
 * A START_GROUP tag pushes a frame whose groupnum is the negated field number.
 * While the top frame's groupnum is negative we keep looping, skipping field
 * after field until the matching END_GROUP pops that frame.
 *
 * Returns DECODE_OK once a field has been skipped and the top frame's groupnum
 * is non-negative; DECODE_ENDGROUP if an END_GROUP tag equal to the top
 * frame's groupnum was seen; or the result of upb_pbdecoder_suspend() after an
 * error was reported.
 * NOTE(review): CHECK_RETURN is defined outside this part of the file; it
 * presumably returns its argument from this function when it is >= 0 --
 * confirm. */
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
                                  uint8_t wire_type) {
  /* The caller already has a tag: jump into the loop past the tag decode. */
  if (fieldnum >= 0)
    goto have_tag;

  while (true) {
    uint32_t tag;
    CHECK_RETURN(decode_v32(d, &tag));
    wire_type = tag & 0x7;  /* Low three bits hold the wire type. */
    fieldnum = tag >> 3;    /* Remaining bits hold the field number. */

have_tag:
    if (fieldnum == 0) {
      seterr(d, "Saw invalid field number (0)");
      return upb_pbdecoder_suspend(d);
    }

    /* TODO: deliver to unknown field callback. */
    switch (wire_type) {
      case UPB_WIRE_TYPE_32BIT:
        CHECK_RETURN(skip(d, 4));
        break;
      case UPB_WIRE_TYPE_64BIT:
        CHECK_RETURN(skip(d, 8));
        break;
      case UPB_WIRE_TYPE_VARINT: {
        /* Decode and discard the value. */
        uint64_t u64;
        CHECK_RETURN(decode_varint(d, &u64));
        break;
      }
      case UPB_WIRE_TYPE_DELIMITED: {
        /* Length prefix followed by that many bytes of payload. */
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_RETURN(skip(d, len));
        break;
      }
      case UPB_WIRE_TYPE_START_GROUP:
        /* A negated field number marks the new frame as an unknown group. */
        CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
        break;
      case UPB_WIRE_TYPE_END_GROUP:
        if (fieldnum == -d->top->groupnum) {
          /* Closes the unknown group we pushed above. */
          decoder_pop(d);
        } else if (fieldnum == d->top->groupnum) {
          /* Matches the top frame's own group number: let the caller end it. */
          return DECODE_ENDGROUP;
        } else {
          seterr(d, "Unmatched ENDGROUP tag.");
          return upb_pbdecoder_suspend(d);
        }
        break;
      default:
        seterr(d, "Invalid wire type");
        return upb_pbdecoder_suspend(d);
    }

    if (d->top->groupnum >= 0) {
      /* Not (or no longer) inside an unknown group: we are done. */
      return DECODE_OK;
    }

    /* Unknown group -- continue looping over unknown fields. */
    checkpoint(d);
  }
}
8031 
/* Points d->pc at the end-of-message bytecode of the current frame.  The
 * target is the value stored under DISPATCH_ENDMSG in the frame's dispatch
 * table, taken relative to the frame's base. */
static void goto_endmsg(upb_pbdecoder *d) {
  upb_value ofs;
  bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &ofs);

  UPB_ASSERT_VAR(found, found);
  d->pc = d->top->base + upb_value_getuint64(ofs);
}
8038 
8039 /* Parses a tag and jumps to the corresponding bytecode instruction for this
8040  * field.
8041  *
8042  * If the tag is unknown (or the wire type doesn't match), parses the field as
8043  * unknown.  If the tag is a valid ENDGROUP tag, jumps to the bytecode
8044  * instruction for the end of message. */
dispatch(upb_pbdecoder * d)8045 static int32_t dispatch(upb_pbdecoder *d) {
8046   upb_inttable *dispatch = d->top->dispatch;
8047   uint32_t tag;
8048   uint8_t wire_type;
8049   uint32_t fieldnum;
8050   upb_value val;
8051   int32_t retval;
8052 
8053   /* Decode tag. */
8054   CHECK_RETURN(decode_v32(d, &tag));
8055   wire_type = tag & 0x7;
8056   fieldnum = tag >> 3;
8057 
8058   /* Lookup tag.  Because of packed/non-packed compatibility, we have to
8059    * check the wire type against two possibilities. */
8060   if (fieldnum != DISPATCH_ENDMSG &&
8061       upb_inttable_lookup32(dispatch, fieldnum, &val)) {
8062     uint64_t v = upb_value_getuint64(val);
8063     if (wire_type == (v & 0xff)) {
8064       d->pc = d->top->base + (v >> 16);
8065       return DECODE_OK;
8066     } else if (wire_type == ((v >> 8) & 0xff)) {
8067       bool found =
8068           upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
8069       UPB_ASSERT_VAR(found, found);
8070       d->pc = d->top->base + upb_value_getuint64(val);
8071       return DECODE_OK;
8072     }
8073   }
8074 
8075   /* We have some unknown fields (or ENDGROUP) to parse.  The DISPATCH or TAG
8076    * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
8077    * we need to back up to, so that when we're done skipping unknown data we
8078    * can re-check the delimited end. */
8079   d->last--;  /* Necessary if we get suspended */
8080   d->pc = d->last;
8081   assert(getop(*d->last) == OP_CHECKDELIM);
8082 
8083   /* Unknown field or ENDGROUP. */
8084   retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
8085 
8086   CHECK_RETURN(retval);
8087 
8088   if (retval == DECODE_ENDGROUP) {
8089     goto_endmsg(d);
8090     return DECODE_OK;
8091   }
8092 
8093   return DECODE_OK;
8094 }
8095 
8096 /* Callers know that the stack is more than one deep because the opcodes that
8097  * call this only occur after PUSH operations. */
outer_frame(upb_pbdecoder * d)8098 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
8099   assert(d->top != d->stack);
8100   return d->top - 1;
8101 }
8102 
8103 
8104 /* The main decoding loop *****************************************************/
8105 
8106 /* The main decoder VM function.  Uses traditional bytecode dispatch loop with a
8107  * switch() statement. */
run_decoder_vm(upb_pbdecoder * d,const mgroup * group,const upb_bufhandle * handle)8108 size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
8109                       const upb_bufhandle* handle) {
8110 
8111 #define VMCASE(op, code) \
8112   case op: { code; if (consumes_input(op)) checkpoint(d); break; }
8113 #define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
8114   VMCASE(OP_PARSE_ ## type, { \
8115     ctype val; \
8116     CHECK_RETURN(decode_ ## wt(d, &val)); \
8117     upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
8118   })
8119 
8120   while(1) {
8121     int32_t instruction;
8122     opcode op;
8123     uint32_t arg;
8124     int32_t longofs;
8125 
8126     d->last = d->pc;
8127     instruction = *d->pc++;
8128     op = getop(instruction);
8129     arg = instruction >> 8;
8130     longofs = arg;
8131     assert(d->ptr != d->residual_end);
8132     UPB_UNUSED(group);
8133 #ifdef UPB_DUMP_BYTECODE
8134     fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
8135                     "%x %s (%d)\n",
8136             (int)offset(d),
8137             (int)(d->ptr - d->buf),
8138             (int)(d->data_end - d->ptr),
8139             (int)(d->end - d->ptr),
8140             (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
8141             (int)(d->pc - 1 - group->bytecode),
8142             upb_pbdecoder_getopname(op),
8143             arg);
8144 #endif
8145     switch (op) {
8146       /* Technically, we are losing data if we see a 32-bit varint that is not
8147        * properly sign-extended.  We could detect this and error about the data
8148        * loss, but proto2 does not do this, so we pass. */
8149       PRIMITIVE_OP(INT32,    varint,  int32,  int32_t,      uint64_t)
8150       PRIMITIVE_OP(INT64,    varint,  int64,  int64_t,      uint64_t)
8151       PRIMITIVE_OP(UINT32,   varint,  uint32, uint32_t,     uint64_t)
8152       PRIMITIVE_OP(UINT64,   varint,  uint64, uint64_t,     uint64_t)
8153       PRIMITIVE_OP(FIXED32,  fixed32, uint32, uint32_t,     uint32_t)
8154       PRIMITIVE_OP(FIXED64,  fixed64, uint64, uint64_t,     uint64_t)
8155       PRIMITIVE_OP(SFIXED32, fixed32, int32,  int32_t,      uint32_t)
8156       PRIMITIVE_OP(SFIXED64, fixed64, int64,  int64_t,      uint64_t)
8157       PRIMITIVE_OP(BOOL,     varint,  bool,   bool,         uint64_t)
8158       PRIMITIVE_OP(DOUBLE,   fixed64, double, as_double,    uint64_t)
8159       PRIMITIVE_OP(FLOAT,    fixed32, float,  as_float,     uint32_t)
8160       PRIMITIVE_OP(SINT32,   varint,  int32,  upb_zzdec_32, uint64_t)
8161       PRIMITIVE_OP(SINT64,   varint,  int64,  upb_zzdec_64, uint64_t)
8162 
8163       VMCASE(OP_SETDISPATCH,
8164         d->top->base = d->pc - 1;
8165         memcpy(&d->top->dispatch, d->pc, sizeof(void*));
8166         d->pc += sizeof(void*) / sizeof(uint32_t);
8167       )
8168       VMCASE(OP_STARTMSG,
8169         CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
8170       )
8171       VMCASE(OP_ENDMSG,
8172         CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
8173       )
8174       VMCASE(OP_STARTSEQ,
8175         upb_pbdecoder_frame *outer = outer_frame(d);
8176         CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
8177       )
8178       VMCASE(OP_ENDSEQ,
8179         CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
8180       )
8181       VMCASE(OP_STARTSUBMSG,
8182         upb_pbdecoder_frame *outer = outer_frame(d);
8183         CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
8184       )
8185       VMCASE(OP_ENDSUBMSG,
8186         CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
8187       )
8188       VMCASE(OP_STARTSTR,
8189         uint32_t len = delim_remaining(d);
8190         upb_pbdecoder_frame *outer = outer_frame(d);
8191         CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
8192         if (len == 0) {
8193           d->pc++;  /* Skip OP_STRING. */
8194         }
8195       )
8196       VMCASE(OP_STRING,
8197         uint32_t len = curbufleft(d);
8198         size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
8199         if (n > len) {
8200           if (n > delim_remaining(d)) {
8201             seterr(d, "Tried to skip past end of string.");
8202             return upb_pbdecoder_suspend(d);
8203           } else {
8204             int32_t ret = skip(d, n);
8205             /* This shouldn't return DECODE_OK, because n > len. */
8206             assert(ret >= 0);
8207             return ret;
8208           }
8209         }
8210         advance(d, n);
8211         if (n < len || d->delim_end == NULL) {
8212           /* We aren't finished with this string yet. */
8213           d->pc--;  /* Repeat OP_STRING. */
8214           if (n > 0) checkpoint(d);
8215           return upb_pbdecoder_suspend(d);
8216         }
8217       )
8218       VMCASE(OP_ENDSTR,
8219         CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
8220       )
8221       VMCASE(OP_PUSHTAGDELIM,
8222         CHECK_SUSPEND(pushtagdelim(d, arg));
8223       )
8224       VMCASE(OP_SETBIGGROUPNUM,
8225         d->top->groupnum = *d->pc++;
8226       )
8227       VMCASE(OP_POP,
8228         assert(d->top > d->stack);
8229         decoder_pop(d);
8230       )
8231       VMCASE(OP_PUSHLENDELIM,
8232         uint32_t len;
8233         CHECK_RETURN(decode_v32(d, &len));
8234         CHECK_SUSPEND(decoder_push(d, offset(d) + len));
8235         set_delim_end(d);
8236       )
8237       VMCASE(OP_SETDELIM,
8238         set_delim_end(d);
8239       )
8240       VMCASE(OP_CHECKDELIM,
8241         /* We are guaranteed of this assert because we never allow ourselves to
8242          * consume bytes beyond data_end, which covers delim_end when non-NULL.
8243          */
8244         assert(!(d->delim_end && d->ptr > d->delim_end));
8245         if (d->ptr == d->delim_end)
8246           d->pc += longofs;
8247       )
8248       VMCASE(OP_CALL,
8249         d->callstack[d->call_len++] = d->pc;
8250         d->pc += longofs;
8251       )
8252       VMCASE(OP_RET,
8253         assert(d->call_len > 0);
8254         d->pc = d->callstack[--d->call_len];
8255       )
8256       VMCASE(OP_BRANCH,
8257         d->pc += longofs;
8258       )
8259       VMCASE(OP_TAG1,
8260         uint8_t expected;
8261         CHECK_SUSPEND(curbufleft(d) > 0);
8262         expected = (arg >> 8) & 0xff;
8263         if (*d->ptr == expected) {
8264           advance(d, 1);
8265         } else {
8266           int8_t shortofs;
8267          badtag:
8268           shortofs = arg;
8269           if (shortofs == LABEL_DISPATCH) {
8270             CHECK_RETURN(dispatch(d));
8271           } else {
8272             d->pc += shortofs;
8273             break; /* Avoid checkpoint(). */
8274           }
8275         }
8276       )
8277       VMCASE(OP_TAG2,
8278         uint16_t expected;
8279         CHECK_SUSPEND(curbufleft(d) > 0);
8280         expected = (arg >> 8) & 0xffff;
8281         if (curbufleft(d) >= 2) {
8282           uint16_t actual;
8283           memcpy(&actual, d->ptr, 2);
8284           if (expected == actual) {
8285             advance(d, 2);
8286           } else {
8287             goto badtag;
8288           }
8289         } else {
8290           int32_t result = upb_pbdecoder_checktag_slow(d, expected);
8291           if (result == DECODE_MISMATCH) goto badtag;
8292           if (result >= 0) return result;
8293         }
8294       )
8295       VMCASE(OP_TAGN, {
8296         uint64_t expected;
8297         int32_t result;
8298         memcpy(&expected, d->pc, 8);
8299         d->pc += 2;
8300         result = upb_pbdecoder_checktag_slow(d, expected);
8301         if (result == DECODE_MISMATCH) goto badtag;
8302         if (result >= 0) return result;
8303       })
8304       VMCASE(OP_DISPATCH, {
8305         CHECK_RETURN(dispatch(d));
8306       })
8307       VMCASE(OP_HALT, {
8308         return d->size_param;
8309       })
8310     }
8311   }
8312 }
8313 
8314 
8315 /* BytesHandler handlers ******************************************************/
8316 
upb_pbdecoder_startbc(void * closure,const void * pc,size_t size_hint)8317 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
8318   upb_pbdecoder *d = closure;
8319   UPB_UNUSED(size_hint);
8320   d->top->end_ofs = UINT64_MAX;
8321   d->bufstart_ofs = 0;
8322   d->call_len = 1;
8323   d->callstack[0] = &halt;
8324   d->pc = pc;
8325   d->skip = 0;
8326   return d;
8327 }
8328 
upb_pbdecoder_startjit(void * closure,const void * hd,size_t size_hint)8329 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
8330   upb_pbdecoder *d = closure;
8331   UPB_UNUSED(hd);
8332   UPB_UNUSED(size_hint);
8333   d->top->end_ofs = UINT64_MAX;
8334   d->bufstart_ofs = 0;
8335   d->call_len = 0;
8336   d->skip = 0;
8337   return d;
8338 }
8339 
/* End-of-stream handler.  "closure" is the decoder and "handler_data" is the
 * upb_pbdecodermethod being run.
 *
 * Reports an error through seterr() and returns false if the input stopped
 * somewhere a message cannot end: with unparsed bytes still in the residual
 * buffer, in the middle of skipped data, inside a delimited region, or (after
 * the final decode pass below) with a non-empty call stack.  Returns true
 * otherwise. */
bool upb_pbdecoder_end(void *closure, const void *handler_data) {
  upb_pbdecoder *d = closure;
  const upb_pbdecodermethod *method = handler_data;
  uint64_t end;
  char dummy;  /* Gives the zero-length final decode call a non-NULL buffer. */

  if (d->residual_end > d->residual) {
    seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
    return false;
  }

  if (d->skip) {
    seterr(d, "Unexpected EOF inside skipped data");
    return false;
  }

  /* startbc()/startjit() set the top-level end offset to UINT64_MAX; any
   * other value here means the current frame has a known end we have not
   * reached. */
  if (d->top->end_ofs != UINT64_MAX) {
    seterr(d, "Unexpected EOF inside delimited string");
    return false;
  }

  /* The user's end() call indicates that the message ends here. */
  end = offset(d);
  d->top->end_ofs = end;

#ifdef UPB_USE_JIT_X64
  if (method->is_native_) {
    /* Native method: run the JIT-compiled code once more with an empty
     * buffer. */
    const mgroup *group = (const mgroup*)method->group;
    if (d->top != d->stack)
      d->stack->end_ofs = 0;
    group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
  } else
#endif
  {
    /* Bytecode method: run the interpreter once more with an empty buffer. */
    const uint32_t *p = d->pc;
    d->stack->end_ofs = end;
    /* Check the previous bytecode, but guard against beginning. */
    if (p != method->code_base.ptr) p--;
    if (getop(*p) == OP_CHECKDELIM) {
      /* Rewind from OP_TAG* to OP_CHECKDELIM. */
      assert(getop(*d->pc) == OP_TAG1 ||
             getop(*d->pc) == OP_TAG2 ||
             getop(*d->pc) == OP_TAGN ||
             getop(*d->pc) == OP_DISPATCH);
      d->pc = p;
    }
    upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
  }

  /* A non-empty call stack after the final pass means the input ended before
   * the enclosing submessage or group was closed. */
  if (d->call_len != 0) {
    seterr(d, "Unexpected EOF inside submessage or group");
    return false;
  }

  return true;
}
8396 
/* Decodes "size" bytes at "buf" with the bytecode interpreter.  "decoder" is
 * the upb_pbdecoder and "group" is forwarded unchanged to run_decoder_vm().
 *
 * upb_pbdecoder_resume() is given the new buffer first.  If it reports
 * DECODE_ENDGROUP, goto_endmsg() is applied to the decoder; CHECK_RETURN may
 * then return early with the resume result (its exact condition is defined
 * elsewhere in this file -- NOTE(review): presumably "result >= 0"; confirm).
 * Otherwise the return value is whatever run_decoder_vm() returns. */
size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
                            size_t size, const upb_bufhandle *handle) {
  int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);

  if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
  CHECK_RETURN(result);

  return run_decoder_vm(decoder, group, handle);
}
8406 
8407 
8408 /* Public API *****************************************************************/
8409 
/* Returns the decoder to its initial parsing state: the frame stack is
 * emptied back to its first frame and every buffer cursor is pointed at the
 * start of the (now empty) residual buffer. */
void upb_pbdecoder_reset(upb_pbdecoder *d) {
  /* No buffered input. */
  d->residual_end = d->residual;
  d->end = d->residual;
  d->buf = d->residual;
  d->ptr = d->residual;

  /* Only the bottom frame is live, and it is not inside any group. */
  d->stack->groupnum = 0;
  d->top = d->stack;
}
8418 
upb_pbdecoder_create(upb_env * e,const upb_pbdecodermethod * m,upb_sink * sink)8419 upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
8420                                     upb_sink *sink) {
8421   const size_t default_max_nesting = 64;
8422 #ifndef NDEBUG
8423   size_t size_before = upb_env_bytesallocated(e);
8424 #endif
8425 
8426   upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
8427   if (!d) return NULL;
8428 
8429   d->method_ = m;
8430   d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
8431   d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
8432   if (!d->stack || !d->callstack) {
8433     return NULL;
8434   }
8435 
8436   d->env = e;
8437   d->limit = d->stack + default_max_nesting - 1;
8438   d->stack_size = default_max_nesting;
8439 
8440   upb_pbdecoder_reset(d);
8441   upb_bytessink_reset(&d->input_, &m->input_handler_, d);
8442 
8443   assert(sink);
8444   if (d->method_->dest_handlers_) {
8445     if (sink->handlers != d->method_->dest_handlers_)
8446       return NULL;
8447   }
8448   upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
8449 
8450   /* If this fails, increase the value in decoder.h. */
8451   assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
8452   return d;
8453 }
8454 
/* Returns the decoder's current stream offset, as computed by offset(). */
uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
  const uint64_t parsed = offset(d);
  return parsed;
}
8458 
upb_pbdecoder_method(const upb_pbdecoder * d)8459 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
8460   return d->method_;
8461 }
8462 
upb_pbdecoder_input(upb_pbdecoder * d)8463 upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
8464   return &d->input_;
8465 }
8466 
upb_pbdecoder_maxnesting(const upb_pbdecoder * d)8467 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
8468   return d->stack_size;
8469 }
8470 
/* Sets the maximum nesting depth to "max" frames, growing the frame stack and
 * the call stack when "max" exceeds their current capacity.
 *
 * Returns false, leaving the previous limit in force, if "max" is below the
 * current nesting depth or if a reallocation fails.  Returns true otherwise.
 *
 * Growing the frame stack may move it, so d->top and d->limit (which point
 * into it) are rebased immediately after the reallocation; they therefore
 * remain valid even if the subsequent call stack reallocation fails. */
bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
  size_t depth;

  assert(d->top >= d->stack);
  depth = (size_t)(d->top - d->stack);

  if (max < depth) {
    /* Can't set a limit smaller than what we are currently at. */
    return false;
  }

  if (max > d->stack_size) {
    /* Need to reallocate stack and callstack to accommodate. */
    /* d->limit is always stored as "stack + <frames> - 1"; recover <frames>
     * so the old limit can be re-applied to the moved stack. */
    size_t limit_frames = (size_t)(d->limit + 1 - d->stack);
    size_t old_size = stacksize(d, d->stack_size);
    size_t new_size = stacksize(d, max);
    void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
    if (!p) {
      return false;
    }
    d->stack = p;
    d->top = d->stack + depth;
    d->limit = d->stack + limit_frames - 1;

    old_size = callstacksize(d, d->stack_size);
    new_size = callstacksize(d, max);
    p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
    if (!p) {
      /* The frame stack is now larger than stack_size records, which is
       * harmless; top and limit were already rebased above. */
      return false;
    }
    d->callstack = p;

    d->stack_size = max;
  }

  d->limit = d->stack + max - 1;
  return true;
}
8503 /*
8504 ** upb::Encoder
8505 **
8506 ** Since we are implementing pure handlers (ie. without any out-of-band access
8507 ** to pre-computed lengths), we have to buffer all submessages before we can
8508 ** emit even their first byte.
8509 **
8510 ** Not knowing the size of submessages also means we can't write a perfect
8511 ** zero-copy implementation, even with buffering.  Lengths are stored as
8512 ** varints, which means that we don't know how many bytes to reserve for the
8513 ** length until we know what the length is.
8514 **
8515 ** This leaves us with three main choices:
8516 **
8517 ** 1. buffer all submessage data in a temporary buffer, then copy it exactly
8518 **    once into the output buffer.
8519 **
8520 ** 2. attempt to buffer data directly into the output buffer, estimating how
8521 **    many bytes each length will take.  When our guesses are wrong, use
8522 **    memmove() to grow or shrink the allotted space.
8523 **
8524 ** 3. buffer directly into the output buffer, allocating a max length
8525 **    ahead-of-time for each submessage length.  If we overallocated, we waste
8526 **    space, but no memcpy() or memmove() is required.  This approach requires
8527 **    defining a maximum size for submessages and rejecting submessages that
8528 **    exceed that size.
8529 **
8530 ** (2) and (3) have the potential to have better performance, but they are more
8531 ** complicated and subtle to implement:
8532 **
8533 **   (3) requires making an arbitrary choice of the maximum message size; it
8534 **       wastes space when submessages are shorter than this and fails
8535 **       completely when they are longer.  This makes it more finicky and
8536 **       requires configuration based on the input.  It also makes it impossible
8537 **       to perfectly match the output of reference encoders that always use the
8538 **       optimal amount of space for each length.
8539 **
**   (2) requires guessing the size upfront, and if multiple lengths are
8541 **       guessed wrong the minimum required number of memmove() operations may
8542 **       be complicated to compute correctly.  Implemented properly, it may have
8543 **       a useful amortized or average cost, but more investigation is required
8544 **       to determine this and what the optimal algorithm is to achieve it.
8545 **
8546 **   (1) makes you always pay for exactly one copy, but its implementation is
8547 **       the simplest and its performance is predictable.
8548 **
8549 ** So for now, we implement (1) only.  If we wish to optimize later, we should
8550 ** be able to do it without affecting users.
8551 **
8552 ** The strategy is to buffer the segments of data that do *not* depend on
8553 ** unknown lengths in one buffer, and keep a separate buffer of segment pointers
8554 ** and lengths.  When the top-level submessage ends, we can go beginning to end,
8555 ** alternating the writing of lengths with memcpy() of the rest of the data.
8556 ** At the top level though, no buffering is required.
8557 */
8558 
8559 
8560 #include <stdlib.h>
8561 
8562 /* The output buffer is divided into segments; a segment is a string of data
8563  * that is "ready to go" -- it does not need any varint lengths inserted into
8564  * the middle.  The seams between segments are where varints will be inserted
8565  * once they are known.
8566  *
8567  * We also use the concept of a "run", which is a range of encoded bytes that
8568  * occur at a single submessage level.  Every segment contains one or more runs.
8569  *
8570  * A segment can span messages.  Consider:
8571  *
8572  *                  .--Submessage lengths---------.
8573  *                  |       |                     |
8574  *                  |       V                     V
8575  *                  V      | |---------------    | |-----------------
8576  * Submessages:    | |-----------------------------------------------
8577  * Top-level msg: ------------------------------------------------------------
8578  *
8579  * Segments:          -----   -------------------   -----------------
8580  * Runs:              *----   *--------------*---   *----------------
8581  * (* marks the start)
8582  *
 * Note that the top-level message is not in any segment because it does not
8584  * have any length preceding it.
8585  *
8586  * A segment is only interrupted when another length needs to be inserted.  So
8587  * observe how the second segment spans both the inner submessage and part of
8588  * the next enclosing message. */
/* One entry of the encoder's segment list.  When buffered data is flushed,
 * end_delim() writes msglen as a varint and then seglen bytes of buffered
 * data for each entry, in order. */
typedef struct {
  uint32_t msglen;  /* The length to varint-encode before this segment. */
  uint32_t seglen;  /* Length of the segment. */
} upb_pb_encoder_segment;
8593 
/* State of one protobuf encoder. */
struct upb_pb_encoder {
  /* Environment used to (re)allocate the output and segment buffers. */
  upb_env *env;

  /* Our input and output. */
  upb_sink input_;
  upb_bytessink *output_;

  /* The "subclosure" -- used as the inner closure as part of the bytessink
   * protocol. */
  void *subc;

  /* The output buffer and limit, and our current write position.  "buf"
   * initially points to "initbuf", but is dynamically allocated if we need to
   * grow beyond the initial size. */
  char *buf, *ptr, *limit;

  /* The beginning of the current run, or undefined if we are at the top
   * level. */
  char *runbegin;

  /* The list of segments we are accumulating.  "segptr" is the segment
   * currently being filled and "seglimit" is one past the end of the
   * allocated array. */
  upb_pb_encoder_segment *segbuf, *segptr, *seglimit;

  /* The stack of enclosing submessages.  Each entry is an index into segbuf
   * naming the segment where this submessage's length is being accumulated.
   * "top" is NULL while no delimited region is open (i.e. at the top
   * level). */
  int *stack, *top, *stacklimit;

  /* Depth of startmsg/endmsg calls. */
  int depth;
};
8624 
8625 /* low-level buffering ********************************************************/
8626 
8627 /* Low-level functions for interacting with the output buffer. */
8628 
8629 /* TODO(haberman): handle pushback */
putbuf(upb_pb_encoder * e,const char * buf,size_t len)8630 static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
8631   size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
8632   UPB_ASSERT_VAR(n, n == len);
8633 }
8634 
top(upb_pb_encoder * e)8635 static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
8636   return &e->segbuf[*e->top];
8637 }
8638 
8639 /* Call to ensure that at least "bytes" bytes are available for writing at
8640  * e->ptr.  Returns false if the bytes could not be allocated. */
reserve(upb_pb_encoder * e,size_t bytes)8641 static bool reserve(upb_pb_encoder *e, size_t bytes) {
8642   if ((size_t)(e->limit - e->ptr) < bytes) {
8643     /* Grow buffer. */
8644     char *new_buf;
8645     size_t needed = bytes + (e->ptr - e->buf);
8646     size_t old_size = e->limit - e->buf;
8647 
8648     size_t new_size = old_size;
8649 
8650     while (new_size < needed) {
8651       new_size *= 2;
8652     }
8653 
8654     new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
8655 
8656     if (new_buf == NULL) {
8657       return false;
8658     }
8659 
8660     e->ptr = new_buf + (e->ptr - e->buf);
8661     e->runbegin = new_buf + (e->runbegin - e->buf);
8662     e->limit = new_buf + new_size;
8663     e->buf = new_buf;
8664   }
8665 
8666   return true;
8667 }
8668 
8669 /* Call when "bytes" bytes have been writte at e->ptr.  The caller *must* have
8670  * previously called reserve() with at least this many bytes. */
encoder_advance(upb_pb_encoder * e,size_t bytes)8671 static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
8672   assert((size_t)(e->limit - e->ptr) >= bytes);
8673   e->ptr += bytes;
8674 }
8675 
8676 /* Call when all of the bytes for a handler have been written.  Flushes the
8677  * bytes if possible and necessary, returning false if this failed. */
commit(upb_pb_encoder * e)8678 static bool commit(upb_pb_encoder *e) {
8679   if (!e->top) {
8680     /* We aren't inside a delimited region.  Flush our accumulated bytes to
8681      * the output.
8682      *
8683      * TODO(haberman): in the future we may want to delay flushing for
8684      * efficiency reasons. */
8685     putbuf(e, e->buf, e->ptr - e->buf);
8686     e->ptr = e->buf;
8687   }
8688 
8689   return true;
8690 }
8691 
8692 /* Writes the given bytes to the buffer, handling reserve/advance. */
encode_bytes(upb_pb_encoder * e,const void * data,size_t len)8693 static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
8694   if (!reserve(e, len)) {
8695     return false;
8696   }
8697 
8698   memcpy(e->ptr, data, len);
8699   encoder_advance(e, len);
8700   return true;
8701 }
8702 
8703 /* Finish the current run by adding the run totals to the segment and message
8704  * length. */
accumulate(upb_pb_encoder * e)8705 static void accumulate(upb_pb_encoder *e) {
8706   size_t run_len;
8707   assert(e->ptr >= e->runbegin);
8708   run_len = e->ptr - e->runbegin;
8709   e->segptr->seglen += run_len;
8710   top(e)->msglen += run_len;
8711   e->runbegin = e->ptr;
8712 }
8713 
8714 /* Call to indicate the start of delimited region for which the full length is
8715  * not yet known.  All data will be buffered until the length is known.
8716  * Delimited regions may be nested; their lengths will all be tracked properly. */
start_delim(upb_pb_encoder * e)8717 static bool start_delim(upb_pb_encoder *e) {
8718   if (e->top) {
8719     /* We are already buffering, advance to the next segment and push it on the
8720      * stack. */
8721     accumulate(e);
8722 
8723     if (++e->top == e->stacklimit) {
8724       /* TODO(haberman): grow stack? */
8725       return false;
8726     }
8727 
8728     if (++e->segptr == e->seglimit) {
8729       /* Grow segment buffer. */
8730       size_t old_size =
8731           (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
8732       size_t new_size = old_size * 2;
8733       upb_pb_encoder_segment *new_buf =
8734           upb_env_realloc(e->env, e->segbuf, old_size, new_size);
8735 
8736       if (new_buf == NULL) {
8737         return false;
8738       }
8739 
8740       e->segptr = new_buf + (e->segptr - e->segbuf);
8741       e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
8742       e->segbuf = new_buf;
8743     }
8744   } else {
8745     /* We were previously at the top level, start buffering. */
8746     e->segptr = e->segbuf;
8747     e->top = e->stack;
8748     e->runbegin = e->ptr;
8749   }
8750 
8751   *e->top = e->segptr - e->segbuf;
8752   e->segptr->seglen = 0;
8753   e->segptr->msglen = 0;
8754 
8755   return true;
8756 }
8757 
8758 /* Call to indicate the end of a delimited region.  We now know the length of
8759  * the delimited region.  If we are not nested inside any other delimited
8760  * regions, we can now emit all of the buffered data we accumulated. */
end_delim(upb_pb_encoder * e)8761 static bool end_delim(upb_pb_encoder *e) {
8762   size_t msglen;
8763   accumulate(e);
8764   msglen = top(e)->msglen;
8765 
8766   if (e->top == e->stack) {
8767     /* All lengths are now available, emit all buffered data. */
8768     char buf[UPB_PB_VARINT_MAX_LEN];
8769     upb_pb_encoder_segment *s;
8770     const char *ptr = e->buf;
8771     for (s = e->segbuf; s <= e->segptr; s++) {
8772       size_t lenbytes = upb_vencode64(s->msglen, buf);
8773       putbuf(e, buf, lenbytes);
8774       putbuf(e, ptr, s->seglen);
8775       ptr += s->seglen;
8776     }
8777 
8778     e->ptr = e->buf;
8779     e->top = NULL;
8780   } else {
8781     /* Need to keep buffering; propagate length info into enclosing
8782      * submessages. */
8783     --e->top;
8784     top(e)->msglen += msglen + upb_varint_size(msglen);
8785   }
8786 
8787   return true;
8788 }
8789 
8790 
8791 /* tag_t **********************************************************************/
8792 
/* A precomputed (pre-encoded) tag and length.  Instances are built by
 * new_tag() and attached to handlers as handler data; encode_tag() copies
 * the first "bytes" bytes of "tag" to the output. */

typedef struct {
  uint8_t bytes;  /* Number of valid bytes in tag[]. */
  char tag[7];    /* The varint-encoded tag. */
} tag_t;
8799 
8800 /* Allocates a new tag for this field, and sets it in these handlerattr. */
new_tag(upb_handlers * h,const upb_fielddef * f,upb_wiretype_t wt,upb_handlerattr * attr)8801 static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
8802                     upb_handlerattr *attr) {
8803   uint32_t n = upb_fielddef_number(f);
8804 
8805   tag_t *tag = malloc(sizeof(tag_t));
8806   tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
8807 
8808   upb_handlerattr_init(attr);
8809   upb_handlerattr_sethandlerdata(attr, tag);
8810   upb_handlers_addcleanup(h, tag, free);
8811 }
8812 
/* Appends the pre-encoded bytes of "tag" to the buffer.  Returns false if
 * space could not be reserved. */
static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
  const char *bytes = tag->tag;
  size_t len = tag->bytes;
  return encode_bytes(e, bytes, len);
}
8816 
8817 
8818 /* encoding of wire types *****************************************************/
8819 
/* Appends "val" as 8 raw bytes in host byte order.  Returns false if space
 * could not be reserved. */
static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return encode_bytes(e, raw, sizeof(val));
}
8824 
/* Appends "val" as 4 raw bytes in host byte order.  Returns false if space
 * could not be reserved. */
static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
  /* TODO(haberman): byte-swap for big endian. */
  const void *raw = &val;
  return encode_bytes(e, raw, sizeof(val));
}
8829 
/* Appends "val" as a varint.  Space for the longest possible varint is
 * reserved up front; the write position then advances by the number of bytes
 * upb_vencode64() actually produced.  Returns false if space could not be
 * reserved. */
static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
  size_t written;

  if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
    return false;
  }

  written = upb_vencode64(val, e->ptr);
  encoder_advance(e, written);
  return true;
}
8838 
/* Returns the object representation of "d" reinterpreted as a 64-bit
 * unsigned integer (a bit copy, not a numeric conversion). */
static uint64_t dbl2uint64(double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
8844 
/* Returns the object representation of "d" reinterpreted as a 32-bit
 * unsigned integer (a bit copy, not a numeric conversion). */
static uint32_t flt2uint32(float d) {
  uint32_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
8850 
8851 
8852 /* encoding of proto types ****************************************************/
8853 
startmsg(void * c,const void * hd)8854 static bool startmsg(void *c, const void *hd) {
8855   upb_pb_encoder *e = c;
8856   UPB_UNUSED(hd);
8857   if (e->depth++ == 0) {
8858     upb_bytessink_start(e->output_, 0, &e->subc);
8859   }
8860   return true;
8861 }
8862 
/* endmsg handler.  Undoes one level of message nesting and, when the
 * outermost message ends, ends the output bytessink.  "status" is not used.
 * Always returns true. */
static bool endmsg(void *c, const void *hd, upb_status *status) {
  upb_pb_encoder *e = c;
  UPB_UNUSED(status);
  UPB_UNUSED(hd);

  e->depth--;
  if (e->depth == 0) {
    upb_bytessink_end(e->output_);
  }
  return true;
}
8872 
encode_startdelimfield(void * c,const void * hd)8873 static void *encode_startdelimfield(void *c, const void *hd) {
8874   bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
8875   return ok ? c : UPB_BREAK;
8876 }
8877 
/* End handler for any length-delimited field: closes the delimited region
 * opened by the matching start handler.  The handler data is not used. */
static bool encode_enddelimfield(void *c, const void *hd) {
  bool closed;
  UPB_UNUSED(hd);
  closed = end_delim(c);
  return closed;
}
8882 
encode_startgroup(void * c,const void * hd)8883 static void *encode_startgroup(void *c, const void *hd) {
8884   return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
8885 }
8886 
/* End handler for a group field: writes and commits the tag in "hd".
 * Returns false if the tag could not be written. */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) {
    return false;
  }
  return commit(c);
}
8890 
/* Start handler for string/bytes fields.  The size hint is ignored; the work
 * is the same as for any other length-delimited field. */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *subclosure;
  UPB_UNUSED(size_hint);
  subclosure = encode_startdelimfield(c, hd);
  return subclosure;
}
8895 
/* String data handler: appends "len" bytes from "buf" to the encoder's
 * buffer.  Returns "len" when the bytes were buffered and 0 when space could
 * not be reserved.  The handler data and buffer handle are not used. */
static size_t encode_strbuf(void *c, const void *hd, const char *buf,
                            size_t len, const upb_bufhandle *h) {
  UPB_UNUSED(h);
  UPB_UNUSED(hd);
  if (!encode_bytes(c, buf, len)) {
    return 0;
  }
  return len;
}
8902 
8903 #define T(type, ctype, convert, encode)                                  \
8904   static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
8905     return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e);  \
8906   }                                                                      \
8907   static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
8908     UPB_UNUSED(hd);                                                      \
8909     return encode(e, (convert)(val));                                    \
8910   }
8911 
T(double,double,dbl2uint64,encode_fixed64)8912 T(double,   double,   dbl2uint64,   encode_fixed64)
8913 T(float,    float,    flt2uint32,   encode_fixed32)
8914 T(int64,    int64_t,  uint64_t,     encode_varint)
8915 T(int32,    int32_t,  uint32_t,     encode_varint)
8916 T(fixed64,  uint64_t, uint64_t,     encode_fixed64)
8917 T(fixed32,  uint32_t, uint32_t,     encode_fixed32)
8918 T(bool,     bool,     bool,         encode_varint)
8919 T(uint32,   uint32_t, uint32_t,     encode_varint)
8920 T(uint64,   uint64_t, uint64_t,     encode_varint)
8921 T(enum,     int32_t,  uint32_t,     encode_varint)
8922 T(sfixed32, int32_t,  uint32_t,     encode_fixed32)
8923 T(sfixed64, int64_t,  uint64_t,     encode_fixed64)
8924 T(sint32,   int32_t,  upb_zzenc_32, encode_varint)
8925 T(sint64,   int64_t,  upb_zzenc_64, encode_varint)
8926 
8927 #undef T
8928 
8929 
8930 /* code to build the handlers *************************************************/
8931 
8932 static void newhandlers_callback(const void *closure, upb_handlers *h) {
8933   const upb_msgdef *m;
8934   upb_msg_field_iter i;
8935 
8936   UPB_UNUSED(closure);
8937 
8938   upb_handlers_setstartmsg(h, startmsg, NULL);
8939   upb_handlers_setendmsg(h, endmsg, NULL);
8940 
8941   m = upb_handlers_msgdef(h);
8942   for(upb_msg_field_begin(&i, m);
8943       !upb_msg_field_done(&i);
8944       upb_msg_field_next(&i)) {
8945     const upb_fielddef *f = upb_msg_iter_field(&i);
8946     bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
8947                   upb_fielddef_packed(f);
8948     upb_handlerattr attr;
8949     upb_wiretype_t wt =
8950         packed ? UPB_WIRE_TYPE_DELIMITED
8951                : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
8952 
8953     /* Pre-encode the tag for this field. */
8954     new_tag(h, f, wt, &attr);
8955 
8956     if (packed) {
8957       upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
8958       upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
8959     }
8960 
8961 #define T(upper, lower, upbtype)                                     \
8962   case UPB_DESCRIPTOR_TYPE_##upper:                                  \
8963     if (packed) {                                                    \
8964       upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
8965     } else {                                                         \
8966       upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
8967     }                                                                \
8968     break;
8969 
8970     switch (upb_fielddef_descriptortype(f)) {
8971       T(DOUBLE,   double,   double);
8972       T(FLOAT,    float,    float);
8973       T(INT64,    int64,    int64);
8974       T(INT32,    int32,    int32);
8975       T(FIXED64,  fixed64,  uint64);
8976       T(FIXED32,  fixed32,  uint32);
8977       T(BOOL,     bool,     bool);
8978       T(UINT32,   uint32,   uint32);
8979       T(UINT64,   uint64,   uint64);
8980       T(ENUM,     enum,     int32);
8981       T(SFIXED32, sfixed32, int32);
8982       T(SFIXED64, sfixed64, int64);
8983       T(SINT32,   sint32,   int32);
8984       T(SINT64,   sint64,   int64);
8985       case UPB_DESCRIPTOR_TYPE_STRING:
8986       case UPB_DESCRIPTOR_TYPE_BYTES:
8987         upb_handlers_setstartstr(h, f, encode_startstr, &attr);
8988         upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
8989         upb_handlers_setstring(h, f, encode_strbuf, &attr);
8990         break;
8991       case UPB_DESCRIPTOR_TYPE_MESSAGE:
8992         upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
8993         upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
8994         break;
8995       case UPB_DESCRIPTOR_TYPE_GROUP: {
8996         /* Endgroup takes a different tag (wire_type = END_GROUP). */
8997         upb_handlerattr attr2;
8998         new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);
8999 
9000         upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
9001         upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);
9002 
9003         upb_handlerattr_uninit(&attr2);
9004         break;
9005       }
9006     }
9007 
9008 #undef T
9009 
9010     upb_handlerattr_uninit(&attr);
9011   }
9012 }
9013 
/* Puts the encoder back into its starting state by clearing the segment
 * pointer, the top-of-stack pointer and the nesting depth. */
void upb_pb_encoder_reset(upb_pb_encoder *e) {
  e->depth = 0;
  e->top = NULL;
  e->segptr = NULL;
}
9019 
9020 
9021 /* public API *****************************************************************/
9022 
upb_pb_encoder_newhandlers(const upb_msgdef * m,const void * owner)9023 const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
9024                                                const void *owner) {
9025   return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
9026 }
9027 
upb_pb_encoder_create(upb_env * env,const upb_handlers * h,upb_bytessink * output)9028 upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
9029                                       upb_bytessink *output) {
9030   const size_t initial_bufsize = 256;
9031   const size_t initial_segbufsize = 16;
9032   /* TODO(haberman): make this configurable. */
9033   const size_t stack_size = 64;
9034 #ifndef NDEBUG
9035   const size_t size_before = upb_env_bytesallocated(env);
9036 #endif
9037 
9038   upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
9039   if (!e) return NULL;
9040 
9041   e->buf = upb_env_malloc(env, initial_bufsize);
9042   e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
9043   e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
9044 
9045   if (!e->buf || !e->segbuf || !e->stack) {
9046     return NULL;
9047   }
9048 
9049   e->limit = e->buf + initial_bufsize;
9050   e->seglimit = e->segbuf + initial_segbufsize;
9051   e->stacklimit = e->stack + stack_size;
9052 
9053   upb_pb_encoder_reset(e);
9054   upb_sink_reset(&e->input_, h, e);
9055 
9056   e->env = env;
9057   e->output_ = output;
9058   e->subc = output->closure;
9059   e->ptr = e->buf;
9060 
9061   /* If this fails, increase the value in encoder.h. */
9062   assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
9063   return e;
9064 }
9065 
upb_pb_encoder_input(upb_pb_encoder * e)9066 upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
9067 
9068 
9069 #include <stdio.h>
9070 #include <stdlib.h>
9071 #include <string.h>
9072 
upb_load_defs_from_descriptor(const char * str,size_t len,int * n,void * owner,upb_status * status)9073 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
9074                                         void *owner, upb_status *status) {
9075   /* Create handlers. */
9076   const upb_pbdecodermethod *decoder_m;
9077   const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
9078   upb_env env;
9079   upb_pbdecodermethodopts opts;
9080   upb_pbdecoder *decoder;
9081   upb_descreader *reader;
9082   bool ok;
9083   upb_def **ret = NULL;
9084   upb_def **defs;
9085 
9086   upb_pbdecodermethodopts_init(&opts, reader_h);
9087   decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
9088 
9089   upb_env_init(&env);
9090   upb_env_reporterrorsto(&env, status);
9091 
9092   reader = upb_descreader_create(&env, reader_h);
9093   decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
9094 
9095   /* Push input data. */
9096   ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
9097 
9098   if (!ok) goto cleanup;
9099   defs = upb_descreader_getdefs(reader, owner, n);
9100   ret = malloc(sizeof(upb_def*) * (*n));
9101   memcpy(ret, defs, sizeof(upb_def*) * (*n));
9102 
9103 cleanup:
9104   upb_env_uninit(&env);
9105   upb_handlers_unref(reader_h, &reader_h);
9106   upb_pbdecodermethod_unref(decoder_m, &decoder_m);
9107   return ret;
9108 }
9109 
/* Loads the defs contained in the serialized descriptor |str| (|len| bytes)
 * and adds them to symbol table |s|.  Returns false if the descriptor could
 * not be loaded or if adding the defs to the symtab fails. */
bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
                                     upb_status *status) {
  int count;
  bool added;
  upb_def **defs =
      upb_load_defs_from_descriptor(str, len, &count, &defs, status);

  if (defs == NULL) {
    return false;
  }

  /* The address of the local |defs| is the owner token for both calls. */
  added = upb_symtab_add(s, defs, count, &defs, status);
  free(defs);
  return added;
}
9120 
/* Reads the whole of |filename| into a freshly malloc()'d buffer and returns
 * it; the caller must free() the result.  The buffer holds the file's bytes
 * followed by one terminating NUL, so it can also be used as a C string when
 * the file has no embedded NULs.  If |len| is non-NULL it receives the file
 * size in bytes (excluding the terminator).
 *
 * Returns NULL if the file cannot be opened, sized, or read, or if memory
 * cannot be allocated; nothing is leaked on failure. */
char *upb_readfile(const char *filename, size_t *len) {
  long size;
  char *buf = NULL;
  FILE *f = fopen(filename, "rb");
  if (!f) return NULL;
  if (fseek(f, 0, SEEK_END) != 0) goto error;
  size = ftell(f);
  if (size < 0) goto error;
  if (fseek(f, 0, SEEK_SET) != 0) goto error;
  buf = malloc((size_t)size + 1);
  if (!buf) goto error;
  if (size && fread(buf, (size_t)size, 1, f) != 1) goto error;
  /* The extra byte allocated above is the terminator. */
  buf[size] = '\0';
  fclose(f);
  if (len) *len = (size_t)size;
  return buf;

error:
  /* free(NULL) is a no-op, so this covers failures before the allocation. */
  free(buf);
  fclose(f);
  return NULL;
}
9140 
/* Reads the serialized descriptor stored in file |fname| and loads its defs
 * into |symtab|.  On a read failure an error is recorded in |status| (when
 * |status| is non-NULL) and false is returned; otherwise the result of
 * upb_load_descriptor_into_symtab() is returned. */
bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
                                          upb_status *status) {
  size_t len;
  bool loaded;
  char *contents = upb_readfile(fname, &len);

  if (contents == NULL) {
    if (status != NULL) {
      upb_status_seterrf(status, "Couldn't read file: %s", fname);
    }
    return false;
  }

  loaded = upb_load_descriptor_into_symtab(symtab, contents, len, status);
  free(contents);
  return loaded;
}
9154 /*
9155  * upb::pb::TextPrinter
9156  *
9157  * OPT: This is not optimized at all.  It uses printf() which parses the format
9158  * string every time, and it allocates memory for every put.
9159  */
9160 
9161 
9162 #include <ctype.h>
9163 #include <float.h>
9164 #include <inttypes.h>
9165 #include <stdarg.h>
9166 #include <stdio.h>
9167 #include <stdlib.h>
9168 #include <string.h>
9169 
9170 
/* State of a text-format printer: it receives a message through |input_| and
 * writes its textual form to |output_|. */
struct upb_textprinter {
  /* Sink handed out by upb_textprinter_input(). */
  upb_sink input_;
  /* Byte sink that all printed text is written to. */
  upb_bytessink *output_;
  /* Current submessage nesting level; two spaces are emitted per level. */
  int indent_depth_;
  /* When true, fields are separated by ' ' instead of '\n' and no
   * indentation is emitted. */
  bool single_line_;
  /* Closure filled in by upb_bytessink_start() and passed to every
   * upb_bytessink_putbuf() call. */
  void *subc;
};
9178 
/* Jumps to the enclosing function's "err" label when |x| evaluates to a
 * negative value.  Wrapped in do/while(0) so that "CHECK(x);" is exactly one
 * statement and cannot capture an "else" that follows it. */
#define CHECK(x) do { if ((x) < 0) goto err; } while (0)
9180 
/* Returns the part of |longname| after its last '.', or |longname| itself
 * when it contains no '.'.  The result points into |longname|. */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
9185 
/* Writes two spaces per nesting level to the output, unless the printer is
 * in single-line mode.  Always returns 0. */
static int indent(upb_textprinter *p) {
  int level;

  if (p->single_line_) {
    return 0;
  }
  for (level = 0; level < p->indent_depth_; level++) {
    upb_bytessink_putbuf(p->output_, p->subc, "  ", 2, NULL);
  }
  return 0;
}
9193 
/* Writes the field separator: a space in single-line mode, a newline
 * otherwise.  Always returns 0. */
static int endfield(upb_textprinter *p) {
  char separator = '\n';

  if (p->single_line_) {
    separator = ' ';
  }
  upb_bytessink_putbuf(p->output_, p->subc, &separator, 1, NULL);
  return 0;
}
9199 
/* Writes the |len| bytes at |buf| to the printer's output with C-style
 * escaping: \n, \r, \t, quotes and backslash get two-character escapes, and
 * other non-printable bytes are emitted as three-digit octal escapes.  When
 * |preserve_utf8| is true, bytes >= 0x80 are passed through unescaped.
 * Output is staged in a local buffer and flushed whenever it is nearly full.
 * Always returns 0. */
static int putescaped(upb_textprinter *p, const char *buf, size_t len,
                      bool preserve_utf8) {
  /* Based on CEscapeInternal() from Google's protobuf release. */
  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
  const char *end = buf + len;

  /* I think hex is prettier and more useful, but proto2 uses octal; should
   * investigate whether it can parse hex also. */
  const bool use_hex = false;
  bool last_hex_escape = false; /* true if last output char was \xNN */

  for (; buf < end; buf++) {
    /* <ctype.h> functions require a value representable as unsigned char;
     * plain char may be negative for bytes >= 0x80. */
    const unsigned char ch = (unsigned char)*buf;
    bool is_hex_escape = false;

    /* The longest single write below is sprintf()'s four escape characters
     * plus its terminating NUL, so five free bytes are required. */
    if (dstend - dst < 5) {
      upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
      dst = dstbuf;
    }

    switch (*buf) {
      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
      default:
        /* Note that if we emit \xNN and the buf character after that is a hex
         * digit then that digit must be escaped too to prevent it being
         * interpreted as part of the character code by C. */
        if ((!preserve_utf8 || ch < 0x80) &&
            (!isprint(ch) || (last_hex_escape && isxdigit(ch)))) {
          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (unsigned int)ch);
          is_hex_escape = use_hex;
          dst += 4;
        } else {
          *(dst++) = *buf;
        }
        break;
    }
    last_hex_escape = is_hex_escape;
  }
  /* Flush remaining data. */
  upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
  return 0;
}
9246 
/* printf()-style output to the printer's byte sink.  The text is formatted
 * into a temporary heap buffer that is freed before returning.  Returns false
 * if formatting or allocation fails; otherwise returns the result of
 * upb_bytessink_putbuf() converted to bool. */
bool putf(upb_textprinter *p, const char *fmt, ...) {
  va_list args;
  va_list args_copy;
  char *str;
  int written;
  int len;
  bool ok;

  va_start(args, fmt);

  /* Run once to get the length of the string. */
  _upb_va_copy(args_copy, args);
  len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
  va_end(args_copy);

  /* A negative length signals a formatting error. */
  if (len < 0) {
    va_end(args);
    return false;
  }

  /* + 1 for NULL terminator (the formatter requires it even if we don't). */
  str = malloc((size_t)len + 1);
  if (!str) {
    va_end(args);
    return false;
  }

  /* Bounded second pass: never writes more than the buffer just sized. */
  written = _upb_vsnprintf(str, (size_t)len + 1, fmt, args);
  va_end(args);
  UPB_ASSERT_VAR(written, written == len);

  ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
  free(str);
  return ok;
}
9273 
9274 
9275 /* handlers *******************************************************************/
9276 
textprinter_startmsg(void * c,const void * hd)9277 static bool textprinter_startmsg(void *c, const void *hd) {
9278   upb_textprinter *p = c;
9279   UPB_UNUSED(hd);
9280   if (p->indent_depth_ == 0) {
9281     upb_bytessink_start(p->output_, 0, &p->subc);
9282   }
9283   return true;
9284 }
9285 
/* End-of-message handler.  At nesting depth zero it ends the output byte
 * sink.  Always returns true. */
static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
  upb_textprinter *printer = c;
  const bool at_top_level = (printer->indent_depth_ == 0);
  UPB_UNUSED(hd);
  UPB_UNUSED(s);

  if (at_top_level) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
9295 
/* Defines textprinter_put<name>(): a value handler that prints
 * "<field name>: <value>" using printf conversion |fmt| for a value of C type
 * |ctype|, preceded by indentation and followed by the field separator.  The
 * handler data is the field's upb_fielddef. */
#define TYPE(name, ctype, fmt) \
  static bool textprinter_put ## name(void *closure, const void *handler_data, \
                                      ctype val) {                             \
    upb_textprinter *printer = closure;                                        \
    const upb_fielddef *field = handler_data;                                  \
    if (indent(printer) < 0) return false;                                     \
    putf(printer, "%s: " fmt, upb_fielddef_name(field), val);                  \
    if (endfield(printer) < 0) return false;                                   \
    return true;                                                               \
}
9308 
textprinter_putbool(void * closure,const void * handler_data,bool val)9309 static bool textprinter_putbool(void *closure, const void *handler_data,
9310                                 bool val) {
9311   upb_textprinter *p = closure;
9312   const upb_fielddef *f = handler_data;
9313   CHECK(indent(p));
9314   putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
9315   CHECK(endfield(p));
9316   return true;
9317 err:
9318   return false;
9319 }
9320 
/* Two-level stringification: STRINGIFY_MACROVAL(x) macro-expands |x| first
 * and then turns the expansion into a string literal, whereas using
 * STRINGIFY_HELPER directly would stringify the argument as written. */
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)

/* Instantiate the numeric value handlers.  Integers use the <inttypes.h>
 * conversion macros; float and double use "%.<N>g" where <N> is the expansion
 * of FLT_DIG and DBL_DIG respectively. */
TYPE(int32,  int32_t,  "%" PRId32)
TYPE(int64,  int64_t,  "%" PRId64)
TYPE(uint32, uint32_t, "%" PRIu32)
TYPE(uint64, uint64_t, "%" PRIu64)
TYPE(float,  float,    "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
TYPE(double, double,   "%." STRINGIFY_MACROVAL(DBL_DIG) "g")

#undef TYPE
9332 
9333 /* Output a symbolic value from the enum if found, else just print as int32. */
9334 static bool textprinter_putenum(void *closure, const void *handler_data,
9335                                 int32_t val) {
9336   upb_textprinter *p = closure;
9337   const upb_fielddef *f = handler_data;
9338   const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
9339   const char *label = upb_enumdef_iton(enum_def, val);
9340   if (label) {
9341     indent(p);
9342     putf(p, "%s: %s", upb_fielddef_name(f), label);
9343     endfield(p);
9344   } else {
9345     if (!textprinter_putint32(closure, handler_data, val))
9346       return false;
9347   }
9348   return true;
9349 }
9350 
textprinter_startstr(void * closure,const void * handler_data,size_t size_hint)9351 static void *textprinter_startstr(void *closure, const void *handler_data,
9352                       size_t size_hint) {
9353   upb_textprinter *p = closure;
9354   const upb_fielddef *f = handler_data;
9355   UPB_UNUSED(size_hint);
9356   indent(p);
9357   putf(p, "%s: \"", upb_fielddef_name(f));
9358   return p;
9359 }
9360 
textprinter_endstr(void * closure,const void * handler_data)9361 static bool textprinter_endstr(void *closure, const void *handler_data) {
9362   upb_textprinter *p = closure;
9363   UPB_UNUSED(handler_data);
9364   putf(p, "\"");
9365   endfield(p);
9366   return true;
9367 }
9368 
textprinter_putstr(void * closure,const void * hd,const char * buf,size_t len,const upb_bufhandle * handle)9369 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
9370                                  size_t len, const upb_bufhandle *handle) {
9371   upb_textprinter *p = closure;
9372   const upb_fielddef *f = hd;
9373   UPB_UNUSED(handle);
9374   CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
9375   return len;
9376 err:
9377   return 0;
9378 }
9379 
textprinter_startsubmsg(void * closure,const void * handler_data)9380 static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
9381   upb_textprinter *p = closure;
9382   const char *name = handler_data;
9383   CHECK(indent(p));
9384   putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
9385   p->indent_depth_++;
9386   return p;
9387 err:
9388   return UPB_BREAK;
9389 }
9390 
textprinter_endsubmsg(void * closure,const void * handler_data)9391 static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
9392   upb_textprinter *p = closure;
9393   UPB_UNUSED(handler_data);
9394   p->indent_depth_--;
9395   CHECK(indent(p));
9396   upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
9397   CHECK(endfield(p));
9398   return true;
9399 err:
9400   return false;
9401 }
9402 
/* Handler-registration callback for the text printer.  Installs the
 * start/end-of-message handlers on |h| and then, for every field of the
 * message, the handler(s) matching the field's type.  Each field handler
 * receives the field's upb_fielddef as handler data, except submessage
 * handlers, which receive the name to print. */
static void onmreg(const void *c, upb_handlers *h) {
  const upb_msgdef *msg = upb_handlers_msgdef(h);
  upb_msg_field_iter iter;
  UPB_UNUSED(c);

  upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
  upb_handlers_setendmsg(h, textprinter_endmsg, NULL);

  upb_msg_field_begin(&iter, msg);
  while (!upb_msg_field_done(&iter)) {
    upb_fielddef *field = upb_msg_iter_field(&iter);
    upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    upb_handlerattr_sethandlerdata(&attr, field);

    switch (upb_fielddef_type(field)) {
      case UPB_TYPE_INT32:
        upb_handlers_setint32(h, field, textprinter_putint32, &attr);
        break;
      case UPB_TYPE_INT64:
        upb_handlers_setint64(h, field, textprinter_putint64, &attr);
        break;
      case UPB_TYPE_UINT32:
        upb_handlers_setuint32(h, field, textprinter_putuint32, &attr);
        break;
      case UPB_TYPE_UINT64:
        upb_handlers_setuint64(h, field, textprinter_putuint64, &attr);
        break;
      case UPB_TYPE_FLOAT:
        upb_handlers_setfloat(h, field, textprinter_putfloat, &attr);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, field, textprinter_putdouble, &attr);
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, field, textprinter_putbool, &attr);
        break;
      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        upb_handlers_setstartstr(h, field, textprinter_startstr, &attr);
        upb_handlers_setstring(h, field, textprinter_putstr, &attr);
        upb_handlers_setendstr(h, field, textprinter_endstr, &attr);
        break;
      case UPB_TYPE_MESSAGE: {
        /* Tag-delimited fields print the short name of the submessage type;
         * all others print the field name. */
        const char *label;
        if (upb_fielddef_istagdelim(field)) {
          label = shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(field)));
        } else {
          label = upb_fielddef_name(field);
        }
        upb_handlerattr_sethandlerdata(&attr, label);
        upb_handlers_setstartsubmsg(h, field, textprinter_startsubmsg, &attr);
        upb_handlers_setendsubmsg(h, field, textprinter_endsubmsg, &attr);
        break;
      }
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, field, textprinter_putenum, &attr);
        break;
    }

    upb_msg_field_next(&iter);
  }
}
9461 
/* Sets the printer's single-line mode to |single_line| and its nesting depth
 * back to zero. */
static void textprinter_reset(upb_textprinter *p, bool single_line) {
  p->indent_depth_ = 0;
  p->single_line_ = single_line;
}
9466 
9467 
9468 /* Public API *****************************************************************/
9469 
upb_textprinter_create(upb_env * env,const upb_handlers * h,upb_bytessink * output)9470 upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
9471                                         upb_bytessink *output) {
9472   upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
9473   if (!p) return NULL;
9474 
9475   p->output_ = output;
9476   upb_sink_reset(&p->input_, h, p);
9477   textprinter_reset(p, false);
9478 
9479   return p;
9480 }
9481 
upb_textprinter_newhandlers(const upb_msgdef * m,const void * owner)9482 const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
9483                                                 const void *owner) {
9484   return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
9485 }
9486 
upb_textprinter_input(upb_textprinter * p)9487 upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
9488 
/* Switches single-line output on or off for subsequently printed text. */
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
  const bool mode = single_line;
  p->single_line_ = mode;
}
9492 
9493 
/* Maps a descriptor type (used as the array index) to the wire type of its
 * non-packed encoding; the encoder substitutes UPB_WIRE_TYPE_DELIMITED for
 * packed fields instead of consulting this table.
 * NOTE(review): slot 0 presumably is a placeholder that no valid descriptor
 * type maps to -- confirm against the descriptor type enum. */
const uint8_t upb_pb_native_wire_types[] = {
  UPB_WIRE_TYPE_END_GROUP,     /* ENDGROUP */
  UPB_WIRE_TYPE_64BIT,         /* DOUBLE */
  UPB_WIRE_TYPE_32BIT,         /* FLOAT */
  UPB_WIRE_TYPE_VARINT,        /* INT64 */
  UPB_WIRE_TYPE_VARINT,        /* UINT64 */
  UPB_WIRE_TYPE_VARINT,        /* INT32 */
  UPB_WIRE_TYPE_64BIT,         /* FIXED64 */
  UPB_WIRE_TYPE_32BIT,         /* FIXED32 */
  UPB_WIRE_TYPE_VARINT,        /* BOOL */
  UPB_WIRE_TYPE_DELIMITED,     /* STRING */
  UPB_WIRE_TYPE_START_GROUP,   /* GROUP */
  UPB_WIRE_TYPE_DELIMITED,     /* MESSAGE */
  UPB_WIRE_TYPE_DELIMITED,     /* BYTES */
  UPB_WIRE_TYPE_VARINT,        /* UINT32 */
  UPB_WIRE_TYPE_VARINT,        /* ENUM */
  UPB_WIRE_TYPE_32BIT,         /* SFIXED32 */
  UPB_WIRE_TYPE_64BIT,         /* SFIXED64 */
  UPB_WIRE_TYPE_VARINT,        /* SINT32 */
  UPB_WIRE_TYPE_VARINT,        /* SINT64 */
};
9516 
/* A basic branch-based decoder, uses 32-bit values to get good performance
 * on 32-bit architectures (but performs well on 64-bits also).
 * This scheme comes from the original Google Protobuf implementation
 * (proto2).
 *
 * Continues decoding a varint: |r.val| supplies the bits decoded so far and
 * the bytes read from |r.p| are merged in starting at bit 14 (presumably the
 * caller has already consumed the first two bytes -- confirm at the call
 * site).  At most 8 bytes are read.  On success returns the full value and a
 * pointer just past the last byte consumed; if the 8th byte still has its
 * continuation bit (0x80) set, returns {NULL, 0}. */
upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
  upb_decoderet err = {NULL, 0};
  const char *p = r.p;
  uint32_t low = (uint32_t)r.val;
  uint32_t high = 0;
  uint32_t b;
  /* Each step takes the byte's low 7 bits and stops once a byte without the
   * continuation bit is seen. */
  b = *(p++); low  |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
  b = *(p++); low  |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
  /* This byte straddles the two halves: 4 bits go to |low|, 3 to |high|. */
  b = *(p++); low  |= (b & 0x7fU) << 28;
              high  = (b & 0x7fU) >>  4; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) <<  3; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
  /* Only the lowest bit of this byte fits below bit 64 of the result. */
  b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
  return err;

done:
  r.val = ((uint64_t)high << 32) | low;
  r.p = p;
  return r;
}
9543 
/* Same contract as upb_vdecode_max8_branch32(), but accumulates directly into
 * a single 64-bit value: bytes read from |r.p| are merged into |r.val|
 * starting at bit 14, at most 8 bytes are read, and {NULL, 0} is returned if
 * the 8th byte still has its continuation bit set. */
upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
  const char *p = r.p;
  uint64_t val = r.val;
  uint64_t b;
  upb_decoderet err = {NULL, 0};
  /* One line per input byte; the shift grows by 7 bits each time. */
  b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
  /* Bits shifted past bit 63 are discarded. */
  b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
  return err;

done:
  r.val = val;
  r.p = p;
  return r;
}
9565 
/* Locates the end of the varint held in the 8 bytes of |v|.  The result has
 * exactly one bit set: the high bit of the first byte (counting from the
 * least-significant end) whose continuation bit is clear.  Subtracting one
 * from it gives a mask covering only the bits that belong to the varint.
 * Returns 0 when all 8 bytes have their continuation bit set, i.e. the
 * varint is unterminated. */
static uint64_t upb_get_vstopbit(uint64_t v) {
  /* Fill in every payload bit so that only continuation bits can be zero. */
  const uint64_t filled = v | 0x7f7f7f7f7f7f7f7fULL;
  /* Adding one carries through the trailing ones and sets the lowest zero. */
  const uint64_t carried = filled + 1;
  return carried & ~filled;
}
9574 
/* A branchless decoder.  Credit to Pascal Massimino for the bit-twiddling.
 *
 * Loads 8 bytes from |r.p| in one memcpy (so 8 bytes must be readable there),
 * finds where the varint ends, compacts the 7-bit groups and ORs the result
 * into |r.val|.  Returns {NULL, 0} if none of the 8 bytes terminates the
 * varint.
 * NOTE(review): the byte-to-bit mapping relies on the host's byte order,
 * presumably little-endian -- confirm where this decoder is selected. */
upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
  uint64_t b;
  uint64_t stop_bit;
  upb_decoderet my_r;
  memcpy(&b, r.p, sizeof(b));
  stop_bit = upb_get_vstopbit(b);
  /* Drop the continuation bits and everything beyond the varint's end. */
  b =  (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
  /* Adding a masked copy multiplies the selected lanes by 2, 4 and 16,
   * i.e. shifts them left by 1, 2 and 4 bits to close the gaps. */
  b +=       b & 0x007f007f007f007fULL;
  b +=  3 * (b & 0x0000ffff0000ffffULL);
  b += 15 * (b & 0x00000000ffffffffULL);
  if (stop_bit == 0) {
    /* Error: unterminated varint. */
    upb_decoderet err_r = {(void*)0, 0};
    return err_r;
  }
  /* stop_bit is bit 7 of the last byte, so (ctz + 1) / 8 is the number of
   * bytes consumed. */
  my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
                            r.val | (b << 7));
  return my_r;
}
9595 
/* A branchless decoder.  Credit to Daniel Wright for the bit-twiddling.
 *
 * Loads 8 bytes from |r.p| in one memcpy (so 8 bytes must be readable there),
 * finds where the varint ends, compacts the 7-bit groups toward the low end
 * and ORs the result, shifted up by 14 bits, into |r.val|.  Returns {NULL, 0}
 * if none of the 8 bytes terminates the varint.
 * NOTE(review): the byte-to-bit mapping relies on the host's byte order,
 * presumably little-endian -- confirm where this decoder is selected. */
upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
  uint64_t b;
  uint64_t stop_bit;
  upb_decoderet my_r;
  memcpy(&b, r.p, sizeof(b));
  stop_bit = upb_get_vstopbit(b);
  /* Keep only the bits below the stop bit; this also clears every
   * continuation bit except none beyond the varint remain. */
  b &= (stop_bit - 1);
  /* Merge neighbouring lanes by shifting the upper one right by 1, 2 and
   * then 4 bits. */
  b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
  b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
  b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
  if (stop_bit == 0) {
    /* Error: unterminated varint. */
    upb_decoderet err_r = {(void*)0, 0};
    return err_r;
  }
  /* stop_bit is bit 7 of the last byte, so (ctz + 1) / 8 is the number of
   * bytes consumed. */
  my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
                            r.val | (b << 14));
  return my_r;
}
9616 
9617 #line 1 "upb/json/parser.rl"
9618 /*
9619 ** upb::json::Parser (upb_json_parser)
9620 **
9621 ** A parser that uses the Ragel State Machine Compiler to generate
9622 ** the finite automata.
9623 **
9624 ** Ragel only natively handles regular languages, but we can manually
9625 ** program it a bit to handle context-free languages like JSON, by using
9626 ** the "fcall" and "fret" constructs.
9627 **
9628 ** This parser can handle the basics, but needs several things to be fleshed
9629 ** out:
9630 **
9631 ** - handling of unicode escape sequences (including high surrogate pairs).
9632 ** - properly check and report errors for unknown fields, stack overflow,
9633 **   improper array nesting (or lack of nesting).
9634 ** - handling of base64 sequences with padding characters.
9635 ** - handling of push-back (non-success returns from sink functions).
9636 ** - handling of keys/escape-sequences/etc that span input buffers.
9637 */
9638 
9639 #include <stdio.h>
9640 #include <stdint.h>
9641 #include <assert.h>
9642 #include <string.h>
9643 #include <stdlib.h>
9644 #include <errno.h>
9645 
9646 
9647 #define UPB_JSON_MAX_DEPTH 64
9648 
/* One entry of the JSON parser's scope stack (upb_json_parser.stack). */
typedef struct {
  /* Sink associated with this scope. */
  upb_sink sink;

  /* The current message in which we're parsing, and the field whose value we're
   * expecting next. */
  const upb_msgdef *m;
  const upb_fielddef *f;

  /* We are in a repeated-field context, ready to emit mapentries as
   * submessages. This flag alters the start-of-object (open-brace) behavior to
   * begin a sequence of mapentry messages rather than a single submessage. */
  bool is_map;

  /* We are in a map-entry message context. This flag is set when parsing the
   * value field of a single map entry and indicates to all value-field parsers
   * (subobjects, strings, numbers, and bools) that the map-entry submessage
   * should end as soon as the value is parsed. */
  bool is_mapentry;

  /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
   * message's map field that we're currently parsing. This differs from |f|
   * because |f| is the field in the *current* message (i.e., the map-entry
   * message itself), not the parent's field that leads to this map. */
  const upb_fielddef *mapfield;
} upb_jsonparser_frame;
9674 
/* Complete state of a JSON parser instance. */
struct upb_json_parser {
  /* Environment that errors are reported to (see check_stack()). */
  upb_env *env;
  upb_byteshandler input_handler_;
  upb_bytessink input_;

  /* Stack to track the JSON scopes we are in.  |top| is the current frame;
   * check_stack() refuses to go deeper once |top| + 1 reaches |limit|. */
  upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
  upb_jsonparser_frame *top;
  upb_jsonparser_frame *limit;

  /* Holds the message of the most recent parse error. */
  upb_status status;

  /* Ragel's internal parsing stack for the parsing state machine. */
  int current_state;
  int parser_stack[UPB_JSON_MAX_DEPTH];
  int parser_top;

  /* The handle for the current buffer. */
  const upb_bufhandle *handle;

  /* Accumulate buffer.  See details in parser.rl. */
  const char *accumulated;
  size_t accumulated_len;
  char *accumulate_buf;
  size_t accumulate_buf_size;

  /* Multi-part text data.  See details in parser.rl. */
  int multipart_state;
  upb_selector_t string_selector;

  /* Input capture.  See details in parser.rl. */
  const char *capture;

  /* Intermediate result of parsing a unicode escape sequence. */
  uint32_t digit;
};
9711 
/* Returns false from the enclosing function when |x| evaluates to false.
 * Wrapped in do/while(0) so that "PARSER_CHECK_RETURN(x);" is exactly one
 * statement and cannot capture an "else" that follows it. */
#define PARSER_CHECK_RETURN(x) do { if (!(x)) return false; } while (0)
9713 
/* Used to signal that a capture has been suspended.
 * NOTE(review): presumably only this object's address matters, serving as a
 * sentinel for upb_json_parser.capture -- confirm against the capture code. */
static char suspend_capture;
9716 
getsel_for_handlertype(upb_json_parser * p,upb_handlertype_t type)9717 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
9718                                              upb_handlertype_t type) {
9719   upb_selector_t sel;
9720   bool ok = upb_handlers_getselector(p->top->f, type, &sel);
9721   UPB_ASSERT_VAR(ok, ok);
9722   return sel;
9723 }
9724 
parser_getsel(upb_json_parser * p)9725 static upb_selector_t parser_getsel(upb_json_parser *p) {
9726   return getsel_for_handlertype(
9727       p, upb_handlers_getprimitivehandlertype(p->top->f));
9728 }
9729 
/* Returns true if one more frame can be pushed onto the scope stack.
 * Otherwise records a "Nesting too deep" error, reports it to the
 * environment and returns false. */
static bool check_stack(upb_json_parser *p) {
  const bool has_room = (p->top + 1) != p->limit;

  if (has_room) {
    return true;
  }

  upb_status_seterrmsg(&p->status, "Nesting too deep");
  upb_env_reporterror(p->env, &p->status);
  return false;
}
9739 
9740 /* There are GCC/Clang built-ins for overflow checking which we could start
9741  * using if there was any performance benefit to it. */
9742 
/* Stores a + b in |*c| and returns true, unless the sum would exceed
 * SIZE_MAX, in which case |*c| is left untouched and false is returned. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;

  if (b > headroom) {
    return false;
  }
  *c = a + b;
  return true;
}
9748 
/* Returns a * b, or SIZE_MAX if the product does not fit in a size_t. */
static size_t saturating_multiply(size_t a, size_t b) {
  size_t product;

  if (b == 0) {
    return 0;
  }
  /* Unsigned multiplication wraps on overflow; dividing the wrapped product
   * back by |b| reveals whether that happened. */
  product = a * b;
  return (product / b == a) ? product : SIZE_MAX;
}
9757 
9758 
9759 /* Base64 decoding ************************************************************/
9760 
9761 /* TODO(haberman): make this streaming. */
9762 
/* Base64 decoding table with 256 entries, indexed by input byte.  An entry
 * is the character's 6-bit value (0-63) for the characters annotated below,
 * and -1 for every other byte, including the padding character '='. */
static const signed char b64table[] = {
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
  60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
  -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
  07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
  23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
  -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
  49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
};
9797 
9798 /* Returns the table value sign-extended to 32 bits.  Knowing that the upper
9799  * bits will be 1 for unrecognized characters makes it easier to check for
9800  * this error condition later (see below). */
b64lookup(unsigned char ch)9801 int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
9802 
/* Returns true when ch is neither a base64 alphabet character nor the '='
 * padding character. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;  /* Padding is acceptable here. */
  }
  return b64lookup(ch) == -1;
}
9806 
/* Decodes the base64 text in [ptr, ptr + len) four characters at a time and
 * pushes each decoded group to the current frame's sink with selector "sel".
 *
 * Returns true on success.  On failure an error is stored in p->status,
 * reported through p->env, and false is returned.  Failures are:
 *   - the input length is not a multiple of 4,
 *   - a character is neither in the base64 alphabet nor '=',
 *   - '=' padding is malformed, or appears anywhere but the final group. */
static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
                        size_t len) {
  const char *limit = ptr + len;
  for (; ptr < limit; ptr += 4) {
    uint32_t val;
    char output[3];

    if (limit - ptr < 4) {
      upb_status_seterrf(&p->status,
                         "Base64 input for bytes field not a multiple of 4: %s",
                         upb_fielddef_name(p->top->f));
      upb_env_reporterror(p->env, &p->status);
      return false;
    }

    /* Convert to unsigned before shifting: b64lookup() returns -1 for
     * unrecognized characters and left-shifting a negative value is undefined
     * behavior.  As uint32_t, -1 becomes 0xFFFFFFFF, so the top bit of "val"
     * is still set whenever any of the four lookups failed. */
    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6  |
          (uint32_t)b64lookup(ptr[3]);

    /* Test the upper bit; true if any of the characters returned -1. */
    if (val & 0x80000000) {
      goto otherchar;
    }

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    output[2] = val & 0xff;
    upb_sink_putstring(&p->top->sink, sel, output, 3, NULL);
  }
  return true;

otherchar:
  if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
      nonbase64(ptr[3]) ) {
    upb_status_seterrf(&p->status,
                       "Non-base64 characters in bytes field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  /* From here on the group contains at least one '='.  Padding is only valid
   * in the final group; otherwise everything after it would be silently
   * discarded. */
  if (limit - ptr != 4) {
    goto badpadding;
  }

  if (ptr[2] == '=') {
    uint32_t val;
    char output;

    /* Last group contains only two input bytes, one output byte. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12;

    assert(!(val & 0x80000000));
    output = val >> 16;
    upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
    return true;
  } else {
    uint32_t val;
    char output[2];

    /* Last group contains only three input bytes, two output bytes.  Since
     * some character in the group is '=' and it is none of the first three,
     * ptr[3] must be the padding character. */
    if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
      goto badpadding;
    }

    val = (uint32_t)b64lookup(ptr[0]) << 18 |
          (uint32_t)b64lookup(ptr[1]) << 12 |
          (uint32_t)b64lookup(ptr[2]) << 6;

    output[0] = val >> 16;
    output[1] = (val >> 8) & 0xff;
    upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
    return true;
  }

badpadding:
  upb_status_seterrf(&p->status,
                     "Incorrect base64 padding for field: %s (%.*s)",
                     upb_fielddef_name(p->top->f),
                     4, ptr);
  upb_env_reporterror(p->env, &p->status);
  return false;
}
9890 
9891 
9892 /* Accumulate buffer **********************************************************/
9893 
9894 /* Functionality for accumulating a buffer.
9895  *
9896  * Some parts of the parser need an entire value as a contiguous string.  For
9897  * example, to look up a member name in a hash table, or to turn a string into
9898  * a number, the relevant library routines need the input string to be in
9899  * contiguous memory, even if the value spanned two or more buffers in the
9900  * input.  These routines handle that.
9901  *
9902  * In the common case we can just point to the input buffer to get this
9903  * contiguous string and avoid any actual copy.  So we optimistically begin
9904  * this way.  But there are a few cases where we must instead copy into a
9905  * separate buffer:
9906  *
9907  *   1. The string was not contiguous in the input (it spanned buffers).
9908  *
9909  *   2. The string included escape sequences that need to be interpreted to get
9910  *      the true value in a contiguous buffer. */
9911 
/* Debug-only check that nothing is currently accumulated: the accumulated
 * pointer must be NULL and the accumulated length must be zero. */
static void assert_accumulate_empty(upb_json_parser *p) {
  /* Keeps "p" referenced when the asserts below compile away. */
  UPB_UNUSED(p);
  assert(p->accumulated == NULL);
  assert(p->accumulated_len == 0);
}
9917 
/* Forgets any accumulated data by resetting the accumulated pointer and
 * length.  The accumulate buffer itself and its size are left as they are. */
static void accumulate_clear(upb_json_parser *p) {
  p->accumulated_len = 0;
  p->accumulated = NULL;
}
9922 
9923 /* Used internally by accumulate_append(). */
accumulate_realloc(upb_json_parser * p,size_t need)9924 static bool accumulate_realloc(upb_json_parser *p, size_t need) {
9925   void *mem;
9926   size_t old_size = p->accumulate_buf_size;
9927   size_t new_size = UPB_MAX(old_size, 128);
9928   while (new_size < need) {
9929     new_size = saturating_multiply(new_size, 2);
9930   }
9931 
9932   mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
9933   if (!mem) {
9934     upb_status_seterrmsg(&p->status, "Out of memory allocating buffer.");
9935     upb_env_reporterror(p->env, &p->status);
9936     return false;
9937   }
9938 
9939   p->accumulate_buf = mem;
9940   p->accumulate_buf_size = new_size;
9941   return true;
9942 }
9943 
9944 /* Logically appends the given data to the append buffer.
9945  * If "can_alias" is true, we will try to avoid actually copying, but the buffer
9946  * must be valid until the next accumulate_append() call (if any). */
accumulate_append(upb_json_parser * p,const char * buf,size_t len,bool can_alias)9947 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
9948                               bool can_alias) {
9949   size_t need;
9950 
9951   if (!p->accumulated && can_alias) {
9952     p->accumulated = buf;
9953     p->accumulated_len = len;
9954     return true;
9955   }
9956 
9957   if (!checked_add(p->accumulated_len, len, &need)) {
9958     upb_status_seterrmsg(&p->status, "Integer overflow.");
9959     upb_env_reporterror(p->env, &p->status);
9960     return false;
9961   }
9962 
9963   if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
9964     return false;
9965   }
9966 
9967   if (p->accumulated != p->accumulate_buf) {
9968     memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
9969     p->accumulated = p->accumulate_buf;
9970   }
9971 
9972   memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
9973   p->accumulated_len += len;
9974   return true;
9975 }
9976 
9977 /* Returns a pointer to the data accumulated since the last accumulate_clear()
9978  * call, and writes the length to *len.  This with point either to the input
9979  * buffer or a temporary accumulate buffer. */
accumulate_getptr(upb_json_parser * p,size_t * len)9980 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
9981   assert(p->accumulated);
9982   *len = p->accumulated_len;
9983   return p->accumulated;
9984 }
9985 
9986 
/* Multi-part text data *******************************************************/
9988 
9989 /* When we have text data in the input, it can often come in multiple segments.
9990  * For example, there may be some raw string data followed by an escape
9991  * sequence.  The two segments are processed with different logic.  Also buffer
9992  * seams in the input can cause multiple segments.
9993  *
9994  * As we see segments, there are two main cases for how we want to process them:
9995  *
9996  *  1. we want to push the captured input directly to string handlers.
9997  *
9998  *  2. we need to accumulate all the parts into a contiguous buffer for further
9999  *     processing (field name lookup, string->number conversion, etc). */
10000 
/* The possible values of p->multipart_state. */
enum {
  MULTIPART_INACTIVE = 0,    /* Not currently processing multipart data. */
  MULTIPART_ACCUMULATE = 1,  /* Parts are gathered into one contiguous
                              * buffer. */
  MULTIPART_PUSHEAGERLY = 2  /* Each part is pushed straight to the current
                              * string handlers. */
};
10014 
10015 /* Start a multi-part text value where we accumulate the data for processing at
10016  * the end. */
multipart_startaccum(upb_json_parser * p)10017 static void multipart_startaccum(upb_json_parser *p) {
10018   assert_accumulate_empty(p);
10019   assert(p->multipart_state == MULTIPART_INACTIVE);
10020   p->multipart_state = MULTIPART_ACCUMULATE;
10021 }
10022 
10023 /* Start a multi-part text value where we immediately push text data to a string
10024  * value with the given selector. */
multipart_start(upb_json_parser * p,upb_selector_t sel)10025 static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
10026   assert_accumulate_empty(p);
10027   assert(p->multipart_state == MULTIPART_INACTIVE);
10028   p->multipart_state = MULTIPART_PUSHEAGERLY;
10029   p->string_selector = sel;
10030 }
10031 
/* Handles one segment of a multi-part text value according to the current
 * multipart state:
 *   - inactive:    sets and reports an internal error and returns false;
 *   - accumulate:  appends the segment via accumulate_append();
 *   - push eagerly: pushes the segment to the current sink, passing the
 *                  parser's buffer handle only when "can_alias" is true.
 * Returns true unless one of the above reports failure. */
static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
                           bool can_alias) {
  if (p->multipart_state == MULTIPART_INACTIVE) {
    upb_status_seterrmsg(
        &p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (p->multipart_state == MULTIPART_ACCUMULATE) {
    return accumulate_append(p, buf, len, can_alias);
  }

  if (p->multipart_state == MULTIPART_PUSHEAGERLY) {
    const upb_bufhandle *handle = NULL;
    if (can_alias) {
      handle = p->handle;
    }
    upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
  }

  return true;
}
10056 
10057 /* Note: this invalidates the accumulate buffer!  Call only after reading its
10058  * contents. */
multipart_end(upb_json_parser * p)10059 static void multipart_end(upb_json_parser *p) {
10060   assert(p->multipart_state != MULTIPART_INACTIVE);
10061   p->multipart_state = MULTIPART_INACTIVE;
10062   accumulate_clear(p);
10063 }
10064 
10065 
10066 /* Input capture **************************************************************/
10067 
10068 /* Functionality for capturing a region of the input as text.  Gracefully
10069  * handles the case where a buffer seam occurs in the middle of the captured
10070  * region. */
10071 
/* Marks "ptr" as the start of a captured input region.  A multipart value
 * must already be active and no capture may be in progress (both asserted). */
static void capture_begin(upb_json_parser *p, const char *ptr) {
  assert(p->multipart_state != MULTIPART_INACTIVE);
  assert(p->capture == NULL);

  p->capture = ptr;
}
10077 
/* Ends the current capture at "ptr" and hands the captured region to
 * multipart_text() (aliasing allowed).  On success clears the capture and
 * returns true; on failure returns false and leaves the capture set. */
static bool capture_end(upb_json_parser *p, const char *ptr) {
  assert(p->capture);

  if (!multipart_text(p, p->capture, ptr - p->capture, true)) {
    return false;
  }

  p->capture = NULL;
  return true;
}
10087 
10088 /* This is called at the end of each input buffer (ie. when we have hit a
10089  * buffer seam).  If we are in the middle of capturing the input, this
10090  * processes the unprocessed capture region. */
capture_suspend(upb_json_parser * p,const char ** ptr)10091 static void capture_suspend(upb_json_parser *p, const char **ptr) {
10092   if (!p->capture) return;
10093 
10094   if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
10095     /* We use this as a signal that we were in the middle of capturing, and
10096      * that capturing should resume at the beginning of the next buffer.
10097      *
10098      * We can't use *ptr here, because we have no guarantee that this pointer
10099      * will be valid when we resume (if the underlying memory is freed, then
10100      * using the pointer at all, even to compare to NULL, is likely undefined
10101      * behavior). */
10102     p->capture = &suspend_capture;
10103   } else {
10104     /* Need to back up the pointer to the beginning of the capture, since
10105      * we were not able to actually preserve it. */
10106     *ptr = p->capture;
10107   }
10108 }
10109 
/* Resumes a capture that capture_suspend() put on hold, restarting it at
 * "ptr".  Does nothing when no capture was in progress. */
static void capture_resume(upb_json_parser *p, const char *ptr) {
  if (!p->capture) {
    return;
  }

  /* A pending capture is always marked with the suspend sentinel. */
  assert(p->capture == &suspend_capture);
  p->capture = ptr;
}
10116 
10117 
10118 /* Callbacks from the parser **************************************************/
10119 
10120 /* These are the functions called directly from the parser itself.
10121  * We define these in the same order as their declarations in the parser. */
10122 
/* Translates the character following a backslash in a JSON escape sequence
 * (one of r t n f b / " \) into the character it denotes.  Any other input
 * trips an assertion; with assertions disabled 'x' is returned. */
static char escape_char(char in) {
  static const char letters[8] = {'r', 't', 'n', 'f', 'b', '/', '"', '\\'};
  static const char decoded[8] = {'\r', '\t', '\n', '\f', '\b', '/', '"', '\\'};
  unsigned i;

  for (i = 0; i < 8; i++) {
    if (letters[i] == in) {
      return decoded[i];
    }
  }

  assert(0);
  return 'x';
}
10138 
/* Handles a single-character escape: decodes the character at *ptr with
 * escape_char() and emits the result as one byte of multipart text.  The
 * byte lives in a local, so aliasing is not allowed. */
static bool escape(upb_json_parser *p, const char *ptr) {
  const char unescaped = escape_char(*ptr);
  return multipart_text(p, &unescaped, 1, false);
}
10143 
/* Resets the hex accumulator (p->digit) to zero before hexdigit() is called
 * for each digit. */
static void start_hex(upb_json_parser *p) {
  p->digit = 0;
}
10147 
/* Folds the hex digit at *ptr into p->digit: the accumulator is shifted left
 * by four bits and the digit's value is added.  Accepts 0-9, a-f and A-F;
 * anything else trips the assertion in the final branch. */
static void hexdigit(upb_json_parser *p, const char *ptr) {
  const char ch = *ptr;
  int nibble;

  if (ch >= '0' && ch <= '9') {
    nibble = ch - '0';
  } else if (ch >= 'a' && ch <= 'f') {
    nibble = (ch - 'a') + 10;
  } else {
    assert(ch >= 'A' && ch <= 'F');
    nibble = (ch - 'A') + 10;
  }

  p->digit <<= 4;
  p->digit += nibble;
}
10162 
/* Emits the code point collected in p->digit as UTF-8 multipart text.
 * Values up to 0x7F take one byte, up to 0x7FF two bytes, and everything
 * else three bytes.  Returns the result of multipart_text().
 *
 * TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
 * we have to wait for the next escape to get the full code point). */
static bool end_hex(upb_json_parser *p) {
  const uint32_t cp = p->digit;
  char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */
  int length;

  if (cp <= 0x7F) {
    utf8[0] = cp;
    length = 1;
  } else if (cp <= 0x07FF) {
    utf8[0] = ((cp >> 6) & 0x1F) | 0xC0;
    utf8[1] = (cp & 0x3F) | 0x80;
    length = 2;
  } else /* cp <= 0xFFFF */ {
    utf8[0] = ((cp >> 12) & 0x0F) | 0xE0;
    utf8[1] = ((cp >> 6) & 0x3F) | 0x80;
    utf8[2] = (cp & 0x3F) | 0x80;
    length = 3;
  }

  return multipart_text(p, utf8, length, false);
}
10190 
/* Parser callback: a run of literal text starts at "ptr".  Begins capturing
 * the input from there. */
static void start_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10194 
/* Parser callback: the run of literal text ends at "ptr".  Finishes the
 * capture and returns whether the captured text was processed successfully. */
static bool end_text(upb_json_parser *p, const char *ptr) {
  const bool captured = capture_end(p, ptr);
  return captured;
}
10198 
/* Parser callback: a number starts at "ptr".  Numbers are accumulated in
 * full before being converted, so this starts an accumulating multipart
 * value and begins capturing the input. */
static void start_number(upb_json_parser *p, const char *ptr) {
  multipart_startaccum(p);
  capture_begin(p, ptr);
}
10203 
10204 static bool parse_number(upb_json_parser *p);
10205 
/* Parser callback: the number ends at "ptr".  Finishes the capture and, if
 * that succeeds, converts the accumulated text with parse_number().  Returns
 * false if either step fails. */
static bool end_number(upb_json_parser *p, const char *ptr) {
  return capture_end(p, ptr) && parse_number(p);
}
10213 
/* Converts the accumulated text to the numeric type of the current field
 * (p->top->f) and pushes the value to the current sink.
 *
 * The accumulated text must consist entirely of one number: empty input,
 * trailing characters, out-of-range values and conversion range errors are
 * all rejected.  On rejection an error is set and reported, the multipart
 * state is ended, and false is returned.  On success the multipart state is
 * ended and true is returned.  If appending the terminating NUL fails, false
 * is returned without ending the multipart state. */
static bool parse_number(upb_json_parser *p) {
  size_t len;
  const char *buf;
  const char *myend;
  char *end;

  /* strtol() and friends unfortunately do not support specifying the length of
   * the input string, so we need to force a copy into a NULL-terminated buffer. */
  if (!multipart_text(p, "\0", 1, false)) {
    return false;
  }

  buf = accumulate_getptr(p, &len);
  myend = buf + len - 1;  /* One for NULL. */

  /* With no characters before the terminator the strto*() functions convert
   * nothing and leave "end" at the start of the buffer, which would equal
   * "myend" and be mistaken for a successfully parsed zero. */
  if (myend == buf) {
    goto err;
  }

  /* XXX: We are using strtol to parse integers, but this is wrong as even
   * integers can be represented as 1e6 (for example), which strtol can't
   * handle correctly.
   *
   * XXX: Also, we can't handle large integers properly because strto[u]ll
   * isn't in C89.
   *
   * XXX: Also, we don't properly check floats for overflow, since strtof
   * isn't in C89.
   *
   * NOTE(review): base 0 makes strtol()/strtoul() accept hex and octal
   * spellings, and strtoul() accepts a leading '-'; confirm whether either
   * should be rejected here.
   *
   * errno is cleared right before every conversion: the strto*() functions
   * only set it on error and never reset it, so a stale ERANGE from earlier
   * code would otherwise reject a valid number. */
  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_ENUM:
    case UPB_TYPE_INT32: {
      long val;
      errno = 0;
      val = strtol(buf, &end, 0);
      if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_INT64: {
      long long val;
      errno = 0;
      val = strtol(buf, &end, 0);
      if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_UINT32: {
      unsigned long val;
      errno = 0;
      val = strtoul(buf, &end, 0);
      if (val > UINT32_MAX || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_UINT64: {
      unsigned long long val;
      errno = 0;
      val = strtoul(buf, &end, 0);
      if (val > UINT64_MAX || errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_DOUBLE: {
      double val;
      errno = 0;
      val = strtod(buf, &end);
      if (errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
      break;
    }
    case UPB_TYPE_FLOAT: {
      float val;
      errno = 0;
      val = strtod(buf, &end);
      if (errno == ERANGE || end != myend)
        goto err;
      else
        upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
      break;
    }
    default:
      assert(false);
  }

  multipart_end(p);

  return true;

err:
  upb_status_seterrf(&p->status, "error parsing number: %s", buf);
  upb_env_reporterror(p->env, &p->status);
  multipart_end(p);
  return false;
}
10302 
/* Pushes the boolean "val" to the current sink for the current field.
 * If the current field is not of bool type, sets and reports an error and
 * returns false; otherwise returns true. */
static bool parser_putbool(upb_json_parser *p, bool val) {
  bool pushed;

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_BOOL) {
    pushed = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
    UPB_ASSERT_VAR(pushed, pushed);
    return true;
  }

  upb_status_seterrf(&p->status,
                     "Boolean value specified for non-bool field: %s",
                     upb_fielddef_name(p->top->f));
  upb_env_reporterror(p->env, &p->status);
  return false;
}
10319 
/* Parser callback: a quoted string value begins for the current field.
 *
 *   - String-like fields get a new parser frame and a STARTSTR event.  STRING
 *     fields then push text eagerly; other string-like fields accumulate it.
 *   - ENUM fields accumulate the symbolic name in the current frame.
 *   - Any other field type is an error: it is set and reported, and false is
 *     returned.
 *
 * Also returns false if there is no room for a new frame. */
static bool start_stringval(upb_json_parser *p) {
  assert(p->top->f);

  if (upb_fielddef_isstring(p->top->f)) {
    upb_jsonparser_frame *child;
    upb_selector_t startstr_sel;

    if (!check_stack(p)) return false;

    /* Start a new parser frame: parser frames correspond one-to-one with
     * handler frames, and string events occur in a sub-frame. */
    child = p->top + 1;
    startstr_sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
    upb_sink_startstr(&p->top->sink, startstr_sel, 0, &child->sink);
    child->m = p->top->m;
    child->f = p->top->f;
    child->is_map = false;
    child->is_mapentry = false;
    p->top = child;

    if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
      /* For STRING fields we push data directly to the handlers as it is
       * parsed.  We don't do this yet for BYTES fields, because our base64
       * decoder is not streaming.
       *
       * TODO(haberman): make base64 decoding streaming also. */
      multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
    } else {
      multipart_startaccum(p);
    }
    return true;
  }

  if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
    /* No need to push a frame -- symbolic enum names in quotes remain in the
     * current parser frame.
     *
     * Enum string values must accumulate so we can look up the value in a table
     * once it is complete. */
    multipart_startaccum(p);
    return true;
  }

  upb_status_seterrf(&p->status,
                     "String specified for non-string/non-enum field: %s",
                     upb_fielddef_name(p->top->f));
  upb_env_reporterror(p->env, &p->status);
  return false;
}
10368 
/* Parser callback: the quoted string value for the current field has ended.
 *
 *   - BYTES:  base64-decodes the accumulated text and pushes it; on a decode
 *             error returns false immediately.  Then continues as STRING.
 *   - STRING: emits ENDSTR and pops the string sub-frame.
 *   - ENUM:   resolves the accumulated symbolic name to its integer value and
 *             pushes it; an unknown name sets and reports an error.
 *
 * Except for the early return on a base64 error, the multipart state is
 * always ended.  Returns true on success and false on any error. */
static bool end_stringval(upb_json_parser *p) {
  bool ok = true;

  switch (upb_fielddef_type(p->top->f)) {
    case UPB_TYPE_BYTES:
      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
                       p->accumulated, p->accumulated_len)) {
        return false;
      }
      /* Fall through. */

    case UPB_TYPE_STRING: {
      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
      upb_sink_endstr(&p->top->sink, sel);
      p->top--;
      break;
    }

    case UPB_TYPE_ENUM: {
      /* Resolve enum symbolic name to integer value. */
      const upb_enumdef *enumdef =
          (const upb_enumdef*)upb_fielddef_subdef(p->top->f);

      size_t len;
      const char *buf = accumulate_getptr(p, &len);

      int32_t int_val = 0;
      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);

      if (ok) {
        upb_selector_t sel = parser_getsel(p);
        upb_sink_putint32(&p->top->sink, sel, int_val);
      } else {
        /* The precision argument consumed by "%.*s" must be an int; passing
         * a size_t there is undefined behavior. */
        upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'", (int)len,
                           buf);
        upb_env_reporterror(p->env, &p->status);
      }

      break;
    }

    default:
      assert(false);
      upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
      upb_env_reporterror(p->env, &p->status);
      ok = false;
      break;
  }

  multipart_end(p);

  return ok;
}
10421 
/* Parser callback: an object member name begins.  No field may be selected
 * in the current frame yet (asserted).  The name is accumulated so it can be
 * looked up once it is complete. */
static void start_member(upb_json_parser *p) {
  assert(!p->top->f);

  multipart_startaccum(p);
}
10426 
10427 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key
10428  * field based on the current contents of the accumulate buffer. */
parse_mapentry_key(upb_json_parser * p)10429 static bool parse_mapentry_key(upb_json_parser *p) {
10430 
10431   size_t len;
10432   const char *buf = accumulate_getptr(p, &len);
10433 
10434   /* Emit the key field. We do a bit of ad-hoc parsing here because the
10435    * parser state machine has already decided that this is a string field
10436    * name, and we are reinterpreting it as some arbitrary key type. In
10437    * particular, integer and bool keys are quoted, so we need to parse the
10438    * quoted string contents here. */
10439 
10440   p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
10441   if (p->top->f == NULL) {
10442     upb_status_seterrmsg(&p->status, "mapentry message has no key");
10443     upb_env_reporterror(p->env, &p->status);
10444     return false;
10445   }
10446   switch (upb_fielddef_type(p->top->f)) {
10447     case UPB_TYPE_INT32:
10448     case UPB_TYPE_INT64:
10449     case UPB_TYPE_UINT32:
10450     case UPB_TYPE_UINT64:
10451       /* Invoke end_number. The accum buffer has the number's text already. */
10452       if (!parse_number(p)) {
10453         return false;
10454       }
10455       break;
10456     case UPB_TYPE_BOOL:
10457       if (len == 4 && !strncmp(buf, "true", 4)) {
10458         if (!parser_putbool(p, true)) {
10459           return false;
10460         }
10461       } else if (len == 5 && !strncmp(buf, "false", 5)) {
10462         if (!parser_putbool(p, false)) {
10463           return false;
10464         }
10465       } else {
10466         upb_status_seterrmsg(&p->status,
10467                              "Map bool key not 'true' or 'false'");
10468         upb_env_reporterror(p->env, &p->status);
10469         return false;
10470       }
10471       multipart_end(p);
10472       break;
10473     case UPB_TYPE_STRING:
10474     case UPB_TYPE_BYTES: {
10475       upb_sink subsink;
10476       upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
10477       upb_sink_startstr(&p->top->sink, sel, len, &subsink);
10478       sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
10479       upb_sink_putstring(&subsink, sel, buf, len, NULL);
10480       sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10481       upb_sink_endstr(&subsink, sel);
10482       multipart_end(p);
10483       break;
10484     }
10485     default:
10486       upb_status_seterrmsg(&p->status, "Invalid field type for map key");
10487       upb_env_reporterror(p->env, &p->status);
10488       return false;
10489   }
10490 
10491   return true;
10492 }
10493 
10494 /* Helper: emit one map entry (as a submessage in the map field sequence). This
10495  * is invoked from end_membername(), at the end of the map entry's key string,
10496  * with the map key in the accumulate buffer. It parses the key from that
10497  * buffer, emits the handler calls to start the mapentry submessage (setting up
10498  * its subframe in the process), and sets up state in the subframe so that the
10499  * value parser (invoked next) will emit the mapentry's value field and then
10500  * end the mapentry message. */
10501 
handle_mapentry(upb_json_parser * p)10502 static bool handle_mapentry(upb_json_parser *p) {
10503   const upb_fielddef *mapfield;
10504   const upb_msgdef *mapentrymsg;
10505   upb_jsonparser_frame *inner;
10506   upb_selector_t sel;
10507 
10508   /* Map entry: p->top->sink is the seq frame, so we need to start a frame
10509    * for the mapentry itself, and then set |f| in that frame so that the map
10510    * value field is parsed, and also set a flag to end the frame after the
10511    * map-entry value is parsed. */
10512   if (!check_stack(p)) return false;
10513 
10514   mapfield = p->top->mapfield;
10515   mapentrymsg = upb_fielddef_msgsubdef(mapfield);
10516 
10517   inner = p->top + 1;
10518   p->top->f = mapfield;
10519   sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
10520   upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
10521   inner->m = mapentrymsg;
10522   inner->mapfield = mapfield;
10523   inner->is_map = false;
10524 
10525   /* Don't set this to true *yet* -- we reuse parsing handlers below to push
10526    * the key field value to the sink, and these handlers will pop the frame
10527    * if they see is_mapentry (when invoked by the parser state machine, they
10528    * would have just seen the map-entry value, not key). */
10529   inner->is_mapentry = false;
10530   p->top = inner;
10531 
10532   /* send STARTMSG in submsg frame. */
10533   upb_sink_startmsg(&p->top->sink);
10534 
10535   parse_mapentry_key(p);
10536 
10537   /* Set up the value field to receive the map-entry value. */
10538   p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
10539   p->top->is_mapentry = true;  /* set up to pop frame after value is parsed. */
10540   p->top->mapfield = mapfield;
10541   if (p->top->f == NULL) {
10542     upb_status_seterrmsg(&p->status, "mapentry message has no value");
10543     upb_env_reporterror(p->env, &p->status);
10544     return false;
10545   }
10546 
10547   return true;
10548 }
10549 
/* Parser callback: an object member name has ended.
 *
 * In a map frame the name is the map key and is handed to handle_mapentry().
 * Otherwise the accumulated name is looked up in the current message; if a
 * field is found it becomes the current field and the multipart state is
 * ended.  An unknown name sets and reports an error and returns false. */
static bool end_membername(upb_json_parser *p) {
  size_t name_len;
  const char *name;
  const upb_fielddef *field;

  assert(!p->top->f);

  if (p->top->is_map) {
    return handle_mapentry(p);
  }

  name = accumulate_getptr(p, &name_len);
  field = upb_msgdef_ntof(p->top->m, name, name_len);

  if (field) {
    p->top->f = field;
    multipart_end(p);
    return true;
  }

  /* TODO(haberman): Ignore unknown fields if requested/configured to do
   * so. */
  upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)name_len, name);
  upb_env_reporterror(p->env, &p->status);
  return false;
}
10574 
/* Parser callback: an object member (name and value) has ended.  If the
 * current frame is a map-entry frame, it is closed first: ENDMSG is sent on
 * the entry, the frame is popped, and ENDSUBMSG is sent on the parent using
 * the map field's selector.  Finally the current field is cleared. */
static void end_member(upb_json_parser *p) {
  /* If we just parsed a map-entry value, end that frame too. */
  if (p->top->is_mapentry) {
    upb_status endmsg_status = UPB_STATUS_INIT;
    upb_selector_t endsubmsg_sel;
    bool found;
    const upb_fielddef *mapfield;

    assert(p->top > p->stack);

    /* send ENDMSG on submsg. */
    upb_sink_endmsg(&p->top->sink, &endmsg_status);
    mapfield = p->top->mapfield;

    /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
    p->top--;
    found = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG,
                                     &endsubmsg_sel);
    UPB_ASSERT_VAR(found, found);
    upb_sink_endsubmsg(&p->top->sink, endsubmsg_sel);
  }

  p->top->f = NULL;
}
10597 
/* Parser callback: a JSON object begins as the value of the current field.
 *
 *   - Map field: starts a new parser frame in a repeated-field context
 *     (STARTSEQ) and marks it as a map frame.
 *   - Submessage field: starts a new parser frame in the submessage context
 *     (STARTSUBMSG).
 *   - Anything else: sets and reports an error and returns false.
 *
 * Also returns false if there is no room for a new frame. */
static bool start_subobject(upb_json_parser *p) {
  upb_jsonparser_frame *child;
  upb_selector_t sel;
  bool is_map;

  assert(p->top->f);

  is_map = upb_fielddef_ismap(p->top->f);
  if (!is_map && !upb_fielddef_issubmsg(p->top->f)) {
    upb_status_seterrf(&p->status,
                       "Object specified for non-message/group field: %s",
                       upb_fielddef_name(p->top->f));
    upb_env_reporterror(p->env, &p->status);
    return false;
  }

  if (!check_stack(p)) return false;

  child = p->top + 1;

  if (is_map) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(&p->top->sink, sel, &child->sink);
  } else {
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
    upb_sink_startsubmsg(&p->top->sink, sel, &child->sink);
  }

  child->m = upb_fielddef_msgsubdef(p->top->f);
  if (is_map) {
    /* Only map frames record the field they belong to. */
    child->mapfield = p->top->f;
  }
  child->f = NULL;
  child->is_map = is_map;
  child->is_mapentry = false;
  p->top = child;

  return true;
}
10647 
/* Pops the frame of a finished subobject and notifies the frame beneath it:
 * ENDSEQ if the popped frame was a map, ENDSUBMSG otherwise. */
static void end_subobject(upb_json_parser *p) {
  const bool was_map = p->top->is_map;
  upb_selector_t sel;

  /* Pop first: the selector is looked up with the outer frame current. */
  p->top--;

  if (was_map) {
    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
    upb_sink_endseq(&p->top->sink, sel);
    return;
  }

  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
  upb_sink_endsubmsg(&p->top->sink, sel);
}
10661 
/* Called when a JSON array begins as the value of the current field.  The
 * field must be a sequence; otherwise an error is reported to the env and
 * false is returned.  On success STARTSEQ is sent and a new frame is pushed
 * that keeps the same message and field as the outer frame. */
static bool start_array(upb_json_parser *p) {
  upb_selector_t startseq_sel;
  upb_jsonparser_frame *elems;

  assert(p->top->f);

  if (upb_fielddef_isseq(p->top->f)) {
    if (!check_stack(p)) return false;

    elems = p->top + 1;
    startseq_sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
    upb_sink_startseq(&p->top->sink, startseq_sel, &elems->sink);

    elems->m = p->top->m;
    elems->f = p->top->f;
    elems->is_map = false;
    elems->is_mapentry = false;
    p->top = elems;

    return true;
  }

  upb_status_seterrf(&p->status,
                     "Array specified for non-repeated field: %s",
                     upb_fielddef_name(p->top->f));
  upb_env_reporterror(p->env, &p->status);
  return false;
}
10689 
/* Pops the frame of a finished array and sends ENDSEQ on the frame beneath
 * it.  There must be a frame to pop. */
static void end_array(upb_json_parser *p) {
  upb_selector_t endseq_sel;

  assert(p->top > p->stack);
  --p->top;

  endseq_sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
  upb_sink_endseq(&p->top->sink, endseq_sel);
}
10699 
/* Sends STARTMSG on the current frame's sink when a JSON object begins, except
 * in a map frame, where nothing is sent. */
static void start_object(upb_json_parser *p) {
  if (p->top->is_map) {
    return;
  }
  upb_sink_startmsg(&p->top->sink);
}
10705 
/* Sends ENDMSG on the current frame's sink when a JSON object ends, except in
 * a map frame.  A non-ok status from the sink is reported to the env. */
static void end_object(upb_json_parser *p) {
  upb_status endmsg_status;

  if (p->top->is_map) {
    return;
  }

  upb_status_clear(&endmsg_status);
  upb_sink_endmsg(&p->top->sink, &endmsg_status);

  if (upb_ok(&endmsg_status)) {
    return;
  }
  upb_env_reporterror(p->env, &endmsg_status);
}
10716 
10717 
/* Used inside the Ragel actions of parse(): when the wrapped call returns
 * false, jump to the "error" label of the enclosing function.  The expansion
 * is a bare "if" without braces or a trailing semicolon, so it must be used as
 * a complete statement and never directly before an "else". */
#define CHECK_RETURN_TOP(x) if (!(x)) goto error
10719 
10720 
10721 /* The actual parser **********************************************************/
10722 
10723 /* What follows is the Ragel parser itself.  The language is specified in Ragel
10724  * and the actions call our C functions above.
10725  *
10726  * Ragel has an extensive set of functionality, and we use only a small part of
10727  * it.  There are many action types but we only use a few:
10728  *
10729  *   ">" -- transition into a machine
10730  *   "%" -- transition out of a machine
10731  *   "@" -- transition into a final state of a machine.
10732  *
10733  * "@" transitions are tricky because a machine can transition into a final
10734  * state repeatedly.  But in some cases we know this can't happen, for example
10735  * a string which is delimited by a final '"' can only transition into its
10736  * final state once, when the closing '"' is seen. */
10737 
10738 
10739 #line 1218 "upb/json/parser.rl"
10740 
10741 
10742 
10743 #line 1130 "upb/json/parser.c"
/* Generated by Ragel; do not edit by hand.  Action lists referenced by
 * _json_trans_actions: at a given offset the first entry is the number of
 * actions and the following entries are the action ids dispatched by the
 * switch statement in parse(). */
static const char _json_actions[] = {
	0, 1, 0, 1, 2, 1, 3, 1,
	5, 1, 6, 1, 7, 1, 8, 1,
	10, 1, 12, 1, 13, 1, 14, 1,
	15, 1, 16, 1, 17, 1, 21, 1,
	25, 1, 27, 2, 3, 8, 2, 4,
	5, 2, 6, 2, 2, 6, 8, 2,
	11, 9, 2, 13, 15, 2, 14, 15,
	2, 18, 1, 2, 19, 27, 2, 20,
	9, 2, 22, 27, 2, 23, 27, 2,
	24, 27, 2, 26, 27, 3, 14, 11,
	9
};
10757 
/* Generated by Ragel; do not edit by hand.  Indexed by state: the offset into
 * _json_trans_keys at which that state's keys begin. */
static const unsigned char _json_key_offsets[] = {
	0, 0, 4, 9, 14, 15, 19, 24,
	29, 34, 38, 42, 45, 48, 50, 54,
	58, 60, 62, 67, 69, 71, 80, 86,
	92, 98, 104, 106, 115, 116, 116, 116,
	121, 126, 131, 132, 133, 134, 135, 135,
	136, 137, 138, 138, 139, 140, 141, 141,
	146, 151, 152, 156, 161, 166, 171, 175,
	175, 178, 178, 178
};
10768 
/* Generated by Ragel; do not edit by hand.  Input characters that select a
 * transition.  For each state the single-character keys come first (searched
 * by binary search in parse()), followed by low/high pairs describing
 * character ranges. */
static const char _json_trans_keys[] = {
	32, 123, 9, 13, 32, 34, 125, 9,
	13, 32, 34, 125, 9, 13, 34, 32,
	58, 9, 13, 32, 93, 125, 9, 13,
	32, 44, 125, 9, 13, 32, 44, 125,
	9, 13, 32, 34, 9, 13, 45, 48,
	49, 57, 48, 49, 57, 46, 69, 101,
	48, 57, 69, 101, 48, 57, 43, 45,
	48, 57, 48, 57, 48, 57, 46, 69,
	101, 48, 57, 34, 92, 34, 92, 34,
	47, 92, 98, 102, 110, 114, 116, 117,
	48, 57, 65, 70, 97, 102, 48, 57,
	65, 70, 97, 102, 48, 57, 65, 70,
	97, 102, 48, 57, 65, 70, 97, 102,
	34, 92, 34, 45, 91, 102, 110, 116,
	123, 48, 57, 34, 32, 93, 125, 9,
	13, 32, 44, 93, 9, 13, 32, 93,
	125, 9, 13, 97, 108, 115, 101, 117,
	108, 108, 114, 117, 101, 32, 34, 125,
	9, 13, 32, 34, 125, 9, 13, 34,
	32, 58, 9, 13, 32, 93, 125, 9,
	13, 32, 44, 125, 9, 13, 32, 44,
	125, 9, 13, 32, 34, 9, 13, 32,
	9, 13, 0
};
10794 
/* Generated by Ragel; do not edit by hand.  Indexed by state: the number of
 * single-character keys that state has in _json_trans_keys. */
static const char _json_single_lengths[] = {
	0, 2, 3, 3, 1, 2, 3, 3,
	3, 2, 2, 1, 3, 0, 2, 2,
	0, 0, 3, 2, 2, 9, 0, 0,
	0, 0, 2, 7, 1, 0, 0, 3,
	3, 3, 1, 1, 1, 1, 0, 1,
	1, 1, 0, 1, 1, 1, 0, 3,
	3, 1, 2, 3, 3, 3, 2, 0,
	1, 0, 0, 0
};
10805 
/* Generated by Ragel; do not edit by hand.  Indexed by state: the number of
 * low/high range pairs that state has in _json_trans_keys, stored after its
 * single-character keys. */
static const char _json_range_lengths[] = {
	0, 1, 1, 1, 0, 1, 1, 1,
	1, 1, 1, 1, 0, 1, 1, 1,
	1, 1, 1, 0, 0, 0, 3, 3,
	3, 3, 0, 1, 0, 0, 0, 1,
	1, 1, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 1,
	1, 0, 1, 1, 1, 1, 1, 0,
	1, 0, 0, 0
};
10816 
/* Generated by Ragel; do not edit by hand.  Indexed by state: the position in
 * _json_indicies of that state's first entry.  parse() adds the index of the
 * matched key (or the total key count when nothing matched) to this base. */
static const short _json_index_offsets[] = {
	0, 0, 4, 9, 14, 16, 20, 25,
	30, 35, 39, 43, 46, 50, 52, 56,
	60, 62, 64, 69, 72, 75, 85, 89,
	93, 97, 101, 104, 113, 115, 116, 117,
	122, 127, 132, 134, 136, 138, 140, 141,
	143, 145, 147, 148, 150, 152, 154, 155,
	160, 165, 167, 171, 176, 181, 186, 190,
	191, 194, 195, 196
};
10827 
/* Generated by Ragel; do not edit by hand.  Maps a per-state key position
 * (base from _json_index_offsets plus the matched key index) to a transition
 * number used to index _json_trans_targs and _json_trans_actions. */
static const char _json_indicies[] = {
	0, 2, 0, 1, 3, 4, 5, 3,
	1, 6, 7, 8, 6, 1, 9, 1,
	10, 11, 10, 1, 11, 1, 1, 11,
	12, 13, 14, 15, 13, 1, 16, 17,
	8, 16, 1, 17, 7, 17, 1, 18,
	19, 20, 1, 19, 20, 1, 22, 23,
	23, 21, 24, 1, 23, 23, 24, 21,
	25, 25, 26, 1, 26, 1, 26, 21,
	22, 23, 23, 20, 21, 28, 29, 27,
	31, 32, 30, 33, 33, 33, 33, 33,
	33, 33, 33, 34, 1, 35, 35, 35,
	1, 36, 36, 36, 1, 37, 37, 37,
	1, 38, 38, 38, 1, 40, 41, 39,
	42, 43, 44, 45, 46, 47, 48, 43,
	1, 49, 1, 50, 51, 53, 54, 1,
	53, 52, 55, 56, 54, 55, 1, 56,
	1, 1, 56, 52, 57, 1, 58, 1,
	59, 1, 60, 1, 61, 62, 1, 63,
	1, 64, 1, 65, 66, 1, 67, 1,
	68, 1, 69, 70, 71, 72, 70, 1,
	73, 74, 75, 73, 1, 76, 1, 77,
	78, 77, 1, 78, 1, 1, 78, 79,
	80, 81, 82, 80, 1, 83, 84, 75,
	83, 1, 84, 74, 84, 1, 85, 86,
	86, 1, 1, 1, 1, 0
};
10855 
/* Generated by Ragel; do not edit by hand.  Indexed by transition number: the
 * state entered when that transition is taken.  parse() stops when the state
 * becomes 0. */
static const char _json_trans_targs[] = {
	1, 0, 2, 3, 4, 56, 3, 4,
	56, 5, 5, 6, 7, 8, 9, 56,
	8, 9, 11, 12, 18, 57, 13, 15,
	14, 16, 17, 20, 58, 21, 20, 58,
	21, 19, 22, 23, 24, 25, 26, 20,
	58, 21, 28, 30, 31, 34, 39, 43,
	47, 29, 59, 59, 32, 31, 29, 32,
	33, 35, 36, 37, 38, 59, 40, 41,
	42, 59, 44, 45, 46, 59, 48, 49,
	55, 48, 49, 55, 50, 50, 51, 52,
	53, 54, 55, 53, 54, 59, 56
};
10869 
/* Generated by Ragel; do not edit by hand.  Indexed by transition number: the
 * offset into _json_actions of the action list to run for that transition, or
 * 0 when the transition has no actions. */
static const char _json_trans_actions[] = {
	0, 0, 0, 21, 77, 53, 0, 47,
	23, 17, 0, 0, 15, 19, 19, 50,
	0, 0, 0, 0, 0, 1, 0, 0,
	0, 0, 0, 3, 13, 0, 0, 35,
	5, 11, 0, 38, 7, 7, 7, 41,
	44, 9, 62, 56, 25, 0, 0, 0,
	31, 29, 33, 59, 15, 0, 27, 0,
	0, 0, 0, 0, 0, 68, 0, 0,
	0, 71, 0, 0, 0, 65, 21, 77,
	53, 0, 47, 23, 17, 0, 0, 15,
	19, 19, 50, 0, 0, 74, 0
};
10883 
/* Generated by Ragel.  State in which the machine starts; assigned to the
 * current state by json_parser_reset(). */
static const int json_start = 1;

/* Generated by Ragel.  Entry states of the individual machines.  The
 * generated code in parse() hard-codes these numbers (10, 19 and 27) when it
 * calls into a machine, so the constants are only referenced to silence
 * unused-variable warnings in end(). */
static const int json_en_number_machine = 10;
static const int json_en_string_machine = 19;
static const int json_en_value_machine = 27;
static const int json_en_main = 1;
10890 
10891 
10892 #line 1221 "upb/json/parser.rl"
10893 
parse(void * closure,const void * hd,const char * buf,size_t size,const upb_bufhandle * handle)10894 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
10895              const upb_bufhandle *handle) {
10896   upb_json_parser *parser = closure;
10897 
10898   /* Variables used by Ragel's generated code. */
10899   int cs = parser->current_state;
10900   int *stack = parser->parser_stack;
10901   int top = parser->parser_top;
10902 
10903   const char *p = buf;
10904   const char *pe = buf + size;
10905 
10906   parser->handle = handle;
10907 
10908   UPB_UNUSED(hd);
10909   UPB_UNUSED(handle);
10910 
10911   capture_resume(parser, buf);
10912 
10913 
10914 #line 1301 "upb/json/parser.c"
10915 	{
10916 	int _klen;
10917 	unsigned int _trans;
10918 	const char *_acts;
10919 	unsigned int _nacts;
10920 	const char *_keys;
10921 
10922 	if ( p == pe )
10923 		goto _test_eof;
10924 	if ( cs == 0 )
10925 		goto _out;
10926 _resume:
10927 	_keys = _json_trans_keys + _json_key_offsets[cs];
10928 	_trans = _json_index_offsets[cs];
10929 
10930 	_klen = _json_single_lengths[cs];
10931 	if ( _klen > 0 ) {
10932 		const char *_lower = _keys;
10933 		const char *_mid;
10934 		const char *_upper = _keys + _klen - 1;
10935 		while (1) {
10936 			if ( _upper < _lower )
10937 				break;
10938 
10939 			_mid = _lower + ((_upper-_lower) >> 1);
10940 			if ( (*p) < *_mid )
10941 				_upper = _mid - 1;
10942 			else if ( (*p) > *_mid )
10943 				_lower = _mid + 1;
10944 			else {
10945 				_trans += (unsigned int)(_mid - _keys);
10946 				goto _match;
10947 			}
10948 		}
10949 		_keys += _klen;
10950 		_trans += _klen;
10951 	}
10952 
10953 	_klen = _json_range_lengths[cs];
10954 	if ( _klen > 0 ) {
10955 		const char *_lower = _keys;
10956 		const char *_mid;
10957 		const char *_upper = _keys + (_klen<<1) - 2;
10958 		while (1) {
10959 			if ( _upper < _lower )
10960 				break;
10961 
10962 			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
10963 			if ( (*p) < _mid[0] )
10964 				_upper = _mid - 2;
10965 			else if ( (*p) > _mid[1] )
10966 				_lower = _mid + 2;
10967 			else {
10968 				_trans += (unsigned int)((_mid - _keys)>>1);
10969 				goto _match;
10970 			}
10971 		}
10972 		_trans += _klen;
10973 	}
10974 
10975 _match:
10976 	_trans = _json_indicies[_trans];
10977 	cs = _json_trans_targs[_trans];
10978 
10979 	if ( _json_trans_actions[_trans] == 0 )
10980 		goto _again;
10981 
10982 	_acts = _json_actions + _json_trans_actions[_trans];
10983 	_nacts = (unsigned int) *_acts++;
10984 	while ( _nacts-- > 0 )
10985 	{
10986 		switch ( *_acts++ )
10987 		{
10988 	case 0:
10989 #line 1133 "upb/json/parser.rl"
10990 	{ p--; {cs = stack[--top]; goto _again;} }
10991 	break;
10992 	case 1:
10993 #line 1134 "upb/json/parser.rl"
10994 	{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
10995 	break;
10996 	case 2:
10997 #line 1138 "upb/json/parser.rl"
10998 	{ start_text(parser, p); }
10999 	break;
11000 	case 3:
11001 #line 1139 "upb/json/parser.rl"
11002 	{ CHECK_RETURN_TOP(end_text(parser, p)); }
11003 	break;
11004 	case 4:
11005 #line 1145 "upb/json/parser.rl"
11006 	{ start_hex(parser); }
11007 	break;
11008 	case 5:
11009 #line 1146 "upb/json/parser.rl"
11010 	{ hexdigit(parser, p); }
11011 	break;
11012 	case 6:
11013 #line 1147 "upb/json/parser.rl"
11014 	{ CHECK_RETURN_TOP(end_hex(parser)); }
11015 	break;
11016 	case 7:
11017 #line 1153 "upb/json/parser.rl"
11018 	{ CHECK_RETURN_TOP(escape(parser, p)); }
11019 	break;
11020 	case 8:
11021 #line 1159 "upb/json/parser.rl"
11022 	{ p--; {cs = stack[--top]; goto _again;} }
11023 	break;
11024 	case 9:
11025 #line 1162 "upb/json/parser.rl"
11026 	{ {stack[top++] = cs; cs = 19; goto _again;} }
11027 	break;
11028 	case 10:
11029 #line 1164 "upb/json/parser.rl"
11030 	{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
11031 	break;
11032 	case 11:
11033 #line 1169 "upb/json/parser.rl"
11034 	{ start_member(parser); }
11035 	break;
11036 	case 12:
11037 #line 1170 "upb/json/parser.rl"
11038 	{ CHECK_RETURN_TOP(end_membername(parser)); }
11039 	break;
11040 	case 13:
11041 #line 1173 "upb/json/parser.rl"
11042 	{ end_member(parser); }
11043 	break;
11044 	case 14:
11045 #line 1179 "upb/json/parser.rl"
11046 	{ start_object(parser); }
11047 	break;
11048 	case 15:
11049 #line 1182 "upb/json/parser.rl"
11050 	{ end_object(parser); }
11051 	break;
11052 	case 16:
11053 #line 1188 "upb/json/parser.rl"
11054 	{ CHECK_RETURN_TOP(start_array(parser)); }
11055 	break;
11056 	case 17:
11057 #line 1192 "upb/json/parser.rl"
11058 	{ end_array(parser); }
11059 	break;
11060 	case 18:
11061 #line 1197 "upb/json/parser.rl"
11062 	{ start_number(parser, p); }
11063 	break;
11064 	case 19:
11065 #line 1198 "upb/json/parser.rl"
11066 	{ CHECK_RETURN_TOP(end_number(parser, p)); }
11067 	break;
11068 	case 20:
11069 #line 1200 "upb/json/parser.rl"
11070 	{ CHECK_RETURN_TOP(start_stringval(parser)); }
11071 	break;
11072 	case 21:
11073 #line 1201 "upb/json/parser.rl"
11074 	{ CHECK_RETURN_TOP(end_stringval(parser)); }
11075 	break;
11076 	case 22:
11077 #line 1203 "upb/json/parser.rl"
11078 	{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
11079 	break;
11080 	case 23:
11081 #line 1205 "upb/json/parser.rl"
11082 	{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
11083 	break;
11084 	case 24:
11085 #line 1207 "upb/json/parser.rl"
11086 	{ /* null value */ }
11087 	break;
11088 	case 25:
11089 #line 1209 "upb/json/parser.rl"
11090 	{ CHECK_RETURN_TOP(start_subobject(parser)); }
11091 	break;
11092 	case 26:
11093 #line 1210 "upb/json/parser.rl"
11094 	{ end_subobject(parser); }
11095 	break;
11096 	case 27:
11097 #line 1215 "upb/json/parser.rl"
11098 	{ p--; {cs = stack[--top]; goto _again;} }
11099 	break;
11100 #line 1487 "upb/json/parser.c"
11101 		}
11102 	}
11103 
11104 _again:
11105 	if ( cs == 0 )
11106 		goto _out;
11107 	if ( ++p != pe )
11108 		goto _resume;
11109 	_test_eof: {}
11110 	_out: {}
11111 	}
11112 
11113 #line 1242 "upb/json/parser.rl"
11114 
11115   if (p != pe) {
11116     upb_status_seterrf(&parser->status, "Parse error at %s\n", p);
11117     upb_env_reporterror(parser->env, &parser->status);
11118   } else {
11119     capture_suspend(parser, &p);
11120   }
11121 
11122 error:
11123   /* Save parsing state back to parser. */
11124   parser->current_state = cs;
11125   parser->parser_top = top;
11126 
11127   return p - buf;
11128 }
11129 
end(void * closure,const void * hd)11130 bool end(void *closure, const void *hd) {
11131   UPB_UNUSED(closure);
11132   UPB_UNUSED(hd);
11133 
11134   /* Prevent compile warning on unused static constants. */
11135   UPB_UNUSED(json_start);
11136   UPB_UNUSED(json_en_number_machine);
11137   UPB_UNUSED(json_en_string_machine);
11138   UPB_UNUSED(json_en_value_machine);
11139   UPB_UNUSED(json_en_main);
11140   return true;
11141 }
11142 
/* Puts the parser back into its initial state: the frame stack holds only the
 * bottom frame with no field selected, the Ragel machine is at its start
 * state with an empty call stack, and the text-accumulation, capture and
 * status fields are cleared. */
static void json_parser_reset(upb_json_parser *p) {
  int cs;
  int top;

  /* Make the bottom frame current and mark it as a plain message frame. */
  p->top = p->stack;
  p->top->is_mapentry = false;
  p->top->is_map = false;
  p->top->f = NULL;

  /* Ragel's generated initialization of its state variables follows. */

#line 1541 "upb/json/parser.c"
	{
	cs = json_start;
	top = 0;
	}

#line 1282 "upb/json/parser.rl"
  p->parser_top = top;
  p->current_state = cs;

  accumulate_clear(p);
  p->capture = NULL;
  p->multipart_state = MULTIPART_INACTIVE;
  p->accumulated = NULL;

  upb_status_clear(&p->status);
}
11169 
11170 
11171 /* Public API *****************************************************************/
11172 
upb_json_parser_create(upb_env * env,upb_sink * output)11173 upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
11174 #ifndef NDEBUG
11175   const size_t size_before = upb_env_bytesallocated(env);
11176 #endif
11177   upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
11178   if (!p) return false;
11179 
11180   p->env = env;
11181   p->limit = p->stack + UPB_JSON_MAX_DEPTH;
11182   p->accumulate_buf = NULL;
11183   p->accumulate_buf_size = 0;
11184   upb_byteshandler_init(&p->input_handler_);
11185   upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
11186   upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
11187   upb_bytessink_reset(&p->input_, &p->input_handler_, p);
11188 
11189   json_parser_reset(p);
11190   upb_sink_reset(&p->top->sink, output->handlers, output->closure);
11191   p->top->m = upb_handlers_msgdef(output->handlers);
11192 
11193   /* If this fails, uncomment and increase the value in parser.h. */
11194   /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
11195   assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
11196   return p;
11197 }
11198 
upb_json_parser_input(upb_json_parser * p)11199 upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
11200   return &p->input_;
11201 }
11202 /*
11203 ** This currently uses snprintf() to format primitives, and could be optimized
11204 ** further.
11205 */
11206 
11207 
11208 #include <stdlib.h>
11209 #include <stdio.h>
11210 #include <string.h>
11211 #include <stdint.h>
11212 
/* State of a JSON printer: a sink whose handlers write JSON text to a bytes
 * sink. */
struct upb_json_printer {
  upb_sink input_;
  /* BytesSink closure. */
  void *subc_;
  /* Destination of the generated JSON text. */
  upb_bytessink *output_;

  /* We track the depth so that we know when to emit startstr/endstr on the
   * output. */
  int depth_;

  /* Have we emitted the first element? This state is necessary to emit commas
   * without leaving a trailing comma in arrays/maps. We keep this state per
   * frame depth.
   *
   * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
   * We count frames (contexts in which we separate elements by commas) as both
   * repeated fields and messages (maps), and the worst case is a
   * message->repeated field->submessage->repeated field->... nesting. */
  bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
};
11233 
11234 /* StringPiece; a pointer plus a length. */
/* StringPiece; a pointer plus a length.  The struct stores only the pointer
 * and never frees the characters it points to. */
typedef struct {
  const char *ptr;  /* First character of the string. */
  size_t len;       /* Number of characters, excluding any terminator. */
} strpc;
11239 
newstrpc(upb_handlers * h,const upb_fielddef * f)11240 strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) {
11241   strpc *ret = malloc(sizeof(*ret));
11242   ret->ptr = upb_fielddef_name(f);
11243   ret->len = strlen(ret->ptr);
11244   upb_handlers_addcleanup(h, ret, free);
11245   return ret;
11246 }
11247 
11248 /* ------------ JSON string printing: values, maps, arrays ------------------ */
11249 
/* Writes len bytes of buf to the printer's output sink.  The sink is expected
 * to accept every byte; this is checked through UPB_ASSERT_VAR. */
static void print_data(upb_json_printer *p, const char *buf,
                       unsigned int len) {
  /* TODO: Will need to change if we support pushback from the sink. */
  size_t accepted = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
  UPB_ASSERT_VAR(accepted, accepted == len);
}
11256 
/* Emits the separator that precedes an element of the current frame: nothing
 * for the first element, a comma for every later one.  Afterwards the frame is
 * marked as having received its first element. */
static void print_comma(upb_json_printer *p) {
  bool *is_first = &p->first_elem_[p->depth_];

  if (*is_first) {
    *is_first = false;
  } else {
    print_data(p, ",", 1);
  }
}
11263 
11264 /* Helpers that print properly formatted elements to the JSON output stream. */
11265 
/* Used for escaping control chars in strings: is_json_escaped() treats every
 * byte value below this limit (0x20) as needing an escape. */
static const char kControlCharLimit = 0x20;
11268 
is_json_escaped(char c)11269 UPB_INLINE bool is_json_escaped(char c) {
11270   /* See RFC 4627. */
11271   unsigned char uc = (unsigned char)c;
11272   return uc < kControlCharLimit || uc == '"' || uc == '\\';
11273 }
11274 
json_nice_escape(char c)11275 UPB_INLINE char* json_nice_escape(char c) {
11276   switch (c) {
11277     case '"':  return "\\\"";
11278     case '\\': return "\\\\";
11279     case '\b': return "\\b";
11280     case '\f': return "\\f";
11281     case '\n': return "\\n";
11282     case '\r': return "\\r";
11283     case '\t': return "\\t";
11284     default:   return NULL;
11285   }
11286 }
11287 
11288 /* Write a properly escaped string chunk. The surrounding quotes are *not*
11289  * printed; this is so that the caller has the option of emitting the string
11290  * content in chunks. */
putstring(upb_json_printer * p,const char * buf,unsigned int len)11291 static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
11292   const char* unescaped_run = NULL;
11293   unsigned int i;
11294   for (i = 0; i < len; i++) {
11295     char c = buf[i];
11296     /* Handle escaping. */
11297     if (is_json_escaped(c)) {
11298       /* Use a "nice" escape, like \n, if one exists for this character. */
11299       const char* escape = json_nice_escape(c);
11300       /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
11301        * escape. */
11302       char escape_buf[8];
11303       if (!escape) {
11304         unsigned char byte = (unsigned char)c;
11305         _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
11306         escape = escape_buf;
11307       }
11308 
11309       /* N.B. that we assume that the input encoding is equal to the output
11310        * encoding (both UTF-8 for  now), so for chars >= 0x20 and != \, ", we
11311        * can simply pass the bytes through. */
11312 
11313       /* If there's a current run of unescaped chars, print that run first. */
11314       if (unescaped_run) {
11315         print_data(p, unescaped_run, &buf[i] - unescaped_run);
11316         unescaped_run = NULL;
11317       }
11318       /* Then print the escape code. */
11319       print_data(p, escape, strlen(escape));
11320     } else {
11321       /* Add to the current unescaped run of characters. */
11322       if (unescaped_run == NULL) {
11323         unescaped_run = &buf[i];
11324       }
11325     }
11326   }
11327 
11328   /* If the string ended in a run of unescaped characters, print that last run. */
11329   if (unescaped_run) {
11330     print_data(p, unescaped_run, &buf[len] - unescaped_run);
11331   }
11332 }
11333 
/* Used by the fmt_* helpers: makes the enclosing function return -1 when the
 * condition x is false.  Those helpers return size_t, so the value callers see
 * is (size_t)-1.  The expansion already ends in a semicolon. */
#define CHKLENGTH(x) if (!(x)) return -1;
11335 
11336 /* Helpers that format floating point values according to our custom formats.
11337  * Right now we use %.8g and %.17g for float/double, respectively, to match
11338  * proto2::util::JsonFormat's defaults.  May want to change this later. */
11339 
/* Formats val with "%.17g" into buf, which has room for length bytes.  Returns
 * the number of characters written, or (size_t)-1 when nothing was written or
 * the text did not fit. */
static size_t fmt_double(double val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%.17g", val);
  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
11345 
/* Formats val with "%.8g" into buf, which has room for length bytes.  Returns
 * the number of characters written, or (size_t)-1 when nothing was written or
 * the text did not fit. */
static size_t fmt_float(float val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%.8g", val);
  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
11351 
/* Writes "true" or "false" into buf, which has room for length bytes.  Returns
 * the number of characters written, or (size_t)-1 when nothing was written or
 * the text did not fit. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  const char *text = "false";
  size_t written;

  if (val) {
    text = "true";
  }
  written = _upb_snprintf(buf, length, "%s", text);
  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
11357 
/* Formats val as a signed decimal integer into buf, which has room for length
 * bytes.  Returns the number of characters written, or (size_t)-1 when nothing
 * was written or the text did not fit.
 *
 * The parameter is long long rather than long because this helper formats
 * int32_t, uint32_t and int64_t values: long is not guaranteed to be wider
 * than 32 bits, which would truncate int64_t values and wrap large uint32_t
 * values on such platforms. */
static size_t fmt_int64(long long val, char* buf, size_t length) {
  size_t n = _upb_snprintf(buf, length, "%lld", val);
  CHKLENGTH(n > 0 && n < length);
  return n;
}
11363 
/* Formats val as an unsigned decimal integer into buf, which has room for
 * length bytes.  Returns the number of characters written, or (size_t)-1 when
 * nothing was written or the text did not fit. */
static size_t fmt_uint64(unsigned long long val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%llu", val);
  if (written == 0 || written >= length) {
    return (size_t)-1;
  }
  return written;
}
11369 
11370 /* Print a map key given a field name. Called by scalar field handlers and by
11371  * startseq for repeated fields. */
putkey(void * closure,const void * handler_data)11372 static bool putkey(void *closure, const void *handler_data) {
11373   upb_json_printer *p = closure;
11374   const strpc *key = handler_data;
11375   print_comma(p);
11376   print_data(p, "\"", 1);
11377   putstring(p, key->ptr, key->len);
11378   print_data(p, "\":", 2);
11379   return true;
11380 }
11381 
/* CHKFMT: make the enclosing function return false when val is (size_t)-1, the
 * failure value of the fmt_* helpers.
 * CHK: make the enclosing function return false when val is false.
 * Both expansions already end in a semicolon. */
#define CHKFMT(val) if ((val) == (size_t)-1) return false;
#define CHK(val)    if (!(val)) return false;
11384 
/* Defines three value handlers for one primitive type, formatting values with
 * fmt_func into a 64-byte stack buffer:
 *   put<type>       prints only the formatted value; returns false when
 *                   fmt_func reports failure.
 *   scalar_<type>   prints the field key (putkey) and then the value.
 *   repeated_<type> prints the element separator (print_comma) and then the
 *                   value. */
#define TYPE_HANDLERS(type, fmt_func)                                        \
  static bool put##type(void *closure, const void *handler_data, type val) { \
    upb_json_printer *p = closure;                                           \
    char data[64];                                                           \
    size_t length = fmt_func(val, data, sizeof(data));                       \
    UPB_UNUSED(handler_data);                                                \
    CHKFMT(length);                                                          \
    print_data(p, data, length);                                             \
    return true;                                                             \
  }                                                                          \
  static bool scalar_##type(void *closure, const void *handler_data,         \
                            type val) {                                      \
    CHK(putkey(closure, handler_data));                                      \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }                                                                          \
  static bool repeated_##type(void *closure, const void *handler_data,       \
                              type val) {                                    \
    upb_json_printer *p = closure;                                           \
    print_comma(p);                                                          \
    CHK(put##type(closure, handler_data, val));                              \
    return true;                                                             \
  }
11408 
/* Defines putmapkey_<type>, which prints a primitive value as a quoted map key
 * followed by a colon, using the put<type> handler generated by
 * TYPE_HANDLERS.  fmt_func is accepted for symmetry with TYPE_HANDLERS but is
 * not used by the expansion. */
#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
  static bool putmapkey_##type(void *closure, const void *handler_data,      \
                            type val) {                                      \
    upb_json_printer *p = closure;                                           \
    print_data(p, "\"", 1);                                                  \
    CHK(put##type(closure, handler_data, val));                              \
    print_data(p, "\":", 2);                                                 \
    return true;                                                             \
  }
11418 
/* Instantiate the value handlers for every primitive type.  int32_t and
 * uint32_t are formatted through fmt_int64; their values are implicitly
 * converted to that helper's parameter type. */
TYPE_HANDLERS(double,   fmt_double)
TYPE_HANDLERS(float,    fmt_float)
TYPE_HANDLERS(bool,     fmt_bool)
TYPE_HANDLERS(int32_t,  fmt_int64)
TYPE_HANDLERS(uint32_t, fmt_int64)
TYPE_HANDLERS(int64_t,  fmt_int64)
TYPE_HANDLERS(uint64_t, fmt_uint64)

/* double and float are not allowed to be map keys. */
TYPE_HANDLERS_MAPKEY(bool,     fmt_bool)
TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64)
TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64)
TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64)
TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64)
11433 
11434 #undef TYPE_HANDLERS
11435 #undef TYPE_HANDLERS_MAPKEY
11436 
/* Handler data for the enum value handlers. */
typedef struct {
  /* Passed to putkey() as its handler data by scalar_enum(). */
  void *keyname;
  /* Enum definition used to translate a number into its symbolic name. */
  const upb_enumdef *enumdef;
} EnumHandlerData;
11441 
scalar_enum(void * closure,const void * handler_data,int32_t val)11442 static bool scalar_enum(void *closure, const void *handler_data,
11443                         int32_t val) {
11444   const EnumHandlerData *hd = handler_data;
11445   upb_json_printer *p = closure;
11446   const char *symbolic_name;
11447 
11448   CHK(putkey(closure, hd->keyname));
11449 
11450   symbolic_name = upb_enumdef_iton(hd->enumdef, val);
11451   if (symbolic_name) {
11452     print_data(p, "\"", 1);
11453     putstring(p, symbolic_name, strlen(symbolic_name));
11454     print_data(p, "\"", 1);
11455   } else {
11456     putint32_t(closure, NULL, val);
11457   }
11458 
11459   return true;
11460 }
11461 
/* Prints the enum value val: the quoted symbolic name when def has a name for
 * it, otherwise the number itself. */
static void print_enum_symbolic_name(upb_json_printer *p,
                                     const upb_enumdef *def,
                                     int32_t val) {
  const char *name = upb_enumdef_iton(def, val);

  if (name == NULL) {
    /* No symbolic name is known for this number: print it as an integer. */
    putint32_t(p, NULL, val);
    return;
  }

  print_data(p, "\"", 1);
  putstring(p, name, strlen(name));
  print_data(p, "\"", 1);
}
11474 
repeated_enum(void * closure,const void * handler_data,int32_t val)11475 static bool repeated_enum(void *closure, const void *handler_data,
11476                           int32_t val) {
11477   const EnumHandlerData *hd = handler_data;
11478   upb_json_printer *p = closure;
11479   print_comma(p);
11480 
11481   print_enum_symbolic_name(p, hd->enumdef, val);
11482 
11483   return true;
11484 }
11485 
mapvalue_enum(void * closure,const void * handler_data,int32_t val)11486 static bool mapvalue_enum(void *closure, const void *handler_data,
11487                           int32_t val) {
11488   const EnumHandlerData *hd = handler_data;
11489   upb_json_printer *p = closure;
11490 
11491   print_enum_symbolic_name(p, hd->enumdef, val);
11492 
11493   return true;
11494 }
11495 
scalar_startsubmsg(void * closure,const void * handler_data)11496 static void *scalar_startsubmsg(void *closure, const void *handler_data) {
11497   return putkey(closure, handler_data) ? closure : UPB_BREAK;
11498 }
11499 
repeated_startsubmsg(void * closure,const void * handler_data)11500 static void *repeated_startsubmsg(void *closure, const void *handler_data) {
11501   upb_json_printer *p = closure;
11502   UPB_UNUSED(handler_data);
11503   print_comma(p);
11504   return closure;
11505 }
11506 
/* Opens a JSON object: moves one nesting level deeper, flags that level as
 * not having printed an element yet, and writes the '{'. */
static void start_frame(upb_json_printer *p) {
  p->first_elem_[++p->depth_] = true;
  print_data(p, "{", 1);
}
11512 
/* Closes a JSON object: writes the '}' and moves back up one nesting level. */
static void end_frame(upb_json_printer *p) {
  print_data(p, "}", 1);
  --p->depth_;
}
11517 
printer_startmsg(void * closure,const void * handler_data)11518 static bool printer_startmsg(void *closure, const void *handler_data) {
11519   upb_json_printer *p = closure;
11520   UPB_UNUSED(handler_data);
11521   if (p->depth_ == 0) {
11522     upb_bytessink_start(p->output_, 0, &p->subc_);
11523   }
11524   start_frame(p);
11525   return true;
11526 }
11527 
/* End-of-message handler.  Closes the current object frame and, once the depth
 * is back at zero, ends the output bytes sink.  |s| is not used.  Always
 * returns true. */
static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
  upb_json_printer *printer = closure;

  UPB_UNUSED(s);
  UPB_UNUSED(handler_data);

  end_frame(printer);
  if (printer->depth_ != 0) {
    return true;
  }

  upb_bytessink_end(printer->output_);
  return true;
}
11538 
startseq(void * closure,const void * handler_data)11539 static void *startseq(void *closure, const void *handler_data) {
11540   upb_json_printer *p = closure;
11541   CHK(putkey(closure, handler_data));
11542   p->depth_++;
11543   p->first_elem_[p->depth_] = true;
11544   print_data(p, "[", 1);
11545   return closure;
11546 }
11547 
endseq(void * closure,const void * handler_data)11548 static bool endseq(void *closure, const void *handler_data) {
11549   upb_json_printer *p = closure;
11550   UPB_UNUSED(handler_data);
11551   print_data(p, "]", 1);
11552   p->depth_--;
11553   return true;
11554 }
11555 
startmap(void * closure,const void * handler_data)11556 static void *startmap(void *closure, const void *handler_data) {
11557   upb_json_printer *p = closure;
11558   CHK(putkey(closure, handler_data));
11559   p->depth_++;
11560   p->first_elem_[p->depth_] = true;
11561   print_data(p, "{", 1);
11562   return closure;
11563 }
11564 
endmap(void * closure,const void * handler_data)11565 static bool endmap(void *closure, const void *handler_data) {
11566   upb_json_printer *p = closure;
11567   UPB_UNUSED(handler_data);
11568   print_data(p, "}", 1);
11569   p->depth_--;
11570   return true;
11571 }
11572 
putstr(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)11573 static size_t putstr(void *closure, const void *handler_data, const char *str,
11574                      size_t len, const upb_bufhandle *handle) {
11575   upb_json_printer *p = closure;
11576   UPB_UNUSED(handler_data);
11577   UPB_UNUSED(handle);
11578   putstring(p, str, len);
11579   return len;
11580 }
11581 
11582 /* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
putbytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)11583 static size_t putbytes(void *closure, const void *handler_data, const char *str,
11584                        size_t len, const upb_bufhandle *handle) {
11585   upb_json_printer *p = closure;
11586 
11587   /* This is the regular base64, not the "web-safe" version. */
11588   static const char base64[] =
11589       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
11590 
11591   /* Base64-encode. */
11592   char data[16000];
11593   const char *limit = data + sizeof(data);
11594   const unsigned char *from = (const unsigned char*)str;
11595   char *to = data;
11596   size_t remaining = len;
11597   size_t bytes;
11598 
11599   UPB_UNUSED(handler_data);
11600   UPB_UNUSED(handle);
11601 
11602   while (remaining > 2) {
11603     /* TODO(haberman): handle encoded lengths > sizeof(data) */
11604     UPB_ASSERT_VAR(limit, (limit - to) >= 4);
11605 
11606     to[0] = base64[from[0] >> 2];
11607     to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
11608     to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
11609     to[3] = base64[from[2] & 0x3f];
11610 
11611     remaining -= 3;
11612     to += 4;
11613     from += 3;
11614   }
11615 
11616   switch (remaining) {
11617     case 2:
11618       to[0] = base64[from[0] >> 2];
11619       to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
11620       to[2] = base64[(from[1] & 0xf) << 2];
11621       to[3] = '=';
11622       to += 4;
11623       from += 2;
11624       break;
11625     case 1:
11626       to[0] = base64[from[0] >> 2];
11627       to[1] = base64[((from[0] & 0x3) << 4)];
11628       to[2] = '=';
11629       to[3] = '=';
11630       to += 4;
11631       from += 1;
11632       break;
11633   }
11634 
11635   bytes = to - data;
11636   print_data(p, "\"", 1);
11637   putstring(p, data, bytes);
11638   print_data(p, "\"", 1);
11639   return len;
11640 }
11641 
scalar_startstr(void * closure,const void * handler_data,size_t size_hint)11642 static void *scalar_startstr(void *closure, const void *handler_data,
11643                              size_t size_hint) {
11644   upb_json_printer *p = closure;
11645   UPB_UNUSED(handler_data);
11646   UPB_UNUSED(size_hint);
11647   CHK(putkey(closure, handler_data));
11648   print_data(p, "\"", 1);
11649   return p;
11650 }
11651 
/* String-data handler for a non-repeated string field: passes the chunk to
 * putstr() and reports |len| bytes consumed. */
static size_t scalar_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  const size_t consumed = putstr(closure, handler_data, str, len, handle);

  CHK(consumed);
  return len;
}
11658 
scalar_endstr(void * closure,const void * handler_data)11659 static bool scalar_endstr(void *closure, const void *handler_data) {
11660   upb_json_printer *p = closure;
11661   UPB_UNUSED(handler_data);
11662   print_data(p, "\"", 1);
11663   return true;
11664 }
11665 
repeated_startstr(void * closure,const void * handler_data,size_t size_hint)11666 static void *repeated_startstr(void *closure, const void *handler_data,
11667                                size_t size_hint) {
11668   upb_json_printer *p = closure;
11669   UPB_UNUSED(handler_data);
11670   UPB_UNUSED(size_hint);
11671   print_comma(p);
11672   print_data(p, "\"", 1);
11673   return p;
11674 }
11675 
/* String-data handler for an element of a repeated string field: passes the
 * chunk to putstr() and reports |len| bytes consumed. */
static size_t repeated_str(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  const size_t consumed = putstr(closure, handler_data, str, len, handle);

  CHK(consumed);
  return len;
}
11682 
repeated_endstr(void * closure,const void * handler_data)11683 static bool repeated_endstr(void *closure, const void *handler_data) {
11684   upb_json_printer *p = closure;
11685   UPB_UNUSED(handler_data);
11686   print_data(p, "\"", 1);
11687   return true;
11688 }
11689 
mapkeyval_startstr(void * closure,const void * handler_data,size_t size_hint)11690 static void *mapkeyval_startstr(void *closure, const void *handler_data,
11691                                 size_t size_hint) {
11692   upb_json_printer *p = closure;
11693   UPB_UNUSED(handler_data);
11694   UPB_UNUSED(size_hint);
11695   print_data(p, "\"", 1);
11696   return p;
11697 }
11698 
/* String-data handler for a string map key: passes the chunk to putstr() and
 * reports |len| bytes consumed. */
static size_t mapkey_str(void *closure, const void *handler_data,
                         const char *str, size_t len,
                         const upb_bufhandle *handle) {
  const size_t consumed = putstr(closure, handler_data, str, len, handle);

  CHK(consumed);
  return len;
}
11705 
mapkey_endstr(void * closure,const void * handler_data)11706 static bool mapkey_endstr(void *closure, const void *handler_data) {
11707   upb_json_printer *p = closure;
11708   UPB_UNUSED(handler_data);
11709   print_data(p, "\":", 2);
11710   return true;
11711 }
11712 
mapvalue_endstr(void * closure,const void * handler_data)11713 static bool mapvalue_endstr(void *closure, const void *handler_data) {
11714   upb_json_printer *p = closure;
11715   UPB_UNUSED(handler_data);
11716   print_data(p, "\"", 1);
11717   return true;
11718 }
11719 
/* String handler for a non-repeated bytes field: writes the key through
 * putkey(), then the Base64-encoded value through putbytes().  Reports |len|
 * bytes consumed. */
static size_t scalar_bytes(void *closure, const void *handler_data,
                           const char *str, size_t len,
                           const upb_bufhandle *handle) {
  size_t consumed;

  CHK(putkey(closure, handler_data));

  consumed = putbytes(closure, handler_data, str, len, handle);
  CHK(consumed);
  return len;
}
11727 
repeated_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)11728 static size_t repeated_bytes(void *closure, const void *handler_data,
11729                              const char *str, size_t len,
11730                              const upb_bufhandle *handle) {
11731   upb_json_printer *p = closure;
11732   print_comma(p);
11733   CHK(putbytes(closure, handler_data, str, len, handle));
11734   return len;
11735 }
11736 
mapkey_bytes(void * closure,const void * handler_data,const char * str,size_t len,const upb_bufhandle * handle)11737 static size_t mapkey_bytes(void *closure, const void *handler_data,
11738                            const char *str, size_t len,
11739                            const upb_bufhandle *handle) {
11740   upb_json_printer *p = closure;
11741   CHK(putbytes(closure, handler_data, str, len, handle));
11742   print_data(p, ":", 1);
11743   return len;
11744 }
11745 
/* Allocates an EnumHandlerData record for enum field |f|, fills in its enum
 * definition and key name, registers free() with |h| as the record's cleanup
 * function, and stores the record as the handler data of |attr|. */
static void set_enum_hd(upb_handlers *h,
                        const upb_fielddef *f,
                        upb_handlerattr *attr) {
  /* NOTE(review): the malloc() result is dereferenced without a NULL check. */
  EnumHandlerData *data = malloc(sizeof(*data));

  data->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
  data->keyname = newstrpc(h, f);

  upb_handlers_addcleanup(h, data, free);
  upb_handlerattr_sethandlerdata(attr, data);
}
11755 
11756 /* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
11757  * in a map).
11758  *
11759  * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
11760  * key or value cases properly. The right way to do this is to allocate a
11761  * temporary structure at the start of a mapentry submessage, store key and
11762  * value data in it as key and value handlers are called, and then print the
11763  * key/value pair once at the end of the submessage. If we don't do this, we
11764  * should at least detect the case and throw an error. However, so far all of
11765  * our sources that emit mapentry messages do so canonically (with one key
11766  * field, and then one value field), so this is not a pressing concern at the
11767  * moment. */
void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
  const upb_msgdef *entry = upb_handlers_msgdef(h);

  /* An entry is emitted simply as '"key": value', so both of its fields are
   * wired up individually here instead of through the generic per-type
   * registration. */
  const upb_fielddef *key = upb_msgdef_itof(entry, UPB_MAPENTRY_KEY);
  const upb_fielddef *value = upb_msgdef_itof(entry, UPB_MAPENTRY_VALUE);

  upb_handlerattr no_data = UPB_HANDLERATTR_INITIALIZER;

  UPB_UNUSED(closure);

  /* Key handlers. */
  switch (upb_fielddef_type(key)) {
    case UPB_TYPE_BOOL:
      upb_handlers_setbool(h, key, putmapkey_bool, &no_data);
      break;
    case UPB_TYPE_INT32:
      upb_handlers_setint32(h, key, putmapkey_int32_t, &no_data);
      break;
    case UPB_TYPE_UINT32:
      upb_handlers_setuint32(h, key, putmapkey_uint32_t, &no_data);
      break;
    case UPB_TYPE_INT64:
      upb_handlers_setint64(h, key, putmapkey_int64_t, &no_data);
      break;
    case UPB_TYPE_UINT64:
      upb_handlers_setuint64(h, key, putmapkey_uint64_t, &no_data);
      break;
    case UPB_TYPE_STRING:
      upb_handlers_setstartstr(h, key, mapkeyval_startstr, &no_data);
      upb_handlers_setstring(h, key, mapkey_str, &no_data);
      upb_handlers_setendstr(h, key, mapkey_endstr, &no_data);
      break;
    case UPB_TYPE_BYTES:
      upb_handlers_setstring(h, key, mapkey_bytes, &no_data);
      break;
    default:
      /* Any other key type is treated as a programming error. */
      assert(false);
      break;
  }

  /* Value handlers. */
  switch (upb_fielddef_type(value)) {
    case UPB_TYPE_BOOL:
      upb_handlers_setbool(h, value, putbool, &no_data);
      break;
    case UPB_TYPE_FLOAT:
      upb_handlers_setfloat(h, value, putfloat, &no_data);
      break;
    case UPB_TYPE_DOUBLE:
      upb_handlers_setdouble(h, value, putdouble, &no_data);
      break;
    case UPB_TYPE_INT32:
      upb_handlers_setint32(h, value, putint32_t, &no_data);
      break;
    case UPB_TYPE_UINT32:
      upb_handlers_setuint32(h, value, putuint32_t, &no_data);
      break;
    case UPB_TYPE_INT64:
      upb_handlers_setint64(h, value, putint64_t, &no_data);
      break;
    case UPB_TYPE_UINT64:
      upb_handlers_setuint64(h, value, putuint64_t, &no_data);
      break;
    case UPB_TYPE_STRING:
      upb_handlers_setstartstr(h, value, mapkeyval_startstr, &no_data);
      upb_handlers_setstring(h, value, putstr, &no_data);
      upb_handlers_setendstr(h, value, mapvalue_endstr, &no_data);
      break;
    case UPB_TYPE_BYTES:
      upb_handlers_setstring(h, value, putbytes, &no_data);
      break;
    case UPB_TYPE_ENUM: {
      /* Enum values need their own attr carrying the EnumHandlerData. */
      upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
      set_enum_hd(h, value, &enum_attr);
      upb_handlers_setint32(h, value, mapvalue_enum, &enum_attr);
      upb_handlerattr_uninit(&enum_attr);
      break;
    }
    case UPB_TYPE_MESSAGE:
      /* Nothing to register -- the submsg handlers will print the message
       * as appropriate. */
      break;
  }

  upb_handlerattr_uninit(&no_data);
}
11855 
/* Handler-registration callback (passed to upb_handlers_newfrozen() by
 * upb_json_printer_newhandlers()): installs the JSON printing handlers for
 * every field of the message type behind |h|.  Mapentry message types are
 * delegated to printer_sethandlers_mapentry(). */
void printer_sethandlers(const void *closure, upb_handlers *h) {
  const upb_msgdef *md = upb_handlers_msgdef(h);
  bool is_mapentry = upb_msgdef_mapentry(md);
  upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
  upb_msg_field_iter i;

  if (is_mapentry) {
    /* mapentry messages are sufficiently different that we handle them
     * separately.  Uninit our attr first so that this exit path pairs the
     * initializer with an uninit just like the normal one does. */
    upb_handlerattr_uninit(&empty_attr);
    printer_sethandlers_mapentry(closure, h);
    return;
  }

  upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
  upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);

  /* Registers the value handler for a primitive field type: the repeated_*
   * variant (no handler data) for sequences, otherwise the scalar_* variant
   * with the field's name attr as handler data. */
#define TYPE(type, name, ctype)                                               \
  case type:                                                                  \
    if (upb_fielddef_isseq(f)) {                                              \
      upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr);            \
    } else {                                                                  \
      upb_handlers_set##name(h, f, scalar_##ctype, &name_attr);               \
    }                                                                         \
    break;

  upb_msg_field_begin(&i, md);
  for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);

    /* Per-field attr whose handler data is the value of newstrpc(h, f). */
    upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
    upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f));

    if (upb_fielddef_ismap(f)) {
      upb_handlers_setstartseq(h, f, startmap, &name_attr);
      upb_handlers_setendseq(h, f, endmap, &name_attr);
    } else if (upb_fielddef_isseq(f)) {
      upb_handlers_setstartseq(h, f, startseq, &name_attr);
      upb_handlers_setendseq(h, f, endseq, &empty_attr);
    }

    switch (upb_fielddef_type(f)) {
      TYPE(UPB_TYPE_FLOAT,  float,  float);
      TYPE(UPB_TYPE_DOUBLE, double, double);
      TYPE(UPB_TYPE_BOOL,   bool,   bool);
      TYPE(UPB_TYPE_INT32,  int32,  int32_t);
      TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
      TYPE(UPB_TYPE_INT64,  int64,  int64_t);
      TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
      case UPB_TYPE_ENUM: {
        /* For now, we always emit symbolic names for enums. We may want an
         * option later to control this behavior, but we will wait for a real
         * need first. */
        upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
        set_enum_hd(h, f, &enum_attr);

        if (upb_fielddef_isseq(f)) {
          upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
        } else {
          upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
        }

        upb_handlerattr_uninit(&enum_attr);
        break;
      }
      case UPB_TYPE_STRING:
        if (upb_fielddef_isseq(f)) {
          upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
          upb_handlers_setstring(h, f, repeated_str, &empty_attr);
          upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
        } else {
          upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
          upb_handlers_setstring(h, f, scalar_str, &empty_attr);
          upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
        }
        break;
      case UPB_TYPE_BYTES:
        /* XXX: this doesn't support strings that span buffers yet. The base64
         * encoder will need to be made resumable for this to work properly. */
        if (upb_fielddef_isseq(f)) {
          upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
        } else {
          upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
        }
        break;
      case UPB_TYPE_MESSAGE:
        if (upb_fielddef_isseq(f)) {
          upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
        } else {
          upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
        }
        break;
    }

    upb_handlerattr_uninit(&name_attr);
  }

  upb_handlerattr_uninit(&empty_attr);
#undef TYPE
}
11957 
/* Puts the printer back at nesting depth zero. */
static void json_printer_reset(upb_json_printer *p) {
  p->depth_ = 0;
}
11961 
11962 
11963 /* Public API *****************************************************************/
11964 
upb_json_printer_create(upb_env * e,const upb_handlers * h,upb_bytessink * output)11965 upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
11966                                           upb_bytessink *output) {
11967 #ifndef NDEBUG
11968   size_t size_before = upb_env_bytesallocated(e);
11969 #endif
11970 
11971   upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
11972   if (!p) return NULL;
11973 
11974   p->output_ = output;
11975   json_printer_reset(p);
11976   upb_sink_reset(&p->input_, h, p);
11977 
11978   /* If this fails, increase the value in printer.h. */
11979   assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
11980   return p;
11981 }
11982 
upb_json_printer_input(upb_json_printer * p)11983 upb_sink *upb_json_printer_input(upb_json_printer *p) {
11984   return &p->input_;
11985 }
11986 
upb_json_printer_newhandlers(const upb_msgdef * md,const void * owner)11987 const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
11988                                                  const void *owner) {
11989   return upb_handlers_newfrozen(md, owner, printer_sethandlers, NULL);
11990 }
11991