1 /*
2 ** protobuf decoder bytecode compiler
3 **
4 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
5 ** according to that specific schema and destination handlers.
6 **
7 ** Bytecode definition is in decoder.int.h.
8 */
9 
10 #include <stdarg.h>
11 #include "upb/pb/decoder.int.h"
12 #include "upb/pb/varint.int.h"
13 
14 #ifdef UPB_DUMP_BYTECODE
15 #include <stdio.h>
16 #endif
17 
18 #include "upb/port_def.inc"
19 
20 #define MAXLABEL 5
21 #define EMPTYLABEL -1
22 
23 /* upb_pbdecodermethod ********************************************************/
24 
/* Destroys a decoder method: tears down its dispatch table first, then
 * returns the method object itself to the global allocator. */
static void freemethod(upb_pbdecodermethod *method) {
  upb_inttable *dispatch = &method->dispatch;

  upb_inttable_uninit(dispatch);
  upb_gfree(method);
}
29 
newmethod(const upb_handlers * dest_handlers,mgroup * group)30 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
31                                       mgroup *group) {
32   upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
33   upb_byteshandler_init(&ret->input_handler_);
34 
35   ret->group = group;
36   ret->dest_handlers_ = dest_handlers;
37   upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
38 
39   return ret;
40 }
41 
upb_pbdecodermethod_desthandlers(const upb_pbdecodermethod * m)42 const upb_handlers *upb_pbdecodermethod_desthandlers(
43     const upb_pbdecodermethod *m) {
44   return m->dest_handlers_;
45 }
46 
upb_pbdecodermethod_inputhandler(const upb_pbdecodermethod * m)47 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
48     const upb_pbdecodermethod *m) {
49   return &m->input_handler_;
50 }
51 
upb_pbdecodermethod_isnative(const upb_pbdecodermethod * m)52 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
53   return m->is_native_;
54 }
55 
56 
57 /* mgroup *********************************************************************/
58 
/* Destroys a method group: frees every method stored in its table, then the
 * table itself, the bytecode buffer, and finally the group object. */
static void freegroup(mgroup *g) {
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &g->methods); !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_value entry = upb_inttable_iter_value(&it);
    freemethod(upb_value_getptr(entry));
  }

  upb_inttable_uninit(&g->methods);
  upb_gfree(g->bytecode);
  upb_gfree(g);
}
71 
newgroup(void)72 mgroup *newgroup(void) {
73   mgroup *g = upb_gmalloc(sizeof(*g));
74   upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
75   g->bytecode = NULL;
76   g->bytecode_end = NULL;
77   return g;
78 }
79 
80 
81 /* bytecode compiler **********************************************************/
82 
83 /* Data used only at compilation time. */
84 typedef struct {
85   mgroup *group;
86 
87   uint32_t *pc;
88   int fwd_labels[MAXLABEL];
89   int back_labels[MAXLABEL];
90 
91   /* For fields marked "lazy", parse them lazily or eagerly? */
92   bool lazy;
93 } compiler;
94 
newcompiler(mgroup * group,bool lazy)95 static compiler *newcompiler(mgroup *group, bool lazy) {
96   compiler *ret = upb_gmalloc(sizeof(*ret));
97   int i;
98 
99   ret->group = group;
100   ret->lazy = lazy;
101   for (i = 0; i < MAXLABEL; i++) {
102     ret->fwd_labels[i] = EMPTYLABEL;
103     ret->back_labels[i] = EMPTYLABEL;
104   }
105   return ret;
106 }
107 
/* Releases a compiler created by newcompiler().  The group it was writing
 * into is not touched. */
static void freecompiler(compiler *c) {
  upb_gfree(c);
}
111 
112 const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
113 
114 /* How many words an instruction is. */
instruction_len(uint32_t instr)115 static int instruction_len(uint32_t instr) {
116   switch (getop(instr)) {
117     case OP_SETDISPATCH: return 1 + ptr_words;
118     case OP_TAGN: return 3;
119     case OP_SETBIGGROUPNUM: return 2;
120     default: return 1;
121   }
122 }
123 
op_has_longofs(int32_t instruction)124 bool op_has_longofs(int32_t instruction) {
125   switch (getop(instruction)) {
126     case OP_CALL:
127     case OP_BRANCH:
128     case OP_CHECKDELIM:
129       return true;
130     /* The "tag" instructions only have 8 bytes available for the jump target,
131      * but that is ok because these opcodes only require short jumps. */
132     case OP_TAG1:
133     case OP_TAG2:
134     case OP_TAGN:
135       return false;
136     default:
137       UPB_ASSERT(false);
138       return false;
139   }
140 }
141 
/* Extracts the signed jump offset stored in |instruction|: a sign-extended
 * byte (bits 8..15) for short-offset opcodes, otherwise the upper 24 bits
 * taken as a signed value. */
static int32_t getofs(uint32_t instruction) {
  if (!op_has_longofs(instruction)) {
    return (int8_t)(instruction >> 8);
  }
  return (int32_t)instruction >> 8;
}
149 
/* Stores jump offset |ofs| into *instruction.  Long-offset opcodes keep only
 * their opcode byte and take the offset in the upper 24 bits; short-offset
 * opcodes have just bits 8..15 replaced, leaving the rest of the word as is.
 * Asserts that the offset reads back unchanged. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  uint32_t word = *instruction;

  if (op_has_longofs(word)) {
    word = getop(word) | (uint32_t)ofs << 8;
  } else {
    word = (word & ~0xff00) | ((ofs & 0xff) << 8);
  }
  *instruction = word;
  UPB_ASSERT(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
}
158 
/* Returns the current write position as a word offset from the start of the
 * group's bytecode buffer. */
static uint32_t pcofs(compiler *c) {
  const uint32_t *base = c->group->bytecode;
  return (uint32_t)(c->pc - base);
}
162 
163 /* Defines a local label at the current PC location.  All previous forward
164  * references are updated to point to this location.  The location is noted
165  * for any future backward references. */
label(compiler * c,unsigned int label)166 static void label(compiler *c, unsigned int label) {
167   int val;
168   uint32_t *codep;
169 
170   UPB_ASSERT(label < MAXLABEL);
171   val = c->fwd_labels[label];
172   codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
173   while (codep) {
174     int ofs = getofs(*codep);
175     setofs(codep, (int32_t)(c->pc - codep - instruction_len(*codep)));
176     codep = ofs ? codep + ofs : NULL;
177   }
178   c->fwd_labels[label] = EMPTYLABEL;
179   c->back_labels[label] = pcofs(c);
180 }
181 
182 /* Creates a reference to a numbered label; either a forward reference
183  * (positive arg) or backward reference (negative arg).  For forward references
184  * the value returned now is actually a "next" pointer into a linked list of all
185  * instructions that use this label and will be patched later when the label is
186  * defined with label().
187  *
188  * The returned value is the offset that should be written into the instruction.
189  */
labelref(compiler * c,int label)190 static int32_t labelref(compiler *c, int label) {
191   UPB_ASSERT(label < MAXLABEL);
192   if (label == LABEL_DISPATCH) {
193     /* No resolving required. */
194     return 0;
195   } else if (label < 0) {
196     /* Backward local label.  Relative to the next instruction. */
197     uint32_t from = (uint32_t)((c->pc + 1) - c->group->bytecode);
198     return c->back_labels[-label] - from;
199   } else {
200     /* Forward local label: prepend to (possibly-empty) linked list. */
201     int *lptr = &c->fwd_labels[label];
202     int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
203     *lptr = pcofs(c);
204     return ret;
205   }
206 }
207 
/* Appends one 32-bit word at the write cursor.  When the cursor has reached
 * the end of the buffer, the buffer is first grown to twice its size (at
 * least 64 words) and the cursor is re-based onto the new allocation. */
static void put32(compiler *c, uint32_t v) {
  mgroup *grp = c->group;

  if (c->pc == grp->bytecode_end) {
    int cursor = pcofs(c);
    size_t old_words = grp->bytecode_end - grp->bytecode;
    size_t new_words = UPB_MAX(old_words * 2, 64);
    /* TODO(haberman): handle OOM. */
    grp->bytecode = upb_grealloc(grp->bytecode, old_words * sizeof(uint32_t),
                                 new_words * sizeof(uint32_t));
    grp->bytecode_end = grp->bytecode + new_words;
    c->pc = grp->bytecode + cursor;
  }
  *c->pc = v;
  c->pc++;
}
222 
/* Emits one instruction for opcode |op| at the current PC.  The variadic
 * arguments depend on the opcode:
 *
 *   OP_SETDISPATCH                (void*)  pointer embedded after the opcode
 *   OP_STARTMSG, OP_ENDMSG, OP_PUSHLENDELIM, OP_POP, OP_SETDELIM, OP_HALT,
 *   OP_RET, OP_DISPATCH           none (any extra argument is ignored)
 *   OP_PARSE_*, OP_STARTSEQ, OP_ENDSEQ, OP_STARTSUBMSG, OP_ENDSUBMSG,
 *   OP_STARTSTR, OP_STRING, OP_ENDSTR, OP_PUSHTAGDELIM
 *                                 (upb_selector_t) stored above the opcode byte
 *   OP_SETBIGGROUPNUM             (int) stored in a second word
 *   OP_CALL                       (upb_pbdecodermethod*) callee; the offset is
 *                                 relative to the instruction after the call
 *   OP_CHECKDELIM, OP_BRANCH      (int) label, resolved through labelref()
 *   OP_TAG1, OP_TAG2              (int) label, (uint64_t) tag (<= 0xffff)
 *   OP_TAGN                       (int) label, (uint64_t) tag in two extra words
 *
 * Any other opcode is a programming error and trips an assertion rather than
 * silently emitting nothing. */
static void putop(compiler *c, int op, ...) {
  va_list ap;
  va_start(ap, op);

  switch (op) {
    case OP_SETDISPATCH: {
      uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
      put32(c, OP_SETDISPATCH);
      put32(c, (uint32_t)ptr);
      /* On targets with pointers wider than 32 bits, emit the high half too. */
      if (sizeof(uintptr_t) > sizeof(uint32_t))
        put32(c, (uint64_t)ptr >> 32);
      break;
    }
    case OP_STARTMSG:
    case OP_ENDMSG:
    case OP_PUSHLENDELIM:
    case OP_POP:
    case OP_SETDELIM:
    case OP_HALT:
    case OP_RET:
    case OP_DISPATCH:
      put32(c, op);
      break;
    case OP_PARSE_DOUBLE:
    case OP_PARSE_FLOAT:
    case OP_PARSE_INT64:
    case OP_PARSE_UINT64:
    case OP_PARSE_INT32:
    case OP_PARSE_FIXED64:
    case OP_PARSE_FIXED32:
    case OP_PARSE_BOOL:
    case OP_PARSE_UINT32:
    case OP_PARSE_SFIXED32:
    case OP_PARSE_SFIXED64:
    case OP_PARSE_SINT32:
    case OP_PARSE_SINT64:
    case OP_STARTSEQ:
    case OP_ENDSEQ:
    case OP_STARTSUBMSG:
    case OP_ENDSUBMSG:
    case OP_STARTSTR:
    case OP_STRING:
    case OP_ENDSTR:
    case OP_PUSHTAGDELIM:
      put32(c, op | va_arg(ap, upb_selector_t) << 8);
      break;
    case OP_SETBIGGROUPNUM:
      put32(c, op);
      put32(c, va_arg(ap, int));
      break;
    case OP_CALL: {
      const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
      /* Offset from the instruction following the call to the callee. */
      put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
      break;
    }
    case OP_CHECKDELIM:
    case OP_BRANCH: {
      uint32_t instruction = op;
      int label = va_arg(ap, int);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAG1:
    case OP_TAG2: {
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = (uint32_t)(op | (tag << 16));
      UPB_ASSERT(tag <= 0xffff);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAGN: {
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = op | (upb_value_size(tag) << 16);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      put32(c, (uint32_t)tag);
      put32(c, tag >> 32);
      break;
    }
    default:
      /* Unknown opcode: nothing sensible can be emitted. */
      UPB_ASSERT(false);
      break;
  }

  va_end(ap);
}
310 
311 #if defined(UPB_DUMP_BYTECODE)
312 
upb_pbdecoder_getopname(unsigned int op)313 const char *upb_pbdecoder_getopname(unsigned int op) {
314 #define QUOTE(x) #x
315 #define EXPAND_AND_QUOTE(x) QUOTE(x)
316 #define OPNAME(x) OP_##x
317 #define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
318 #define T(x) OP(PARSE_##x)
319   /* Keep in sync with list in decoder.int.h. */
320   switch ((opcode)op) {
321     T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
322     T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
323     OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
324     OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
325     OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
326     OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
327     OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
328   }
329   return "<unknown op>";
330 #undef OP
331 #undef T
332 }
333 
334 #endif
335 
336 #ifdef UPB_DUMP_BYTECODE
337 
dumpbc(uint32_t * p,uint32_t * end,FILE * f)338 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
339 
340   uint32_t *begin = p;
341 
342   while (p < end) {
343     fprintf(f, "%p  %8tx", p, p - begin);
344     uint32_t instr = *p++;
345     uint8_t op = getop(instr);
346     fprintf(f, " %s", upb_pbdecoder_getopname(op));
347     switch ((opcode)op) {
348       case OP_SETDISPATCH: {
349         const upb_inttable *dispatch;
350         memcpy(&dispatch, p, sizeof(void*));
351         p += ptr_words;
352         const upb_pbdecodermethod *method =
353             (void *)((char *)dispatch -
354                      offsetof(upb_pbdecodermethod, dispatch));
355         fprintf(f, " %s", upb_msgdef_fullname(
356                               upb_handlers_msgdef(method->dest_handlers_)));
357         break;
358       }
359       case OP_DISPATCH:
360       case OP_STARTMSG:
361       case OP_ENDMSG:
362       case OP_PUSHLENDELIM:
363       case OP_POP:
364       case OP_SETDELIM:
365       case OP_HALT:
366       case OP_RET:
367         break;
368       case OP_PARSE_DOUBLE:
369       case OP_PARSE_FLOAT:
370       case OP_PARSE_INT64:
371       case OP_PARSE_UINT64:
372       case OP_PARSE_INT32:
373       case OP_PARSE_FIXED64:
374       case OP_PARSE_FIXED32:
375       case OP_PARSE_BOOL:
376       case OP_PARSE_UINT32:
377       case OP_PARSE_SFIXED32:
378       case OP_PARSE_SFIXED64:
379       case OP_PARSE_SINT32:
380       case OP_PARSE_SINT64:
381       case OP_STARTSEQ:
382       case OP_ENDSEQ:
383       case OP_STARTSUBMSG:
384       case OP_ENDSUBMSG:
385       case OP_STARTSTR:
386       case OP_STRING:
387       case OP_ENDSTR:
388       case OP_PUSHTAGDELIM:
389         fprintf(f, " %d", instr >> 8);
390         break;
391       case OP_SETBIGGROUPNUM:
392         fprintf(f, " %d", *p++);
393         break;
394       case OP_CHECKDELIM:
395       case OP_CALL:
396       case OP_BRANCH:
397         fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
398         break;
399       case OP_TAG1:
400       case OP_TAG2: {
401         fprintf(f, " tag:0x%x", instr >> 16);
402         if (getofs(instr)) {
403           fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
404         }
405         break;
406       }
407       case OP_TAGN: {
408         uint64_t tag = *p++;
409         tag |= (uint64_t)*p++ << 32;
410         fprintf(f, " tag:0x%llx", (long long)tag);
411         fprintf(f, " n:%d", instr >> 16);
412         if (getofs(instr)) {
413           fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
414         }
415         break;
416       }
417     }
418     fputs("\n", f);
419   }
420 }
421 
422 #endif
423 
/* Builds the tag for field |f| with the given wire type -- the field number
 * shifted left by three, OR'ed with |wire_type| -- and returns it as encoded
 * by upb_vencode32(). */
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  uint32_t raw_tag = (upb_fielddef_number(f) << 3) | wire_type;
  uint64_t encoded = upb_vencode32(raw_tag);

  /* No tag should be greater than 5 bytes. */
  UPB_ASSERT(encoded <= 0xffffffffff);
  return encoded;
}
431 
/* Emits a tag-check instruction for field |f| / |wire_type| that refers to
 * label |dest|.  The opcode is chosen from the size reported by
 * upb_value_size() for the encoded tag: OP_TAG1 for 1, OP_TAG2 for 2, and
 * OP_TAGN for anything else. */
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  uint64_t tag = get_encoded_tag(f, wire_type);
  int tag_op;

  switch (upb_value_size(tag)) {
    case 1:
      tag_op = OP_TAG1;
      break;
    case 2:
      tag_op = OP_TAG2;
      break;
    default:
      tag_op = OP_TAGN;
      break;
  }
  putop(c, tag_op, dest, tag);
}
447 
getsel(const upb_fielddef * f,upb_handlertype_t type)448 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
449   upb_selector_t selector;
450   bool ok = upb_handlers_getselector(f, type, &selector);
451   UPB_ASSERT(ok);
452   return selector;
453 }
454 
455 /* Takes an existing, primary dispatch table entry and repacks it with a
456  * different alternate wire type.  Called when we are inserting a secondary
457  * dispatch table entry for an alternate wire type. */
repack(uint64_t dispatch,int new_wt2)458 static uint64_t repack(uint64_t dispatch, int new_wt2) {
459   uint64_t ofs;
460   uint8_t wt1;
461   uint8_t old_wt2;
462   upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
463   UPB_ASSERT(old_wt2 == NO_WIRE_TYPE);  /* wt2 should not be set yet. */
464   return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
465 }
466 
467 /* Marks the current bytecode position as the dispatch target for this message,
468  * field, and wire type. */
dispatchtarget(compiler * c,upb_pbdecodermethod * method,const upb_fielddef * f,int wire_type)469 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
470                            const upb_fielddef *f, int wire_type) {
471   /* Offset is relative to msg base. */
472   uint64_t ofs = pcofs(c) - method->code_base.ofs;
473   uint32_t fn = upb_fielddef_number(f);
474   upb_inttable *d = &method->dispatch;
475   upb_value v;
476   if (upb_inttable_remove(d, fn, &v)) {
477     /* TODO: prioritize based on packed setting in .proto file. */
478     uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
479     upb_inttable_insert(d, fn, upb_value_uint64(repacked));
480     upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
481   } else {
482     uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
483     upb_inttable_insert(d, fn, upb_value_uint64(val));
484   }
485 }
486 
/* Emits the "push" instruction that opens a submessage frame for field |f|.
 * Fields whose descriptor type is MESSAGE get OP_PUSHLENDELIM.  All others
 * get OP_PUSHTAGDELIM carrying the field number; numbers of 1 << 24 and above
 * are emitted as OP_PUSHTAGDELIM 0 followed by OP_SETBIGGROUPNUM with the
 * number in its own word. */
static void putpush(compiler *c, const upb_fielddef *f) {
  uint32_t fieldnum;

  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  fieldnum = upb_fielddef_number(f);
  if (fieldnum < 1 << 24) {
    putop(c, OP_PUSHTAGDELIM, fieldnum);
    return;
  }

  putop(c, OP_PUSHTAGDELIM, 0);
  putop(c, OP_SETBIGGROUPNUM, fieldnum);
}
500 
find_submethod(const compiler * c,const upb_pbdecodermethod * method,const upb_fielddef * f)501 static upb_pbdecodermethod *find_submethod(const compiler *c,
502                                            const upb_pbdecodermethod *method,
503                                            const upb_fielddef *f) {
504   const upb_handlers *sub =
505       upb_handlers_getsubhandlers(method->dest_handlers_, f);
506   upb_value v;
507   return upb_inttable_lookupptr(&c->group->methods, sub, &v)
508              ? upb_value_getptr(v)
509              : NULL;
510 }
511 
/* Emits |op| with selector |sel|, but only when handlers |h| have a handler
 * registered for that selector; otherwise emits nothing. */
static void putsel(compiler *c, opcode op, upb_selector_t sel,
                   const upb_handlers *h) {
  if (!upb_handlers_gethandler(h, sel, NULL)) {
    return;
  }
  putop(c, op, sel);
}
518 
519 /* Puts an opcode to call a callback, but only if a callback actually exists for
520  * this field and handler type. */
maybeput(compiler * c,opcode op,const upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)521 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
522                      const upb_fielddef *f, upb_handlertype_t type) {
523   putsel(c, op, getsel(f, type), h);
524 }
525 
haslazyhandlers(const upb_handlers * h,const upb_fielddef * f)526 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
527   if (!upb_fielddef_lazy(f))
528     return false;
529 
530   return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL) ||
531          upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL) ||
532          upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL);
533 }
534 
535 
536 /* bytecode compiler code generation ******************************************/
537 
538 /* Symbolic names for our local labels. */
539 #define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
540 #define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
541 #define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
542 #define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
543 
544 /* Generates bytecode to parse a single non-lazy message field. */
generate_msgfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)545 static void generate_msgfield(compiler *c, const upb_fielddef *f,
546                               upb_pbdecodermethod *method) {
547   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
548   const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
549   int wire_type;
550 
551   if (!sub_m) {
552     /* Don't emit any code for this field at all; it will be parsed as an
553      * unknown field.
554      *
555      * TODO(haberman): we should change this to parse it as a string field
556      * instead.  It will probably be faster, but more importantly, once we
557      * start vending unknown fields, a field shouldn't be treated as unknown
558      * just because it doesn't have subhandlers registered. */
559     return;
560   }
561 
562   label(c, LABEL_FIELD);
563 
564   wire_type =
565       (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
566           ? UPB_WIRE_TYPE_DELIMITED
567           : UPB_WIRE_TYPE_START_GROUP;
568 
569   if (upb_fielddef_isseq(f)) {
570     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
571     putchecktag(c, f, wire_type, LABEL_DISPATCH);
572    dispatchtarget(c, method, f, wire_type);
573     putop(c, OP_PUSHTAGDELIM, 0);
574     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
575    label(c, LABEL_LOOPSTART);
576     putpush(c, f);
577     putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
578     putop(c, OP_CALL, sub_m);
579     putop(c, OP_POP);
580     maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
581     if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
582       putop(c, OP_SETDELIM);
583     }
584     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
585     putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
586     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
587    label(c, LABEL_LOOPBREAK);
588     putop(c, OP_POP);
589     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
590   } else {
591     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
592     putchecktag(c, f, wire_type, LABEL_DISPATCH);
593    dispatchtarget(c, method, f, wire_type);
594     putpush(c, f);
595     putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
596     putop(c, OP_CALL, sub_m);
597     putop(c, OP_POP);
598     maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
599     if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
600       putop(c, OP_SETDELIM);
601     }
602   }
603 }
604 
605 /* Generates bytecode to parse a single string or lazy submessage field. */
generate_delimfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)606 static void generate_delimfield(compiler *c, const upb_fielddef *f,
607                                 upb_pbdecodermethod *method) {
608   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
609 
610   label(c, LABEL_FIELD);
611   if (upb_fielddef_isseq(f)) {
612     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
613     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
614    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
615     putop(c, OP_PUSHTAGDELIM, 0);
616     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
617    label(c, LABEL_LOOPSTART);
618     putop(c, OP_PUSHLENDELIM);
619     putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
620     /* Need to emit even if no handler to skip past the string. */
621     putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
622     maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
623     putop(c, OP_POP);
624     putop(c, OP_SETDELIM);
625     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
626     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
627     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
628    label(c, LABEL_LOOPBREAK);
629     putop(c, OP_POP);
630     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
631   } else {
632     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
633     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
634    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
635     putop(c, OP_PUSHLENDELIM);
636     putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
637     putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
638     maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
639     putop(c, OP_POP);
640     putop(c, OP_SETDELIM);
641   }
642 }
643 
644 /* Generates bytecode to parse a single primitive field. */
generate_primitivefield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)645 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
646                                     upb_pbdecodermethod *method) {
647   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
648   upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
649   opcode parse_type;
650   upb_selector_t sel;
651   int wire_type;
652 
653   label(c, LABEL_FIELD);
654 
655   /* From a decoding perspective, ENUM is the same as INT32. */
656   if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
657     descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
658 
659   parse_type = (opcode)descriptor_type;
660 
661   /* TODO(haberman): generate packed or non-packed first depending on "packed"
662    * setting in the fielddef.  This will favor (in speed) whichever was
663    * specified. */
664 
665   UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
666   sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
667   wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
668   if (upb_fielddef_isseq(f)) {
669     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
670     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
671    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
672     putop(c, OP_PUSHLENDELIM);
673     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Packed */
674    label(c, LABEL_LOOPSTART);
675     putop(c, parse_type, sel);
676     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
677     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
678    dispatchtarget(c, method, f, wire_type);
679     putop(c, OP_PUSHTAGDELIM, 0);
680     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Non-packed */
681    label(c, LABEL_LOOPSTART);
682     putop(c, parse_type, sel);
683     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
684     putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
685     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
686    label(c, LABEL_LOOPBREAK);
687     putop(c, OP_POP);  /* Packed and non-packed join. */
688     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
689     putop(c, OP_SETDELIM);  /* Could remove for non-packed by dup ENDSEQ. */
690   } else {
691     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
692     putchecktag(c, f, wire_type, LABEL_DISPATCH);
693    dispatchtarget(c, method, f, wire_type);
694     putop(c, parse_type, sel);
695   }
696 }
697 
698 /* Adds bytecode for parsing the given message to the given decoderplan,
699  * while adding all dispatch targets to this message's dispatch table. */
compile_method(compiler * c,upb_pbdecodermethod * method)700 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
701   const upb_handlers *h;
702   const upb_msgdef *md;
703   uint32_t* start_pc;
704   int i, n;
705   upb_value val;
706 
707   UPB_ASSERT(method);
708 
709   /* Clear all entries in the dispatch table. */
710   upb_inttable_uninit(&method->dispatch);
711   upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
712 
713   h = upb_pbdecodermethod_desthandlers(method);
714   md = upb_handlers_msgdef(h);
715 
716  method->code_base.ofs = pcofs(c);
717   putop(c, OP_SETDISPATCH, &method->dispatch);
718   putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
719  label(c, LABEL_FIELD);
720   start_pc = c->pc;
721   n = upb_msgdef_fieldcount(md);
722   for(i = 0; i < n; i++) {
723     const upb_fielddef *f = upb_msgdef_field(md, i);
724     upb_fieldtype_t type = upb_fielddef_type(f);
725 
726     if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
727       generate_msgfield(c, f, method);
728     } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
729                type == UPB_TYPE_MESSAGE) {
730       generate_delimfield(c, f, method);
731     } else {
732       generate_primitivefield(c, f, method);
733     }
734   }
735 
736   /* If there were no fields, or if no handlers were defined, we need to
737    * generate a non-empty loop body so that we can at least dispatch for unknown
738    * fields and check for the end of the message. */
739   if (c->pc == start_pc) {
740     /* Check for end-of-message. */
741     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
742     /* Unconditionally dispatch. */
743     putop(c, OP_DISPATCH, 0);
744   }
745 
746   /* For now we just loop back to the last field of the message (or if none,
747    * the DISPATCH opcode for the message). */
748   putop(c, OP_BRANCH, -LABEL_FIELD);
749 
750   /* Insert both a label and a dispatch table entry for this end-of-msg. */
751  label(c, LABEL_ENDMSG);
752   val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
753   upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
754 
755   putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
756   putop(c, OP_RET);
757 
758   upb_inttable_compact(&method->dispatch);
759 }
760 
761 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
762  * Returns the method for these handlers.
763  *
764  * Generates a new method for every destination handlers reachable from "h". */
find_methods(compiler * c,const upb_handlers * h)765 static void find_methods(compiler *c, const upb_handlers *h) {
766   upb_value v;
767   int i, n;
768   const upb_msgdef *md;
769   upb_pbdecodermethod *method;
770 
771   if (upb_inttable_lookupptr(&c->group->methods, h, &v))
772     return;
773 
774   method = newmethod(h, c->group);
775   upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method));
776 
777   /* Find submethods. */
778   md = upb_handlers_msgdef(h);
779   n = upb_msgdef_fieldcount(md);
780   for (i = 0; i < n; i++) {
781     const upb_fielddef *f = upb_msgdef_field(md, i);
782     const upb_handlers *sub_h;
783     if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
784         (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
785       /* We only generate a decoder method for submessages with handlers.
786        * Others will be parsed as unknown fields. */
787       find_methods(c, sub_h);
788     }
789   }
790 }
791 
792 /* (Re-)compile bytecode for all messages in "msgs."
793  * Overwrites any existing bytecode in "c". */
compile_methods(compiler * c)794 static void compile_methods(compiler *c) {
795   upb_inttable_iter i;
796 
797   /* Start over at the beginning of the bytecode. */
798   c->pc = c->group->bytecode;
799 
800   upb_inttable_begin(&i, &c->group->methods);
801   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
802     upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
803     compile_method(c, method);
804   }
805 }
806 
/* Finalizes every method in |g| once compilation is complete: converts its
 * code_base from a word offset into a pointer into the bytecode buffer, and
 * installs the decoder entry points on the method's bytes handler. */
static void set_bytecode_handlers(mgroup *g) {
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &g->methods); !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    upb_byteshandler *input = &method->input_handler_;

    /* code_base holds either the offset or the pointer; from here on it is
     * the pointer. */
    method->code_base.ptr = g->bytecode + method->code_base.ofs;

    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
                                 method->code_base.ptr);
    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
    upb_byteshandler_setendstr(input, upb_pbdecoder_end, method);
  }
}
821 
822 
823 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
824  * handlers and other mgroups (but verify we have a transitive closure). */
mgroup_new(const upb_handlers * dest,bool lazy)825 const mgroup *mgroup_new(const upb_handlers *dest, bool lazy) {
826   mgroup *g;
827   compiler *c;
828 
829   g = newgroup();
830   c = newcompiler(g, lazy);
831   find_methods(c, dest);
832 
833   /* We compile in two passes:
834    * 1. all messages are assigned relative offsets from the beginning of the
835    *    bytecode (saved in method->code_base).
836    * 2. forwards OP_CALL instructions can be correctly linked since message
837    *    offsets have been previously assigned.
838    *
839    * Could avoid the second pass by linking OP_CALL instructions somehow. */
840   compile_methods(c);
841   compile_methods(c);
842   g->bytecode_end = c->pc;
843   freecompiler(c);
844 
845 #ifdef UPB_DUMP_BYTECODE
846   {
847     FILE *f = fopen("/tmp/upb-bytecode", "w");
848     UPB_ASSERT(f);
849     dumpbc(g->bytecode, g->bytecode_end, stderr);
850     dumpbc(g->bytecode, g->bytecode_end, f);
851     fclose(f);
852 
853     f = fopen("/tmp/upb-bytecode.bin", "wb");
854     UPB_ASSERT(f);
855     fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
856     fclose(f);
857   }
858 #endif
859 
860   set_bytecode_handlers(g);
861   return g;
862 }
863 
864 
865 /* upb_pbcodecache ************************************************************/
866 
upb_pbcodecache_new(upb_handlercache * dest)867 upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) {
868   upb_pbcodecache *c = upb_gmalloc(sizeof(*c));
869 
870   if (!c) return NULL;
871 
872   c->dest = dest;
873   c->lazy = false;
874 
875   c->arena = upb_arena_new();
876   if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) return NULL;
877 
878   return c;
879 }
880 
/* Destroys a code cache: frees every method group stored in it, then the
 * group table, the arena, and the cache object itself. */
void upb_pbcodecache_free(upb_pbcodecache *c) {
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &c->groups); !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_value entry = upb_inttable_iter_value(&it);
    /* Groups are stored as const pointers; the cache owns them. */
    freegroup((void*)upb_value_getconstptr(entry));
  }

  upb_inttable_uninit(&c->groups);
  upb_arena_free(c->arena);
  upb_gfree(c);
}
894 
/* Sets whether groups compiled by this cache parse lazy fields lazily.  Must
 * be called while the cache holds no groups (asserted), because groups that
 * already exist were compiled with the previous setting. */
void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) {
  const upb_inttable *groups = &c->groups;

  UPB_ASSERT(upb_inttable_count(groups) == 0);
  c->lazy = lazy;
}
899 
upb_pbcodecache_get(upb_pbcodecache * c,const upb_msgdef * md)900 const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c,
901                                                const upb_msgdef *md) {
902   upb_value v;
903   bool ok;
904   const upb_handlers *h;
905   const mgroup *g;
906 
907   h = upb_handlercache_get(c->dest, md);
908   if (upb_inttable_lookupptr(&c->groups, md, &v)) {
909     g = upb_value_getconstptr(v);
910   } else {
911     g = mgroup_new(h, c->lazy);
912     ok = upb_inttable_insertptr(&c->groups, md, upb_value_constptr(g));
913     UPB_ASSUME(ok);
914   }
915 
916   ok = upb_inttable_lookupptr(&g->methods, h, &v);
917   UPB_ASSUME(ok);
918   return upb_value_getptr(v);
919 }
920