1 /*
2 ** protobuf decoder bytecode compiler
3 **
4 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
5 ** according to that specific schema and destination handlers.
6 **
7 ** Bytecode definition is in decoder.int.h.
8 */
9
10 #include <stdarg.h>
11 #include "upb/pb/decoder.int.h"
12 #include "upb/pb/varint.int.h"
13
14 #ifdef UPB_DUMP_BYTECODE
15 #include <stdio.h>
16 #endif
17
18 #include "upb/port_def.inc"
19
20 #define MAXLABEL 5
21 #define EMPTYLABEL -1
22
23 /* upb_pbdecodermethod ********************************************************/
24
/* Destroys a decoder method: tears down its dispatch table, then releases the
 * method object itself. */
static void freemethod(upb_pbdecodermethod *method) {
  upb_inttable *dispatch = &method->dispatch;

  upb_inttable_uninit(dispatch);
  upb_gfree(method);
}
29
newmethod(const upb_handlers * dest_handlers,mgroup * group)30 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
31 mgroup *group) {
32 upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
33 upb_byteshandler_init(&ret->input_handler_);
34
35 ret->group = group;
36 ret->dest_handlers_ = dest_handlers;
37 upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
38
39 return ret;
40 }
41
upb_pbdecodermethod_desthandlers(const upb_pbdecodermethod * m)42 const upb_handlers *upb_pbdecodermethod_desthandlers(
43 const upb_pbdecodermethod *m) {
44 return m->dest_handlers_;
45 }
46
upb_pbdecodermethod_inputhandler(const upb_pbdecodermethod * m)47 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
48 const upb_pbdecodermethod *m) {
49 return &m->input_handler_;
50 }
51
upb_pbdecodermethod_isnative(const upb_pbdecodermethod * m)52 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
53 return m->is_native_;
54 }
55
56
57 /* mgroup *********************************************************************/
58
/* Destroys a method group: frees every method stored in its table, then the
 * table, the bytecode buffer and finally the group itself. */
static void freegroup(mgroup *g) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &g->methods);
  while (!upb_inttable_done(&it)) {
    upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&it));
    freemethod(m);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&g->methods);
  upb_gfree(g->bytecode);
  upb_gfree(g);
}
71
newgroup(void)72 mgroup *newgroup(void) {
73 mgroup *g = upb_gmalloc(sizeof(*g));
74 upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
75 g->bytecode = NULL;
76 g->bytecode_end = NULL;
77 return g;
78 }
79
80
81 /* bytecode compiler **********************************************************/
82
83 /* Data used only at compilation time. */
84 typedef struct {
85 mgroup *group;
86
87 uint32_t *pc;
88 int fwd_labels[MAXLABEL];
89 int back_labels[MAXLABEL];
90
91 /* For fields marked "lazy", parse them lazily or eagerly? */
92 bool lazy;
93 } compiler;
94
newcompiler(mgroup * group,bool lazy)95 static compiler *newcompiler(mgroup *group, bool lazy) {
96 compiler *ret = upb_gmalloc(sizeof(*ret));
97 int i;
98
99 ret->group = group;
100 ret->lazy = lazy;
101 for (i = 0; i < MAXLABEL; i++) {
102 ret->fwd_labels[i] = EMPTYLABEL;
103 ret->back_labels[i] = EMPTYLABEL;
104 }
105 return ret;
106 }
107
/* Releases the compiler object only; the group it points at is untouched. */
static void freecompiler(compiler *c) {
  void *mem = c;
  upb_gfree(mem);
}
111
112 const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
113
114 /* How many words an instruction is. */
instruction_len(uint32_t instr)115 static int instruction_len(uint32_t instr) {
116 switch (getop(instr)) {
117 case OP_SETDISPATCH: return 1 + ptr_words;
118 case OP_TAGN: return 3;
119 case OP_SETBIGGROUPNUM: return 2;
120 default: return 1;
121 }
122 }
123
op_has_longofs(int32_t instruction)124 bool op_has_longofs(int32_t instruction) {
125 switch (getop(instruction)) {
126 case OP_CALL:
127 case OP_BRANCH:
128 case OP_CHECKDELIM:
129 return true;
130 /* The "tag" instructions only have 8 bytes available for the jump target,
131 * but that is ok because these opcodes only require short jumps. */
132 case OP_TAG1:
133 case OP_TAG2:
134 case OP_TAGN:
135 return false;
136 default:
137 UPB_ASSERT(false);
138 return false;
139 }
140 }
141
/* Extracts the signed jump offset stored in "instruction": a sign-extended
 * single byte for short-offset opcodes, otherwise everything above the opcode
 * byte. */
static int32_t getofs(uint32_t instruction) {
  if (!op_has_longofs(instruction)) {
    return (int8_t)(instruction >> 8);
  }
  return (int32_t)instruction >> 8;
}
149
/* Stores jump offset "ofs" into *instruction.  Long-offset opcodes keep only
 * their opcode byte and take the offset in all higher bits; short-offset
 * opcodes have just bits 8..15 replaced. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  const uint32_t old = *instruction;

  if (op_has_longofs(old)) {
    *instruction = getop(old) | (uint32_t)ofs << 8;
  } else {
    *instruction = (old & ~0xff00) | ((ofs & 0xff) << 8);
  }
  /* Reading the offset back fails if it did not fit in the field. */
  UPB_ASSERT(getofs(*instruction) == ofs);
}
158
/* Returns the current PC as a word offset from the start of the group's
 * bytecode buffer. */
static uint32_t pcofs(compiler *c) {
  const uint32_t *base = c->group->bytecode;
  return (uint32_t)(c->pc - base);
}
162
163 /* Defines a local label at the current PC location. All previous forward
164 * references are updated to point to this location. The location is noted
165 * for any future backward references. */
label(compiler * c,unsigned int label)166 static void label(compiler *c, unsigned int label) {
167 int val;
168 uint32_t *codep;
169
170 UPB_ASSERT(label < MAXLABEL);
171 val = c->fwd_labels[label];
172 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
173 while (codep) {
174 int ofs = getofs(*codep);
175 setofs(codep, (int32_t)(c->pc - codep - instruction_len(*codep)));
176 codep = ofs ? codep + ofs : NULL;
177 }
178 c->fwd_labels[label] = EMPTYLABEL;
179 c->back_labels[label] = pcofs(c);
180 }
181
182 /* Creates a reference to a numbered label; either a forward reference
183 * (positive arg) or backward reference (negative arg). For forward references
184 * the value returned now is actually a "next" pointer into a linked list of all
185 * instructions that use this label and will be patched later when the label is
186 * defined with label().
187 *
188 * The returned value is the offset that should be written into the instruction.
189 */
labelref(compiler * c,int label)190 static int32_t labelref(compiler *c, int label) {
191 UPB_ASSERT(label < MAXLABEL);
192 if (label == LABEL_DISPATCH) {
193 /* No resolving required. */
194 return 0;
195 } else if (label < 0) {
196 /* Backward local label. Relative to the next instruction. */
197 uint32_t from = (uint32_t)((c->pc + 1) - c->group->bytecode);
198 return c->back_labels[-label] - from;
199 } else {
200 /* Forward local label: prepend to (possibly-empty) linked list. */
201 int *lptr = &c->fwd_labels[label];
202 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
203 *lptr = pcofs(c);
204 return ret;
205 }
206 }
207
/* Appends one 32-bit word to the bytecode and advances the PC.  When the PC
 * has reached the end of the buffer, the buffer is first grown to twice its
 * size (at least 64 words) and the PC is re-based onto the new storage. */
static void put32(compiler *c, uint32_t v) {
  mgroup *g = c->group;

  if (c->pc == g->bytecode_end) {
    const int used = pcofs(c);
    const size_t old_words = g->bytecode_end - g->bytecode;
    const size_t new_words = UPB_MAX(old_words * 2, 64);

    /* TODO(haberman): handle OOM. */
    g->bytecode = upb_grealloc(g->bytecode, old_words * sizeof(uint32_t),
                               new_words * sizeof(uint32_t));
    g->bytecode_end = g->bytecode + new_words;
    c->pc = g->bytecode + used;
  }

  *c->pc = v;
  c->pc++;
}
222
/* Emits one instruction for opcode "op" at the current PC.
 *
 * The variadic operands depend on the opcode:
 *   OP_SETDISPATCH              void* (stored raw in the following word(s))
 *   OP_STARTMSG .. OP_DISPATCH  none
 *   OP_PARSE_*, OP_*SEQ, OP_*SUBMSG, OP_*STR, OP_STRING, OP_PUSHTAGDELIM
 *                               upb_selector_t, packed above the opcode byte
 *   OP_SETBIGGROUPNUM           int, stored in the following word
 *   OP_CALL                     upb_pbdecodermethod*; the offset to its
 *                               code_base.ofs is packed above the opcode byte
 *   OP_CHECKDELIM, OP_BRANCH    int label (see labelref())
 *   OP_TAG1, OP_TAG2            int label, uint64_t tag (tag must fit 16 bits)
 *   OP_TAGN                     int label, uint64_t tag (stored in the two
 *                               following words)
 *
 * Any other opcode is a programming error and trips an assert instead of
 * silently emitting nothing. */
static void putop(compiler *c, int op, ...) {
  va_list ap;
  va_start(ap, op);

  switch (op) {
    case OP_SETDISPATCH: {
      uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
      put32(c, OP_SETDISPATCH);
      put32(c, (uint32_t)ptr);
      /* A second word is only needed when pointers are wider than 32 bits. */
      if (sizeof(uintptr_t) > sizeof(uint32_t))
        put32(c, (uint64_t)ptr >> 32);
      break;
    }
    case OP_STARTMSG:
    case OP_ENDMSG:
    case OP_PUSHLENDELIM:
    case OP_POP:
    case OP_SETDELIM:
    case OP_HALT:
    case OP_RET:
    case OP_DISPATCH:
      put32(c, op);
      break;
    case OP_PARSE_DOUBLE:
    case OP_PARSE_FLOAT:
    case OP_PARSE_INT64:
    case OP_PARSE_UINT64:
    case OP_PARSE_INT32:
    case OP_PARSE_FIXED64:
    case OP_PARSE_FIXED32:
    case OP_PARSE_BOOL:
    case OP_PARSE_UINT32:
    case OP_PARSE_SFIXED32:
    case OP_PARSE_SFIXED64:
    case OP_PARSE_SINT32:
    case OP_PARSE_SINT64:
    case OP_STARTSEQ:
    case OP_ENDSEQ:
    case OP_STARTSUBMSG:
    case OP_ENDSUBMSG:
    case OP_STARTSTR:
    case OP_STRING:
    case OP_ENDSTR:
    case OP_PUSHTAGDELIM:
      put32(c, op | va_arg(ap, upb_selector_t) << 8);
      break;
    case OP_SETBIGGROUPNUM:
      put32(c, op);
      put32(c, va_arg(ap, int));
      break;
    case OP_CALL: {
      const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
      /* Offset is relative to the instruction following this one. */
      put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
      break;
    }
    case OP_CHECKDELIM:
    case OP_BRANCH: {
      uint32_t instruction = op;
      int label = va_arg(ap, int);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAG1:
    case OP_TAG2: {
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = (uint32_t)(op | (tag << 16));
      UPB_ASSERT(tag <= 0xffff);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAGN: {
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = op | (upb_value_size(tag) << 16);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      put32(c, (uint32_t)tag);
      put32(c, tag >> 32);
      break;
    }
    default:
      /* Unknown opcode: nothing sensible can be emitted. */
      UPB_ASSERT(false);
      break;
  }

  va_end(ap);
}
310
311 #if defined(UPB_DUMP_BYTECODE)
312
upb_pbdecoder_getopname(unsigned int op)313 const char *upb_pbdecoder_getopname(unsigned int op) {
314 #define QUOTE(x) #x
315 #define EXPAND_AND_QUOTE(x) QUOTE(x)
316 #define OPNAME(x) OP_##x
317 #define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
318 #define T(x) OP(PARSE_##x)
319 /* Keep in sync with list in decoder.int.h. */
320 switch ((opcode)op) {
321 T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
322 T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
323 OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
324 OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
325 OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
326 OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
327 OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
328 }
329 return "<unknown op>";
330 #undef OP
331 #undef T
332 }
333
334 #endif
335
336 #ifdef UPB_DUMP_BYTECODE
337
dumpbc(uint32_t * p,uint32_t * end,FILE * f)338 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
339
340 uint32_t *begin = p;
341
342 while (p < end) {
343 fprintf(f, "%p %8tx", p, p - begin);
344 uint32_t instr = *p++;
345 uint8_t op = getop(instr);
346 fprintf(f, " %s", upb_pbdecoder_getopname(op));
347 switch ((opcode)op) {
348 case OP_SETDISPATCH: {
349 const upb_inttable *dispatch;
350 memcpy(&dispatch, p, sizeof(void*));
351 p += ptr_words;
352 const upb_pbdecodermethod *method =
353 (void *)((char *)dispatch -
354 offsetof(upb_pbdecodermethod, dispatch));
355 fprintf(f, " %s", upb_msgdef_fullname(
356 upb_handlers_msgdef(method->dest_handlers_)));
357 break;
358 }
359 case OP_DISPATCH:
360 case OP_STARTMSG:
361 case OP_ENDMSG:
362 case OP_PUSHLENDELIM:
363 case OP_POP:
364 case OP_SETDELIM:
365 case OP_HALT:
366 case OP_RET:
367 break;
368 case OP_PARSE_DOUBLE:
369 case OP_PARSE_FLOAT:
370 case OP_PARSE_INT64:
371 case OP_PARSE_UINT64:
372 case OP_PARSE_INT32:
373 case OP_PARSE_FIXED64:
374 case OP_PARSE_FIXED32:
375 case OP_PARSE_BOOL:
376 case OP_PARSE_UINT32:
377 case OP_PARSE_SFIXED32:
378 case OP_PARSE_SFIXED64:
379 case OP_PARSE_SINT32:
380 case OP_PARSE_SINT64:
381 case OP_STARTSEQ:
382 case OP_ENDSEQ:
383 case OP_STARTSUBMSG:
384 case OP_ENDSUBMSG:
385 case OP_STARTSTR:
386 case OP_STRING:
387 case OP_ENDSTR:
388 case OP_PUSHTAGDELIM:
389 fprintf(f, " %d", instr >> 8);
390 break;
391 case OP_SETBIGGROUPNUM:
392 fprintf(f, " %d", *p++);
393 break;
394 case OP_CHECKDELIM:
395 case OP_CALL:
396 case OP_BRANCH:
397 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
398 break;
399 case OP_TAG1:
400 case OP_TAG2: {
401 fprintf(f, " tag:0x%x", instr >> 16);
402 if (getofs(instr)) {
403 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
404 }
405 break;
406 }
407 case OP_TAGN: {
408 uint64_t tag = *p++;
409 tag |= (uint64_t)*p++ << 32;
410 fprintf(f, " tag:0x%llx", (long long)tag);
411 fprintf(f, " n:%d", instr >> 16);
412 if (getofs(instr)) {
413 fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
414 }
415 break;
416 }
417 }
418 fputs("\n", f);
419 }
420 }
421
422 #endif
423
/* Builds the tag for field "f" with the given wire type (field number in the
 * bits above the low three, wire type in the low three) and returns the
 * result of varint-encoding it with upb_vencode32(). */
static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
  const uint32_t raw_tag = (upb_fielddef_number(f) << 3) | wire_type;
  const uint64_t encoded = upb_vencode32(raw_tag);

  /* No tag should be greater than 5 bytes. */
  UPB_ASSERT(encoded <= 0xffffffffff);
  return encoded;
}
431
/* Emits a tag-check instruction for field "f"/"wire_type" that refers to
 * label "dest".  The opcode is chosen by the size of the encoded tag:
 * OP_TAG1 for size 1, OP_TAG2 for size 2, OP_TAGN for anything else. */
static void putchecktag(compiler *c, const upb_fielddef *f,
                        int wire_type, int dest) {
  const uint64_t tag = get_encoded_tag(f, wire_type);
  const int tag_size = upb_value_size(tag);
  int op;

  if (tag_size == 1) {
    op = OP_TAG1;
  } else if (tag_size == 2) {
    op = OP_TAG2;
  } else {
    op = OP_TAGN;
  }
  putop(c, op, dest, tag);
}
447
getsel(const upb_fielddef * f,upb_handlertype_t type)448 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
449 upb_selector_t selector;
450 bool ok = upb_handlers_getselector(f, type, &selector);
451 UPB_ASSERT(ok);
452 return selector;
453 }
454
455 /* Takes an existing, primary dispatch table entry and repacks it with a
456 * different alternate wire type. Called when we are inserting a secondary
457 * dispatch table entry for an alternate wire type. */
repack(uint64_t dispatch,int new_wt2)458 static uint64_t repack(uint64_t dispatch, int new_wt2) {
459 uint64_t ofs;
460 uint8_t wt1;
461 uint8_t old_wt2;
462 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
463 UPB_ASSERT(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */
464 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
465 }
466
467 /* Marks the current bytecode position as the dispatch target for this message,
468 * field, and wire type. */
dispatchtarget(compiler * c,upb_pbdecodermethod * method,const upb_fielddef * f,int wire_type)469 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
470 const upb_fielddef *f, int wire_type) {
471 /* Offset is relative to msg base. */
472 uint64_t ofs = pcofs(c) - method->code_base.ofs;
473 uint32_t fn = upb_fielddef_number(f);
474 upb_inttable *d = &method->dispatch;
475 upb_value v;
476 if (upb_inttable_remove(d, fn, &v)) {
477 /* TODO: prioritize based on packed setting in .proto file. */
478 uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
479 upb_inttable_insert(d, fn, upb_value_uint64(repacked));
480 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
481 } else {
482 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
483 upb_inttable_insert(d, fn, upb_value_uint64(val));
484 }
485 }
486
/* Emits the "push" instruction(s) that open a submessage for field "f":
 * OP_PUSHLENDELIM for MESSAGE-typed fields, otherwise OP_PUSHTAGDELIM carrying
 * the field number.  A field number of 1 << 24 or more does not go in the
 * instruction itself; 0 is written there and the number follows in a separate
 * OP_SETBIGGROUPNUM instruction. */
static void putpush(compiler *c, const upb_fielddef *f) {
  uint32_t fn;

  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
    putop(c, OP_PUSHLENDELIM);
    return;
  }

  fn = upb_fielddef_number(f);
  if (fn < (1 << 24)) {
    putop(c, OP_PUSHTAGDELIM, fn);
    return;
  }

  putop(c, OP_PUSHTAGDELIM, 0);
  putop(c, OP_SETBIGGROUPNUM, fn);
}
500
find_submethod(const compiler * c,const upb_pbdecodermethod * method,const upb_fielddef * f)501 static upb_pbdecodermethod *find_submethod(const compiler *c,
502 const upb_pbdecodermethod *method,
503 const upb_fielddef *f) {
504 const upb_handlers *sub =
505 upb_handlers_getsubhandlers(method->dest_handlers_, f);
506 upb_value v;
507 return upb_inttable_lookupptr(&c->group->methods, sub, &v)
508 ? upb_value_getptr(v)
509 : NULL;
510 }
511
/* Emits "op" with selector "sel", but only when "h" actually has a handler
 * registered for that selector; otherwise emits nothing. */
static void putsel(compiler *c, opcode op, upb_selector_t sel,
                   const upb_handlers *h) {
  if (!upb_handlers_gethandler(h, sel, NULL)) {
    return;
  }
  putop(c, op, sel);
}
518
519 /* Puts an opcode to call a callback, but only if a callback actually exists for
520 * this field and handler type. */
maybeput(compiler * c,opcode op,const upb_handlers * h,const upb_fielddef * f,upb_handlertype_t type)521 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
522 const upb_fielddef *f, upb_handlertype_t type) {
523 putsel(c, op, getsel(f, type), h);
524 }
525
haslazyhandlers(const upb_handlers * h,const upb_fielddef * f)526 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
527 if (!upb_fielddef_lazy(f))
528 return false;
529
530 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL) ||
531 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL) ||
532 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL);
533 }
534
535
536 /* bytecode compiler code generation ******************************************/
537
538 /* Symbolic names for our local labels. */
539 #define LABEL_LOOPSTART 1 /* Top of a repeated field loop. */
540 #define LABEL_LOOPBREAK 2 /* To jump out of a repeated loop */
541 #define LABEL_FIELD 3 /* Jump backward to find the most recent field. */
542 #define LABEL_ENDMSG 4 /* To reach the OP_ENDMSG instr for this msg. */
543
544 /* Generates bytecode to parse a single non-lazy message field. */
generate_msgfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)545 static void generate_msgfield(compiler *c, const upb_fielddef *f,
546 upb_pbdecodermethod *method) {
547 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
548 const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
549 int wire_type;
550
551 if (!sub_m) {
552 /* Don't emit any code for this field at all; it will be parsed as an
553 * unknown field.
554 *
555 * TODO(haberman): we should change this to parse it as a string field
556 * instead. It will probably be faster, but more importantly, once we
557 * start vending unknown fields, a field shouldn't be treated as unknown
558 * just because it doesn't have subhandlers registered. */
559 return;
560 }
561
562 label(c, LABEL_FIELD);
563
564 wire_type =
565 (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
566 ? UPB_WIRE_TYPE_DELIMITED
567 : UPB_WIRE_TYPE_START_GROUP;
568
569 if (upb_fielddef_isseq(f)) {
570 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
571 putchecktag(c, f, wire_type, LABEL_DISPATCH);
572 dispatchtarget(c, method, f, wire_type);
573 putop(c, OP_PUSHTAGDELIM, 0);
574 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
575 label(c, LABEL_LOOPSTART);
576 putpush(c, f);
577 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
578 putop(c, OP_CALL, sub_m);
579 putop(c, OP_POP);
580 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
581 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
582 putop(c, OP_SETDELIM);
583 }
584 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
585 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
586 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
587 label(c, LABEL_LOOPBREAK);
588 putop(c, OP_POP);
589 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
590 } else {
591 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
592 putchecktag(c, f, wire_type, LABEL_DISPATCH);
593 dispatchtarget(c, method, f, wire_type);
594 putpush(c, f);
595 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
596 putop(c, OP_CALL, sub_m);
597 putop(c, OP_POP);
598 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
599 if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
600 putop(c, OP_SETDELIM);
601 }
602 }
603 }
604
605 /* Generates bytecode to parse a single string or lazy submessage field. */
generate_delimfield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)606 static void generate_delimfield(compiler *c, const upb_fielddef *f,
607 upb_pbdecodermethod *method) {
608 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
609
610 label(c, LABEL_FIELD);
611 if (upb_fielddef_isseq(f)) {
612 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
613 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
614 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
615 putop(c, OP_PUSHTAGDELIM, 0);
616 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
617 label(c, LABEL_LOOPSTART);
618 putop(c, OP_PUSHLENDELIM);
619 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
620 /* Need to emit even if no handler to skip past the string. */
621 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
622 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
623 putop(c, OP_POP);
624 putop(c, OP_SETDELIM);
625 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
626 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
627 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
628 label(c, LABEL_LOOPBREAK);
629 putop(c, OP_POP);
630 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
631 } else {
632 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
633 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
634 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
635 putop(c, OP_PUSHLENDELIM);
636 putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
637 putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
638 maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
639 putop(c, OP_POP);
640 putop(c, OP_SETDELIM);
641 }
642 }
643
644 /* Generates bytecode to parse a single primitive field. */
generate_primitivefield(compiler * c,const upb_fielddef * f,upb_pbdecodermethod * method)645 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
646 upb_pbdecodermethod *method) {
647 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
648 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
649 opcode parse_type;
650 upb_selector_t sel;
651 int wire_type;
652
653 label(c, LABEL_FIELD);
654
655 /* From a decoding perspective, ENUM is the same as INT32. */
656 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
657 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
658
659 parse_type = (opcode)descriptor_type;
660
661 /* TODO(haberman): generate packed or non-packed first depending on "packed"
662 * setting in the fielddef. This will favor (in speed) whichever was
663 * specified. */
664
665 UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
666 sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
667 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
668 if (upb_fielddef_isseq(f)) {
669 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
670 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
671 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
672 putop(c, OP_PUSHLENDELIM);
673 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */
674 label(c, LABEL_LOOPSTART);
675 putop(c, parse_type, sel);
676 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
677 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
678 dispatchtarget(c, method, f, wire_type);
679 putop(c, OP_PUSHTAGDELIM, 0);
680 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */
681 label(c, LABEL_LOOPSTART);
682 putop(c, parse_type, sel);
683 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
684 putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
685 putop(c, OP_BRANCH, -LABEL_LOOPSTART);
686 label(c, LABEL_LOOPBREAK);
687 putop(c, OP_POP); /* Packed and non-packed join. */
688 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
689 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */
690 } else {
691 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
692 putchecktag(c, f, wire_type, LABEL_DISPATCH);
693 dispatchtarget(c, method, f, wire_type);
694 putop(c, parse_type, sel);
695 }
696 }
697
698 /* Adds bytecode for parsing the given message to the given decoderplan,
699 * while adding all dispatch targets to this message's dispatch table. */
compile_method(compiler * c,upb_pbdecodermethod * method)700 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
701 const upb_handlers *h;
702 const upb_msgdef *md;
703 uint32_t* start_pc;
704 int i, n;
705 upb_value val;
706
707 UPB_ASSERT(method);
708
709 /* Clear all entries in the dispatch table. */
710 upb_inttable_uninit(&method->dispatch);
711 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
712
713 h = upb_pbdecodermethod_desthandlers(method);
714 md = upb_handlers_msgdef(h);
715
716 method->code_base.ofs = pcofs(c);
717 putop(c, OP_SETDISPATCH, &method->dispatch);
718 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
719 label(c, LABEL_FIELD);
720 start_pc = c->pc;
721 n = upb_msgdef_fieldcount(md);
722 for(i = 0; i < n; i++) {
723 const upb_fielddef *f = upb_msgdef_field(md, i);
724 upb_fieldtype_t type = upb_fielddef_type(f);
725
726 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
727 generate_msgfield(c, f, method);
728 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
729 type == UPB_TYPE_MESSAGE) {
730 generate_delimfield(c, f, method);
731 } else {
732 generate_primitivefield(c, f, method);
733 }
734 }
735
736 /* If there were no fields, or if no handlers were defined, we need to
737 * generate a non-empty loop body so that we can at least dispatch for unknown
738 * fields and check for the end of the message. */
739 if (c->pc == start_pc) {
740 /* Check for end-of-message. */
741 putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
742 /* Unconditionally dispatch. */
743 putop(c, OP_DISPATCH, 0);
744 }
745
746 /* For now we just loop back to the last field of the message (or if none,
747 * the DISPATCH opcode for the message). */
748 putop(c, OP_BRANCH, -LABEL_FIELD);
749
750 /* Insert both a label and a dispatch table entry for this end-of-msg. */
751 label(c, LABEL_ENDMSG);
752 val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
753 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
754
755 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
756 putop(c, OP_RET);
757
758 upb_inttable_compact(&method->dispatch);
759 }
760
761 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
762 * Returns the method for these handlers.
763 *
764 * Generates a new method for every destination handlers reachable from "h". */
find_methods(compiler * c,const upb_handlers * h)765 static void find_methods(compiler *c, const upb_handlers *h) {
766 upb_value v;
767 int i, n;
768 const upb_msgdef *md;
769 upb_pbdecodermethod *method;
770
771 if (upb_inttable_lookupptr(&c->group->methods, h, &v))
772 return;
773
774 method = newmethod(h, c->group);
775 upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method));
776
777 /* Find submethods. */
778 md = upb_handlers_msgdef(h);
779 n = upb_msgdef_fieldcount(md);
780 for (i = 0; i < n; i++) {
781 const upb_fielddef *f = upb_msgdef_field(md, i);
782 const upb_handlers *sub_h;
783 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
784 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
785 /* We only generate a decoder method for submessages with handlers.
786 * Others will be parsed as unknown fields. */
787 find_methods(c, sub_h);
788 }
789 }
790 }
791
792 /* (Re-)compile bytecode for all messages in "msgs."
793 * Overwrites any existing bytecode in "c". */
compile_methods(compiler * c)794 static void compile_methods(compiler *c) {
795 upb_inttable_iter i;
796
797 /* Start over at the beginning of the bytecode. */
798 c->pc = c->group->bytecode;
799
800 upb_inttable_begin(&i, &c->group->methods);
801 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
802 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
803 compile_method(c, method);
804 }
805 }
806
/* Finalizes every method in "g": converts its code_base from an offset into a
 * pointer into the group's bytecode and installs the bytecode decoder
 * callbacks on the method's input byteshandler. */
static void set_bytecode_handlers(mgroup *g) {
  upb_inttable_iter it;

  for (upb_inttable_begin(&it, &g->methods);
       !upb_inttable_done(&it);
       upb_inttable_next(&it)) {
    upb_pbdecodermethod *method =
        upb_value_getptr(upb_inttable_iter_value(&it));
    upb_byteshandler *input = &method->input_handler_;

    /* From here on code_base holds a pointer, not an offset. */
    method->code_base.ptr = g->bytecode + method->code_base.ofs;

    upb_byteshandler_setstartstr(input, upb_pbdecoder_startbc,
                                 method->code_base.ptr);
    upb_byteshandler_setstring(input, upb_pbdecoder_decode, g);
    upb_byteshandler_setendstr(input, upb_pbdecoder_end, method);
  }
}
821
822
823 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
824 * handlers and other mgroups (but verify we have a transitive closure). */
mgroup_new(const upb_handlers * dest,bool lazy)825 const mgroup *mgroup_new(const upb_handlers *dest, bool lazy) {
826 mgroup *g;
827 compiler *c;
828
829 g = newgroup();
830 c = newcompiler(g, lazy);
831 find_methods(c, dest);
832
833 /* We compile in two passes:
834 * 1. all messages are assigned relative offsets from the beginning of the
835 * bytecode (saved in method->code_base).
836 * 2. forwards OP_CALL instructions can be correctly linked since message
837 * offsets have been previously assigned.
838 *
839 * Could avoid the second pass by linking OP_CALL instructions somehow. */
840 compile_methods(c);
841 compile_methods(c);
842 g->bytecode_end = c->pc;
843 freecompiler(c);
844
845 #ifdef UPB_DUMP_BYTECODE
846 {
847 FILE *f = fopen("/tmp/upb-bytecode", "w");
848 UPB_ASSERT(f);
849 dumpbc(g->bytecode, g->bytecode_end, stderr);
850 dumpbc(g->bytecode, g->bytecode_end, f);
851 fclose(f);
852
853 f = fopen("/tmp/upb-bytecode.bin", "wb");
854 UPB_ASSERT(f);
855 fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
856 fclose(f);
857 }
858 #endif
859
860 set_bytecode_handlers(g);
861 return g;
862 }
863
864
865 /* upb_pbcodecache ************************************************************/
866
upb_pbcodecache_new(upb_handlercache * dest)867 upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) {
868 upb_pbcodecache *c = upb_gmalloc(sizeof(*c));
869
870 if (!c) return NULL;
871
872 c->dest = dest;
873 c->lazy = false;
874
875 c->arena = upb_arena_new();
876 if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) return NULL;
877
878 return c;
879 }
880
/* Destroys a code cache: frees every cached method group, then the groups
 * table, the arena and the cache object itself. */
void upb_pbcodecache_free(upb_pbcodecache *c) {
  upb_inttable_iter it;

  upb_inttable_begin(&it, &c->groups);
  while (!upb_inttable_done(&it)) {
    /* Groups are stored as const pointers; the cache owns them. */
    mgroup *group = (void*)upb_value_getconstptr(upb_inttable_iter_value(&it));
    freegroup(group);
    upb_inttable_next(&it);
  }

  upb_inttable_uninit(&c->groups);
  upb_arena_free(c->arena);
  upb_gfree(c);
}
894
/* Sets whether groups compiled by this cache parse lazy fields lazily.  Must
 * be called while the cache holds no groups (asserted). */
void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) {
  const upb_inttable *groups = &c->groups;

  UPB_ASSERT(upb_inttable_count(groups) == 0);
  c->lazy = lazy;
}
899
upb_pbcodecache_get(upb_pbcodecache * c,const upb_msgdef * md)900 const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c,
901 const upb_msgdef *md) {
902 upb_value v;
903 bool ok;
904 const upb_handlers *h;
905 const mgroup *g;
906
907 h = upb_handlercache_get(c->dest, md);
908 if (upb_inttable_lookupptr(&c->groups, md, &v)) {
909 g = upb_value_getconstptr(v);
910 } else {
911 g = mgroup_new(h, c->lazy);
912 ok = upb_inttable_insertptr(&c->groups, md, upb_value_constptr(g));
913 UPB_ASSUME(ok);
914 }
915
916 ok = upb_inttable_lookupptr(&g->methods, h, &v);
917 UPB_ASSUME(ok);
918 return upb_value_getptr(v);
919 }
920