1 /* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
2 
3 #include "upb/encode.h"
4 
5 #include <setjmp.h>
6 #include <string.h>
7 
8 #include "upb/msg.h"
9 #include "upb/upb.h"
10 
11 /* Must be last. */
12 #include "upb/port_def.inc"
13 
14 #define UPB_PB_VARINT_MAX_LEN 10
15 
16 UPB_NOINLINE
/* Writes "val" to "buf" as a base-128 varint, least-significant group first.
 * Every byte except the last has its high bit set.  "buf" must have room for
 * up to UPB_PB_VARINT_MAX_LEN bytes.  Returns the number of bytes written
 * (always at least 1). */
static size_t encode_varint64(uint64_t val, char *buf) {
  char *out = buf;
  /* Emit continuation bytes while more than 7 significant bits remain. */
  while (val >= 0x80U) {
    *out++ = (char)((val & 0x7fU) | 0x80U);
    val >>= 7;
  }
  /* Final byte: high bit clear. */
  *out++ = (char)val;
  return (size_t)(out - buf);
}
27 
/* ZigZag-encodes a signed 32-bit value: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 * Done entirely in unsigned arithmetic: right-shifting a negative signed value
 * is implementation-defined, so the sign mask is built from the top bit. */
static uint32_t encode_zz32(int32_t n) {
  uint32_t u = (uint32_t)n;
  uint32_t sign_mask = (uint32_t)((uint32_t)0 - (u >> 31)); /* 0 or all ones */
  return (uint32_t)(u << 1) ^ sign_mask;
}
/* ZigZag-encodes a signed 64-bit value: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 * Done entirely in unsigned arithmetic: right-shifting a negative signed value
 * is implementation-defined, so the sign mask is built from the top bit. */
static uint64_t encode_zz64(int64_t n) {
  uint64_t u = (uint64_t)n;
  uint64_t sign_mask = (uint64_t)0 - (u >> 63); /* 0 or all ones */
  return (u << 1) ^ sign_mask;
}
30 
31 typedef struct {
32   jmp_buf err;
33   upb_alloc *alloc;
34   char *buf, *ptr, *limit;
35   int options;
36   int depth;
37   _upb_mapsorter sorter;
38 } upb_encstate;
39 
/* Returns the smallest power of two that is >= "bytes", with a floor of 128.
 *
 * If "bytes" is larger than the biggest power of two representable in size_t,
 * no such value exists; "bytes" itself is returned in that case.  (Doubling
 * past that point would wrap to 0 and the loop would never terminate.) */
static size_t upb_roundup_pow2(size_t bytes) {
  const size_t max_pow2 = (((size_t)-1) >> 1) + 1;
  size_t ret = 128;
  while (ret < bytes) {
    if (ret >= max_pow2) {
      return bytes; /* Cannot double without overflowing. */
    }
    ret *= 2;
  }
  return ret;
}
47 
encode_err(upb_encstate * e)48 UPB_NORETURN static void encode_err(upb_encstate *e) {
49   UPB_LONGJMP(e->err, 1);
50 }
51 
52 UPB_NOINLINE
encode_growbuffer(upb_encstate * e,size_t bytes)53 static void encode_growbuffer(upb_encstate *e, size_t bytes) {
54   size_t old_size = e->limit - e->buf;
55   size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
56   char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
57 
58   if (!new_buf) encode_err(e);
59 
60   /* We want previous data at the end, realloc() put it at the beginning. */
61   if (old_size > 0) {
62     memmove(new_buf + new_size - old_size, e->buf, old_size);
63   }
64 
65   e->ptr = new_buf + new_size - (e->limit - e->ptr);
66   e->limit = new_buf + new_size;
67   e->buf = new_buf;
68 
69   e->ptr -= bytes;
70 }
71 
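/* Call to ensure that at least "bytes" bytes are available for writing at
 * e->ptr.  On return e->ptr has been moved back by "bytes".  There is no
 * return value: if the buffer has to grow and cannot, encode_growbuffer()
 * bails out through encode_err(). */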
74 UPB_FORCEINLINE
encode_reserve(upb_encstate * e,size_t bytes)75 static void encode_reserve(upb_encstate *e, size_t bytes) {
76   if ((size_t)(e->ptr - e->buf) < bytes) {
77     encode_growbuffer(e, bytes);
78     return;
79   }
80 
81   e->ptr -= bytes;
82 }
83 
84 /* Writes the given bytes to the buffer, handling reserve/advance. */
encode_bytes(upb_encstate * e,const void * data,size_t len)85 static void encode_bytes(upb_encstate *e, const void *data, size_t len) {
86   if (len == 0) return;  /* memcpy() with zero size is UB */
87   encode_reserve(e, len);
88   memcpy(e->ptr, data, len);
89 }
90 
/* Prepends a fixed-width 64-bit value to the output.  The value is passed
 * through _upb_be_swap64() first and the resulting bytes are copied as-is.
 * NOTE(review): presumably the swap is a no-op on little-endian hosts so the
 * bytes land in little-endian wire order -- confirm against its definition. */
static void encode_fixed64(upb_encstate *e, uint64_t val) {
  uint64_t wire = _upb_be_swap64(val);
  encode_bytes(e, &wire, sizeof(wire));
}
95 
/* Prepends a fixed-width 32-bit value to the output.  The value is passed
 * through _upb_be_swap32() first and the resulting bytes are copied as-is.
 * NOTE(review): presumably the swap is a no-op on little-endian hosts so the
 * bytes land in little-endian wire order -- confirm against its definition. */
static void encode_fixed32(upb_encstate *e, uint32_t val) {
  uint32_t wire = _upb_be_swap32(val);
  encode_bytes(e, &wire, sizeof(wire));
}
100 
101 UPB_NOINLINE
encode_longvarint(upb_encstate * e,uint64_t val)102 static void encode_longvarint(upb_encstate *e, uint64_t val) {
103   size_t len;
104   char *start;
105 
106   encode_reserve(e, UPB_PB_VARINT_MAX_LEN);
107   len = encode_varint64(val, e->ptr);
108   start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
109   memmove(start, e->ptr, len);
110   e->ptr = start;
111 }
112 
113 UPB_FORCEINLINE
encode_varint(upb_encstate * e,uint64_t val)114 static void encode_varint(upb_encstate *e, uint64_t val) {
115   if (val < 128 && e->ptr != e->buf) {
116     --e->ptr;
117     *e->ptr = val;
118   } else {
119     encode_longvarint(e, val);
120   }
121 }
122 
/* Prepends a double as a fixed 64-bit value by copying its object
 * representation into an integer (memcpy avoids aliasing problems). */
static void encode_double(upb_encstate *e, double d) {
  uint64_t bits;
  UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
  memcpy(&bits, &d, sizeof(bits));
  encode_fixed64(e, bits);
}
129 
/* Prepends a float as a fixed 32-bit value by copying its object
 * representation into an integer (memcpy avoids aliasing problems). */
static void encode_float(upb_encstate *e, float d) {
  uint32_t bits;
  UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
  memcpy(&bits, &d, sizeof(bits));
  encode_fixed32(e, bits);
}
136 
/* Prepends a field tag: the field number shifted left by three bits, with the
 * wire type in the low three bits, written as a varint. */
static void encode_tag(upb_encstate *e, uint32_t field_number,
                       uint8_t wire_type) {
  uint32_t tag = (field_number << 3) | wire_type;
  encode_varint(e, tag);
}
141 
/* Prepends the elements of a fixed-width array of "elem_size"-byte values.
 *
 * tag != 0: each element is written individually, last element first, each
 *           followed by the tag varint (so the tag precedes it in the output).
 * tag == 0: the whole array is copied as one contiguous run.
 *
 * The caller must ensure arr->len > 0; the element loop assumes there is a
 * last element to start from. */
static void encode_fixedarray(upb_encstate *e, const upb_array *arr,
                               size_t elem_size, uint32_t tag) {
  const char *data = _upb_array_constptr(arr);
  size_t bytes = arr->len * elem_size;

  if (!tag) {
    encode_bytes(e, data, bytes);
    return;
  }

  /* Walk from the last element back to the first. */
  const char *ptr = data + bytes - elem_size;
  for (;; ptr -= elem_size) {
    encode_bytes(e, ptr, elem_size);
    encode_varint(e, tag);
    if (ptr == data) break;
  }
}
158 
159 static void encode_message(upb_encstate *e, const char *msg,
160                            const upb_msglayout *m, size_t *size);
161 
/* Encodes one singular value located at "_field_mem", followed by its tag
 * (which, because output is written backwards, ends up in front of it).
 *
 *   e               encoder state.
 *   _field_mem      address of the value; its C type is selected by
 *                   f->descriptortype.
 *   m               layout of the containing message, used to look up the
 *                   sub-message layout for group/message fields.
 *   f               field description (number, descriptortype, submsg_index).
 *   skip_zero_value when true, a zero value (or empty string/bytes) produces
 *                   no output at all.
 *
 * Null sub-messages produce no output regardless of skip_zero_value.  Each
 * level of sub-message nesting consumes one unit of e->depth; reaching zero
 * aborts through encode_err().
 *
 * Floating-point zero is detected by bit pattern rather than "val == 0", so
 * that -0.0 (which compares equal to zero but is a distinct value) is still
 * encoded. */
static void encode_scalar(upb_encstate *e, const void *_field_mem,
                          const upb_msglayout *m, const upb_msglayout_field *f,
                          bool skip_zero_value) {
  const char *field_mem = _field_mem;
  int wire_type;

/* Integer-like case: load "ctype", optionally skip zero, encode "encodeval"
 * (an expression in terms of "val") with encode_<type>(). */
#define CASE(ctype, type, wtype, encodeval) \
  {                                         \
    ctype val = *(const ctype *)field_mem;  \
    if (skip_zero_value && val == 0) {      \
      return;                               \
    }                                       \
    encode_##type(e, encodeval);            \
    wire_type = wtype;                      \
    break;                                  \
  }

  switch (f->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE: {
      double val = *(const double *)field_mem;
      uint64_t bits;
      UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
      memcpy(&bits, &val, sizeof(bits));
      /* Only an all-zero bit pattern (+0.0) counts as "zero"; -0.0 does not. */
      if (skip_zero_value && bits == 0) {
        return;
      }
      encode_double(e, val);
      wire_type = UPB_WIRE_TYPE_64BIT;
      break;
    }
    case UPB_DESCRIPTOR_TYPE_FLOAT: {
      float val = *(const float *)field_mem;
      uint32_t bits;
      UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
      memcpy(&bits, &val, sizeof(bits));
      /* Only an all-zero bit pattern (+0.0) counts as "zero"; -0.0 does not. */
      if (skip_zero_value && bits == 0) {
        return;
      }
      encode_float(e, val);
      wire_type = UPB_WIRE_TYPE_32BIT;
      break;
    }
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val);
    case UPB_DESCRIPTOR_TYPE_UINT32:
      CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val);
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      /* Sign-extend to 64 bits before the varint conversion. */
      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val);
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val);
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, encode_zz32(val));
    case UPB_DESCRIPTOR_TYPE_SINT64:
      CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, encode_zz64(val));
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      upb_strview view = *(const upb_strview *)field_mem;
      if (skip_zero_value && view.size == 0) {
        return;
      }
      /* Payload first, then its length (which precedes it in the output). */
      encode_bytes(e, view.data, view.size);
      encode_varint(e, view.size);
      wire_type = UPB_WIRE_TYPE_DELIMITED;
      break;
    }
    case UPB_DESCRIPTOR_TYPE_GROUP: {
      size_t size;
      void *submsg = *(void *const *)field_mem;
      const upb_msglayout *subm = m->submsgs[f->submsg_index];
      if (submsg == NULL) {
        return;
      }
      if (--e->depth == 0) encode_err(e);
      /* END_GROUP is written first so it follows the body in the output; the
       * START_GROUP tag is emitted by the common encode_tag() below. */
      encode_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP);
      encode_message(e, submsg, subm, &size);
      wire_type = UPB_WIRE_TYPE_START_GROUP;
      e->depth++;
      break;
    }
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      size_t size;
      void *submsg = *(void *const *)field_mem;
      const upb_msglayout *subm = m->submsgs[f->submsg_index];
      if (submsg == NULL) {
        return;
      }
      if (--e->depth == 0) encode_err(e);
      /* Body first, then its encoded size as the length prefix. */
      encode_message(e, submsg, subm, &size);
      encode_varint(e, size);
      wire_type = UPB_WIRE_TYPE_DELIMITED;
      e->depth++;
      break;
    }
    default:
      UPB_UNREACHABLE();
  }
#undef CASE

  encode_tag(e, f->number, wire_type);
}
250 
/* Encodes a repeated field whose upb_array pointer is stored at "field_mem".
 *
 *   e          encoder state.
 *   field_mem  address of the "const upb_array *" for this field.
 *   m          layout of the containing message (for sub-message lookup).
 *   f          field description.
 *
 * A NULL or empty array produces no output.  Elements are visited last to
 * first because output is written backwards.  When f->label is
 * _UPB_LABEL_PACKED, numeric elements are written without per-element tags and
 * a single length prefix plus DELIMITED tag is added afterwards; otherwise
 * each element carries its own tag.  String/bytes, group and message elements
 * always carry their own tag and return before the packed epilogue.
 *
 * Group and message arrays consume one unit of e->depth for the whole array;
 * reaching zero aborts through encode_err(). */
static void encode_array(upb_encstate *e, const char *field_mem,
                         const upb_msglayout *m, const upb_msglayout_field *f) {
  const upb_array *arr = *(const upb_array *const *)field_mem;
  bool packed = f->label == _UPB_LABEL_PACKED;
  /* Bytes already written, so the packed payload length can be computed. */
  size_t pre_len = e->limit - e->ptr;

  if (arr == NULL || arr->len == 0) {
    return;
  }

/* Varint element loop: "encode" is an expression in terms of "ptr". */
#define VARINT_CASE(ctype, encode)                                       \
  {                                                                      \
    const ctype *start = _upb_array_constptr(arr);                       \
    const ctype *ptr = start + arr->len;                                 \
    uint32_t tag = packed ? 0 : (f->number << 3) | UPB_WIRE_TYPE_VARINT; \
    do {                                                                 \
      ptr--;                                                             \
      encode_varint(e, encode);                                          \
      if (tag) encode_varint(e, tag);                                    \
    } while (ptr != start);                                              \
  }                                                                      \
  break;

/* Per-element tag for fixed-width types; 0 means "packed, no element tag". */
#define TAG(wire_type) (packed ? 0 : (f->number << 3 | (wire_type)))

  switch (f->descriptortype) {
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      encode_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT));
      break;
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      encode_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT));
      break;
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      encode_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT));
      break;
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      encode_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT));
      break;
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      VARINT_CASE(uint64_t, *ptr);
    case UPB_DESCRIPTOR_TYPE_UINT32:
      VARINT_CASE(uint32_t, *ptr);
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      /* Sign-extend to 64 bits before the varint conversion. */
      VARINT_CASE(int32_t, (int64_t)*ptr);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      VARINT_CASE(bool, *ptr);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      VARINT_CASE(int32_t, encode_zz32(*ptr));
    case UPB_DESCRIPTOR_TYPE_SINT64:
      VARINT_CASE(int64_t, encode_zz64(*ptr));
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      const upb_strview *start = _upb_array_constptr(arr);
      const upb_strview *ptr = start + arr->len;
      do {
        ptr--;
        encode_bytes(e, ptr->data, ptr->size);
        encode_varint(e, ptr->size);
        encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
      } while (ptr != start);
      return;
    }
    case UPB_DESCRIPTOR_TYPE_GROUP: {
      const void *const*start = _upb_array_constptr(arr);
      const void *const*ptr = start + arr->len;
      const upb_msglayout *subm = m->submsgs[f->submsg_index];
      if (--e->depth == 0) encode_err(e);
      do {
        size_t size;
        ptr--;
        /* END_GROUP, body, START_GROUP: reversed because we write backwards. */
        encode_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP);
        encode_message(e, *ptr, subm, &size);
        encode_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
      } while (ptr != start);
      e->depth++;
      return;
    }
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      const void *const*start = _upb_array_constptr(arr);
      const void *const*ptr = start + arr->len;
      const upb_msglayout *subm = m->submsgs[f->submsg_index];
      if (--e->depth == 0) encode_err(e);
      do {
        size_t size;
        ptr--;
        encode_message(e, *ptr, subm, &size);
        encode_varint(e, size);
        encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
      } while (ptr != start);
      e->depth++;
      return;
    }
    default:
      /* Every descriptor type is handled above, as in encode_scalar(). */
      UPB_UNREACHABLE();
  }
#undef VARINT_CASE
#undef TAG

  if (packed) {
    /* Length of everything this call wrote, then the field's tag. */
    encode_varint(e, e->limit - e->ptr - pre_len);
    encode_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
  }
}
355 
/* Encodes one map entry as a length-delimited sub-record of field "number".
 * "layout" is the entry layout: fields[0] describes the key and fields[1] the
 * value.  Both are always emitted, even when zero. */
static void encode_mapentry(upb_encstate *e, uint32_t number,
                            const upb_msglayout *layout,
                            const upb_map_entry *ent) {
  size_t before = e->limit - e->ptr;

  /* Value before key: output is written backwards, so the key ends up first. */
  encode_scalar(e, &ent->v, layout, &layout->fields[1], false);
  encode_scalar(e, &ent->k, layout, &layout->fields[0], false);

  /* Length prefix covers exactly what the two calls above produced. */
  size_t entry_size = (e->limit - e->ptr) - before;
  encode_varint(e, entry_size);
  encode_tag(e, number, UPB_WIRE_TYPE_DELIMITED);
}
369 
/* Encodes a map field whose upb_map pointer is stored at "field_mem".  Each
 * entry is emitted through encode_mapentry().  A NULL map produces no output.
 *
 * With UPB_ENCODE_DETERMINISTIC set, entries are visited in the order given by
 * the map sorter in e->sorter; otherwise in the map's table iteration order. */
static void encode_map(upb_encstate *e, const char *field_mem,
                       const upb_msglayout *m, const upb_msglayout_field *f) {
  const upb_map *map = *(const upb_map**)field_mem;
  const upb_msglayout *layout = m->submsgs[f->submsg_index];
  UPB_ASSERT(layout->field_count == 2);

  if (!map) return;

  if ((e->options & UPB_ENCODE_DETERMINISTIC) == 0) {
    /* Unordered: walk the underlying table directly. */
    upb_strtable_iter it;
    upb_strtable_begin(&it, &map->table);
    while (!upb_strtable_done(&it)) {
      upb_strview key = upb_strtable_iter_key(&it);
      const upb_value val = upb_strtable_iter_value(&it);
      upb_map_entry entry;
      _upb_map_fromkey(key, &entry.k, map->key_size);
      _upb_map_fromvalue(val, &entry.v, map->val_size);
      encode_mapentry(e, f->number, layout, &entry);
      upb_strtable_next(&it);
    }
    return;
  }

  /* Deterministic: let the sorter order entries by the key field's type.
   * NOTE(review): any result returned by _upb_mapsorter_pushmap() is not
   * inspected here -- confirm whether it can fail. */
  _upb_sortedmap sorted;
  _upb_mapsorter_pushmap(&e->sorter, layout->fields[0].descriptortype, map,
                         &sorted);
  upb_map_entry entry;
  while (_upb_sortedmap_next(&e->sorter, map, &sorted, &entry)) {
    encode_mapentry(e, f->number, layout, &entry);
  }
  _upb_mapsorter_popmap(&e->sorter, &sorted);
}
400 
/* Encodes a singular field of "msg" if it is considered present.
 *
 *   f->presence > 0   hasbit presence (proto2): skipped unless the hasbit is
 *                     set.
 *   f->presence < 0   oneof member: skipped unless the oneof case equals this
 *                     field's number.
 *   f->presence == 0  proto3 presence: always handed to encode_scalar(), which
 *                     is told to drop zero/empty values. */
static void encode_scalarfield(upb_encstate *e, const char *msg,
                               const upb_msglayout *m,
                               const upb_msglayout_field *f) {
  if (f->presence > 0) {
    if (!_upb_hasbit_field(msg, f)) return;
  } else if (f->presence < 0) {
    if (_upb_getoneofcase_field(msg, f) != f->number) return;
  }

  bool skip_empty = (f->presence == 0);
  encode_scalar(e, msg + f->offset, m, f, skip_empty);
}
417 
/* Encodes every field of "msg" (laid out per "m") and stores the number of
 * bytes produced in *size.
 *
 * Because output is written backwards, unknown fields are written first (so
 * they come last in the output) and the known fields are then visited from the
 * last layout entry to the first.  Unknown fields are omitted when
 * UPB_ENCODE_SKIPUNKNOWN is set in e->options. */
static void encode_message(upb_encstate *e, const char *msg,
                           const upb_msglayout *m, size_t *size) {
  size_t before = e->limit - e->ptr;

  if (!(e->options & UPB_ENCODE_SKIPUNKNOWN)) {
    size_t unknown_size;
    const char *unknown = upb_msg_getunknown(msg, &unknown_size);
    if (unknown) {
      encode_bytes(e, unknown, unknown_size);
    }
  }

  for (size_t i = m->field_count; i > 0; i--) {
    const upb_msglayout_field *f = &m->fields[i - 1];
    const char *field_mem = msg + f->offset;

    if (_upb_isrepeated(f)) {
      encode_array(e, field_mem, m, f);
    } else if (f->label == _UPB_LABEL_MAP) {
      encode_map(e, field_mem, m, f);
    } else {
      encode_scalarfield(e, msg, m, f);
    }
  }

  *size = (e->limit - e->ptr) - before;
}
446 
/* Serializes "msg" (laid out per "m") into memory obtained from "arena".
 *
 *   options  option bits (e.g. UPB_ENCODE_DETERMINISTIC,
 *            UPB_ENCODE_SKIPUNKNOWN) in the low 16 bits; the bits above them
 *            give the maximum nesting depth, with 0 meaning the default of 64.
 *   size     receives the encoded length, or 0 on failure.
 *
 * Returns a pointer to the encoded bytes.  An empty encoding returns a
 * non-NULL pointer to a static byte so that NULL unambiguously means failure
 * (the encode was aborted through encode_err()). */
char *upb_encode_ex(const void *msg, const upb_msglayout *m, int options,
                    upb_arena *arena, size_t *size) {
  upb_encstate e;
  unsigned max_depth = (unsigned)options >> 16;
  char *ret;

  e.options = options;
  e.depth = max_depth ? max_depth : 64;
  e.alloc = upb_arena_alloc(arena);
  /* No buffer yet; the first write allocates one. */
  e.buf = NULL;
  e.ptr = NULL;
  e.limit = NULL;
  _upb_mapsorter_init(&e.sorter);

  if (UPB_SETJMP(e.err)) {
    /* Reached via encode_err(). */
    *size = 0;
    ret = NULL;
  } else {
    encode_message(&e, msg, m, size);
    *size = e.limit - e.ptr;
    if (*size != 0) {
      UPB_ASSERT(e.ptr);
      ret = e.ptr;
    } else {
      static char ch;
      ret = &ch;
    }
  }

  _upb_mapsorter_destroy(&e.sorter);
  return ret;
}
479