1 
2 #include "upb/text_encode.h"
3 
4 #include <ctype.h>
5 #include <float.h>
6 #include <inttypes.h>
7 #include <stdarg.h>
8 #include <stdio.h>
9 #include <string.h>
10 
11 #include "upb/reflection.h"
12 #include "upb/port_def.inc"
13 
14 typedef struct {
15   char *buf, *ptr, *end;
16   size_t overflow;
17   int indent_depth;
18   int options;
19   const upb_symtab *ext_pool;
20   _upb_mapsorter sorter;
21 } txtenc;
22 
23 static void txtenc_msg(txtenc *e, const upb_msg *msg, const upb_msgdef *m);
24 
/*
 * Appends `len` raw bytes from `data` to the output.
 *
 * Whatever fits is copied into the buffer; bytes that do not fit are only
 * counted in e->overflow so the total required size can still be reported.
 */
static void txtenc_putbytes(txtenc *e, const void *data, size_t len) {
  size_t room = e->end - e->ptr;

  if (UPB_LIKELY(len <= room)) {
    /* Common case: everything fits. */
    memcpy(e->ptr, data, len);
    e->ptr += len;
    return;
  }

  /* Truncate: keep the prefix that fits and account for the remainder. */
  if (room != 0) {
    memcpy(e->ptr, data, room);
  }
  e->ptr += room;
  e->overflow += len - room;
}
36 
/* Appends a NUL-terminated string (without its terminator) to the output. */
static void txtenc_putstr(txtenc *e, const char *str) {
  size_t n = strlen(str);
  txtenc_putbytes(e, str, n);
}
40 
/*
 * Appends printf-style formatted text to the output.
 *
 * The text is formatted directly into the remaining buffer space.  If it does
 * not fit, the write position is moved to the end of the buffer and the
 * shortfall is added to e->overflow.
 */
static void txtenc_printf(txtenc *e, const char *fmt, ...) {
  size_t have = e->end - e->ptr;
  size_t n;
  int ret;
  va_list args;

  va_start(args, fmt);
  ret = vsnprintf(e->ptr, have, fmt, args);
  va_end(args);

  /*
   * vsnprintf() signals an output/encoding error with a negative value.
   * Converting that to size_t would yield a huge length and corrupt both the
   * write position and the overflow count, so emit nothing in that case.
   */
  if (ret < 0) return;
  n = (size_t)ret;

  if (UPB_LIKELY(have > n)) {
    e->ptr += n;
  } else {
    /* Output was truncated (or exactly filled the space, leaving no room for
     * vsnprintf's terminator): consume the rest of the buffer. */
    e->ptr += have;
    e->overflow += (n - have);
  }
}
57 
/*
 * Emits two spaces per nesting level.  Does nothing when the
 * UPB_TXTENC_SINGLELINE option is set.
 */
static void txtenc_indent(txtenc *e) {
  int level;

  if (e->options & UPB_TXTENC_SINGLELINE) return;

  for (level = 0; level < e->indent_depth; level++) {
    txtenc_putstr(e, "  ");
  }
}
66 
/*
 * Terminates a field: a single space in single-line mode, otherwise a newline.
 */
static void txtenc_endfield(txtenc *e) {
  const char *sep = (e->options & UPB_TXTENC_SINGLELINE) ? " " : "\n";
  txtenc_putstr(e, sep);
}
74 
/*
 * Prints an enum value by its symbolic name when the enum definition has one
 * for `val`; otherwise prints the raw number.
 */
static void txtenc_enum(int32_t val, const upb_fielddef *f, txtenc *e) {
  const char *label = upb_enumdef_iton(upb_fielddef_enumsubdef(f), val);

  if (!label) {
    /* No name is known for this number. */
    txtenc_printf(e, "%" PRId32, val);
    return;
  }

  txtenc_printf(e, "%s", label);
}
85 
/*
 * Prints `str` as a double-quoted, escaped string literal.
 *
 * Common control characters, quotes and backslashes get their two-character
 * escapes.  Other non-printable ASCII bytes are written as three-digit octal
 * escapes.  Bytes >= 0x80 are octal-escaped when `bytes` is true and passed
 * through unchanged otherwise.
 */
static void txtenc_string(txtenc *e, upb_strview str, bool bytes) {
  const char *ptr = str.data;
  const char *end = ptr + str.size;
  txtenc_putstr(e, "\"");

  for (; ptr < end; ptr++) {
    /*
     * Work on the unsigned value: plain char may be signed, and passing a
     * negative value to <ctype.h> classifiers is undefined behavior.
     */
    unsigned char ch = (unsigned char)*ptr;

    switch (ch) {
      case '\n':
        txtenc_putstr(e, "\\n");
        break;
      case '\r':
        txtenc_putstr(e, "\\r");
        break;
      case '\t':
        txtenc_putstr(e, "\\t");
        break;
      case '\"':
        txtenc_putstr(e, "\\\"");
        break;
      case '\'':
        txtenc_putstr(e, "\\'");
        break;
      case '\\':
        txtenc_putstr(e, "\\\\");
        break;
      default: {
        /* Explicit ASCII range keeps the output independent of the locale. */
        bool printable = ch >= 0x20 && ch <= 0x7E;
        bool high_bit = ch >= 0x80;

        if (!printable && (bytes || !high_bit)) {
          txtenc_printf(e, "\\%03o", (int)ch);
        } else {
          txtenc_putbytes(e, ptr, 1);
        }
        break;
      }
    }
  }

  txtenc_putstr(e, "\"");
}
124 
/*
 * Prints a single field as "name: value" followed by the field terminator.
 *
 * Message-typed values are printed as a brace-delimited block whose contents
 * come from txtenc_msg() at one deeper indentation level.
 */
static void txtenc_field(txtenc *e, upb_msgval val, const upb_fielddef *f) {
  txtenc_indent(e);
  txtenc_printf(e, "%s: ", upb_fielddef_name(f));

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_BOOL:
      txtenc_putstr(e, val.bool_val ? "true" : "false");
      break;
    case UPB_TYPE_FLOAT:
      /*
       * "%f" keeps only six fractional digits, so small magnitudes collapse
       * to 0.000000.  FLT_DIG + 3 significant digits are enough for an IEEE
       * single to survive a print/parse round trip.
       */
      txtenc_printf(e, "%.*g", FLT_DIG + 3, val.float_val);
      break;
    case UPB_TYPE_DOUBLE:
      /* Likewise, DBL_DIG + 2 significant digits round-trip an IEEE double. */
      txtenc_printf(e, "%.*g", DBL_DIG + 2, val.double_val);
      break;
    case UPB_TYPE_INT32:
      txtenc_printf(e, "%" PRId32, val.int32_val);
      break;
    case UPB_TYPE_UINT32:
      txtenc_printf(e, "%" PRIu32, val.uint32_val);
      break;
    case UPB_TYPE_INT64:
      txtenc_printf(e, "%" PRId64, val.int64_val);
      break;
    case UPB_TYPE_UINT64:
      txtenc_printf(e, "%" PRIu64, val.uint64_val);
      break;
    case UPB_TYPE_STRING:
      txtenc_string(e, val.str_val, false);
      break;
    case UPB_TYPE_BYTES:
      txtenc_string(e, val.str_val, true);
      break;
    case UPB_TYPE_ENUM:
      txtenc_enum(val.int32_val, f, e);
      break;
    case UPB_TYPE_MESSAGE:
      txtenc_putstr(e, "{");
      txtenc_endfield(e);
      e->indent_depth++;
      txtenc_msg(e, val.msg_val, upb_fielddef_msgsubdef(f));
      e->indent_depth--;
      txtenc_indent(e);
      txtenc_putstr(e, "}");
      break;
  }

  txtenc_endfield(e);
}
173 
174 /*
175  * Arrays print as simple repeated elements, eg.
176  *
177  *    foo_field: 1
178  *    foo_field: 2
179  *    foo_field: 3
180  */
txtenc_array(txtenc * e,const upb_array * arr,const upb_fielddef * f)181 static void txtenc_array(txtenc *e, const upb_array *arr,
182                          const upb_fielddef *f) {
183   size_t i;
184   size_t size = upb_array_size(arr);
185 
186   for (i = 0; i < size; i++) {
187     txtenc_field(e, upb_array_get(arr, i), f);
188   }
189 }
190 
/*
 * Prints one map entry as a brace-delimited block holding the key field and
 * the value field, which are fields 0 and 1 of the map's entry message type.
 */
static void txtenc_mapentry(txtenc *e, upb_msgval key, upb_msgval val,
                            const upb_fielddef *f) {
  const upb_msgdef *entry_def = upb_fielddef_msgsubdef(f);
  const upb_fielddef *key_field = upb_msgdef_field(entry_def, 0);
  const upb_fielddef *value_field = upb_msgdef_field(entry_def, 1);

  /* Opening line: "<map field name>: {" */
  txtenc_indent(e);
  txtenc_printf(e, "%s: {", upb_fielddef_name(f));
  txtenc_endfield(e);

  /* Entry body, one level deeper. */
  e->indent_depth++;
  txtenc_field(e, key, key_field);
  txtenc_field(e, val, value_field);
  e->indent_depth--;

  /* Closing brace. */
  txtenc_indent(e);
  txtenc_putstr(e, "}");
  txtenc_endfield(e);
}
209 
210 /*
211  * Maps print as messages of key/value, etc.
212  *
213  *    foo_map: {
214  *      key: "abc"
215  *      value: 123
216  *    }
217  *    foo_map: {
218  *      key: "def"
219  *      value: 456
220  *    }
221  */
txtenc_map(txtenc * e,const upb_map * map,const upb_fielddef * f)222 static void txtenc_map(txtenc *e, const upb_map *map, const upb_fielddef *f) {
223   if (e->options & UPB_TXTENC_NOSORT) {
224     size_t iter = UPB_MAP_BEGIN;
225     while (upb_mapiter_next(map, &iter)) {
226       upb_msgval key = upb_mapiter_key(map, iter);
227       upb_msgval val = upb_mapiter_value(map, iter);
228       txtenc_mapentry(e, key, val, f);
229     }
230   } else {
231     const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
232     const upb_fielddef *key_f = upb_msgdef_field(entry, 0);
233     _upb_sortedmap sorted;
234     upb_map_entry ent;
235 
236     _upb_mapsorter_pushmap(&e->sorter, upb_fielddef_descriptortype(key_f), map,
237                            &sorted);
238     while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
239       upb_msgval key, val;
240       memcpy(&key, &ent.k, sizeof(key));
241       memcpy(&val, &ent.v, sizeof(val));
242       txtenc_mapentry(e, key, val, f);
243     }
244     _upb_mapsorter_popmap(&e->sorter, &sorted);
245   }
246 }
247 
#define CHK(x) do { if (!(x)) { return false; } } while(0)

/*
 * Decodes one base-128 varint starting at `ptr`, reading no further than
 * `limit`.
 *
 * On success stores the value in `*val` and returns the position just past
 * the varint.  Returns NULL if the input ends before the varint terminates or
 * if the varint is longer than ten bytes.
 */
static const char *txtenc_parsevarint(const char *ptr, const char *limit,
                                      uint64_t *val) {
  int shift;

  *val = 0;

  for (shift = 0;; shift += 7) {
    uint8_t b;

    /* Ten bytes (shift 0..63) is the longest encoding accepted. */
    if (shift >= 70 || ptr >= limit) return NULL;

    b = *ptr;
    ptr++;
    *val |= (uint64_t)(b & 0x7F) << shift;

    /* A clear high bit marks the final byte. */
    if ((b & 0x80) == 0) return ptr;
  }
}
266 
267 /*
268  * Unknown fields are printed by number.
269  *
270  * 1001: 123
271  * 1002: "hello"
272  * 1006: 0xdeadbeef
273  * 1003: {
274  *   1: 111
275  * }
276  */
txtenc_unknown(txtenc * e,const char * ptr,const char * end,int groupnum)277 static const char *txtenc_unknown(txtenc *e, const char *ptr, const char *end,
278                                   int groupnum) {
279   while (ptr < end) {
280     uint64_t tag_64;
281     uint32_t tag;
282     CHK(ptr = txtenc_parsevarint(ptr, end, &tag_64));
283     CHK(tag_64 < UINT32_MAX);
284     tag = (uint32_t)tag_64;
285 
286     if ((tag & 7) == UPB_WIRE_TYPE_END_GROUP) {
287       CHK((tag >> 3) == (uint32_t)groupnum);
288       return ptr;
289     }
290 
291     txtenc_indent(e);
292     txtenc_printf(e, "%d: ", (int)(tag >> 3));
293 
294     switch (tag & 7) {
295       case UPB_WIRE_TYPE_VARINT: {
296         uint64_t val;
297         CHK(ptr = txtenc_parsevarint(ptr, end, &val));
298         txtenc_printf(e, "%" PRIu64, val);
299         break;
300       }
301       case UPB_WIRE_TYPE_32BIT: {
302         uint32_t val;
303         CHK(end - ptr >= 4);
304         memcpy(&val, ptr, 4);
305         ptr += 4;
306         txtenc_printf(e, "0x%08" PRIu32, val);
307         break;
308       }
309       case UPB_WIRE_TYPE_64BIT: {
310         uint64_t val;
311         CHK(end - ptr >= 8);
312         memcpy(&val, ptr, 8);
313         ptr += 8;
314         txtenc_printf(e, "0x%016" PRIu64, val);
315         break;
316       }
317       case UPB_WIRE_TYPE_DELIMITED: {
318         uint64_t len;
319         size_t avail = end - ptr;
320         char *start = e->ptr;
321         size_t start_overflow = e->overflow;
322         CHK(ptr = txtenc_parsevarint(ptr, end, &len));
323         CHK(avail >= len);
324 
325         /* Speculatively try to parse as message. */
326         txtenc_putstr(e, "{");
327         txtenc_endfield(e);
328         e->indent_depth++;
329         if (txtenc_unknown(e, ptr, end, -1)) {
330           e->indent_depth--;
331           txtenc_indent(e);
332           txtenc_putstr(e, "}");
333         } else {
334           /* Didn't work out, print as raw bytes. */
335           upb_strview str;
336           e->indent_depth--;
337           e->ptr = start;
338           e->overflow = start_overflow;
339           str.data = ptr;
340           str.size = len;
341           txtenc_string(e, str, true);
342         }
343         ptr += len;
344         break;
345       }
346       case UPB_WIRE_TYPE_START_GROUP:
347         txtenc_putstr(e, "{");
348         txtenc_endfield(e);
349         e->indent_depth++;
350         CHK(ptr = txtenc_unknown(e, ptr, end, tag >> 3));
351         e->indent_depth--;
352         txtenc_indent(e);
353         txtenc_putstr(e, "}");
354         break;
355     }
356     txtenc_endfield(e);
357   }
358 
359   return groupnum == -1 ? ptr : NULL;
360 }
361 
362 #undef CHK
363 
/*
 * Prints every field that upb_msg_next() yields for `msg`, followed by its
 * unknown fields unless UPB_TXTENC_SKIPUNKNOWN is set.
 *
 * If the unknown-field data cannot be parsed, everything printed for it is
 * discarded and nothing is emitted for unknown fields.
 */
static void txtenc_msg(txtenc *e, const upb_msg *msg,
                       const upb_msgdef *m) {
  size_t iter = UPB_MSG_BEGIN;
  const upb_fielddef *f;
  upb_msgval val;

  while (upb_msg_next(msg, m, e->ext_pool, &f, &val, &iter)) {
    if (upb_fielddef_ismap(f)) {
      txtenc_map(e, val.map_val, f);
    } else if (upb_fielddef_isseq(f)) {
      txtenc_array(e, val.array_val, f);
    } else {
      txtenc_field(e, val, f);
    }
  }

  if ((e->options & UPB_TXTENC_SKIPUNKNOWN) == 0) {
    size_t len;
    const char *ptr = upb_msg_getunknown(msg, &len);
    if (ptr) {
      /* Snapshot everything a failed attempt may have modified. */
      char *start = e->ptr;
      size_t start_overflow = e->overflow;
      int start_depth = e->indent_depth;

      if (!txtenc_unknown(e, ptr, ptr + len, -1)) {
        /*
         * Unknown failed to parse, back up and don't print it at all.  The
         * overflow count must be rewound with the write position, otherwise
         * the reported output size would include the discarded text.  A
         * failed parse may also leave the indent depth raised.
         */
        e->ptr = start;
        e->overflow = start_overflow;
        e->indent_depth = start_depth;
      }
    }
  }
}
392 
/*
 * NUL-terminates the output and returns the length of the complete text
 * (bytes written plus bytes that overflowed), not counting the terminator.
 *
 * `size` is the capacity of the output buffer.  When it is zero nothing is
 * written.  When the buffer is completely full, the last byte written is
 * replaced by the terminator.
 *
 * File-private helper: its parameter type is local to this file, so it has
 * internal linkage and does not export an unprefixed symbol.
 */
static size_t txtenc_nullz(txtenc *e, size_t size) {
  size_t ret = e->ptr - e->buf + e->overflow;

  if (size > 0) {
    /* No room left: sacrifice the final byte for the terminator. */
    if (e->ptr == e->end) e->ptr--;
    *e->ptr = '\0';
  }

  return ret;
}
403 
upb_text_encode(const upb_msg * msg,const upb_msgdef * m,const upb_symtab * ext_pool,int options,char * buf,size_t size)404 size_t upb_text_encode(const upb_msg *msg, const upb_msgdef *m,
405                        const upb_symtab *ext_pool, int options, char *buf,
406                        size_t size) {
407   txtenc e;
408 
409   e.buf = buf;
410   e.ptr = buf;
411   e.end = buf + size;
412   e.overflow = 0;
413   e.indent_depth = 0;
414   e.options = options;
415   e.ext_pool = ext_pool;
416   _upb_mapsorter_init(&e.sorter);
417 
418   txtenc_msg(&e, msg, m);
419   _upb_mapsorter_destroy(&e.sorter);
420   return txtenc_nullz(&e, size);
421 }
422