1
2 #include "upb/text_encode.h"
3
4 #include <ctype.h>
5 #include <float.h>
6 #include <inttypes.h>
7 #include <stdarg.h>
8 #include <stdio.h>
9 #include <string.h>
10
11 #include "upb/reflection.h"
12 #include "upb/port_def.inc"
13
14 typedef struct {
15 char *buf, *ptr, *end;
16 size_t overflow;
17 int indent_depth;
18 int options;
19 const upb_symtab *ext_pool;
20 _upb_mapsorter sorter;
21 } txtenc;
22
23 static void txtenc_msg(txtenc *e, const upb_msg *msg, const upb_msgdef *m);
24
/* Appends `len` bytes from `data` to the output.  Whatever does not fit in the
 * remaining buffer space is dropped and added to e->overflow instead. */
static void txtenc_putbytes(txtenc *e, const void *data, size_t len) {
  size_t room = e->end - e->ptr;

  if (UPB_LIKELY(len <= room)) {
    memcpy(e->ptr, data, len);
    e->ptr += len;
    return;
  }

  /* Partial write: fill the space that is left and record the shortfall. */
  if (room != 0) memcpy(e->ptr, data, room);
  e->ptr += room;
  e->overflow += len - room;
}
36
/* Appends the NUL-terminated string `str` (without its terminator). */
static void txtenc_putstr(txtenc *e, const char *str) {
  size_t n = strlen(str);
  txtenc_putbytes(e, str, n);
}
40
/* Appends printf-style formatted text to the output.
 *
 * The text is formatted directly into the remaining buffer space.  If it does
 * not fit, the write position moves to the end of the buffer and the number of
 * bytes that could not be stored is added to e->overflow. */
static void txtenc_printf(txtenc *e, const char *fmt, ...) {
  int rc;
  size_t n;
  size_t have = e->end - e->ptr;
  va_list args;

  va_start(args, fmt);
  rc = vsnprintf(e->ptr, have, fmt, args);
  va_end(args);

  /* vsnprintf() signals an error with a negative value.  Converting that to
   * size_t would yield a huge length and corrupt the overflow count, so emit
   * nothing for this call instead. */
  if (rc < 0) return;
  n = (size_t)rc;

  /* Strictly greater: vsnprintf() also needs room for its NUL terminator. */
  if (UPB_LIKELY(have > n)) {
    e->ptr += n;
  } else {
    e->ptr += have;
    e->overflow += (n - have);
  }
}
57
/* Emits one indentation unit per nesting level.  Does nothing when the
 * UPB_TXTENC_SINGLELINE option is set. */
static void txtenc_indent(txtenc *e) {
  int level;

  if (e->options & UPB_TXTENC_SINGLELINE) return;

  for (level = 0; level < e->indent_depth; level++) {
    txtenc_putstr(e, " ");
  }
}
66
/* Terminates a field: a space in single-line mode, otherwise a newline. */
static void txtenc_endfield(txtenc *e) {
  const char *sep = (e->options & UPB_TXTENC_SINGLELINE) ? " " : "\n";
  txtenc_putstr(e, sep);
}
74
/* Prints an enum value: by name when the enum definition knows the number,
 * otherwise as the plain decimal number. */
static void txtenc_enum(int32_t val, const upb_fielddef *f, txtenc *e) {
  const char *symbol = upb_enumdef_iton(upb_fielddef_enumsubdef(f), val);

  if (!symbol) {
    txtenc_printf(e, "%" PRId32, val);
    return;
  }

  txtenc_printf(e, "%s", symbol);
}
85
/* Prints `str` as a double-quoted, escaped string literal.
 *
 * Newline, carriage return, tab, both quote characters and backslash get
 * their two-character escapes.  Other non-printable bytes are written as
 * three-digit octal escapes.  When `bytes` is false, bytes >= 0x80 are copied
 * through unescaped; when `bytes` is true they are subject to the same
 * printable check as every other byte. */
static void txtenc_string(txtenc *e, upb_strview str, bool bytes) {
  const char *ptr = str.data;
  const char *end = ptr + str.size;
  txtenc_putstr(e, "\"");

  while (ptr < end) {
    /* <ctype.h> functions require a value representable as unsigned char (or
     * EOF); passing a negative plain char is undefined behavior, so classify
     * the byte through an unsigned copy. */
    unsigned char ch = (unsigned char)*ptr;

    switch (ch) {
      case '\n':
        txtenc_putstr(e, "\\n");
        break;
      case '\r':
        txtenc_putstr(e, "\\r");
        break;
      case '\t':
        txtenc_putstr(e, "\\t");
        break;
      case '\"':
        txtenc_putstr(e, "\\\"");
        break;
      case '\'':
        txtenc_putstr(e, "\\'");
        break;
      case '\\':
        txtenc_putstr(e, "\\\\");
        break;
      default:
        if ((bytes || ch < 0x80) && !isprint(ch)) {
          txtenc_printf(e, "\\%03o", (int)ch);
        } else {
          txtenc_putbytes(e, ptr, 1);
        }
        break;
    }
    ptr++;
  }

  txtenc_putstr(e, "\"");
}
124
/* Prints one field as "<name>: <value>" followed by the field terminator.
 * Sub-messages are printed as a braced block with their contents nested one
 * indentation level deeper. */
static void txtenc_field(txtenc *e, upb_msgval val, const upb_fielddef *f) {
  txtenc_indent(e);
  txtenc_printf(e, "%s: ", upb_fielddef_name(f));

  switch (upb_fielddef_type(f)) {
    case UPB_TYPE_MESSAGE:
      txtenc_putstr(e, "{");
      txtenc_endfield(e);
      e->indent_depth += 1;
      txtenc_msg(e, val.msg_val, upb_fielddef_msgsubdef(f));
      e->indent_depth -= 1;
      txtenc_indent(e);
      txtenc_putstr(e, "}");
      break;

    case UPB_TYPE_ENUM:
      txtenc_enum(val.int32_val, f, e);
      break;

    case UPB_TYPE_STRING:
      txtenc_string(e, val.str_val, false);
      break;
    case UPB_TYPE_BYTES:
      txtenc_string(e, val.str_val, true);
      break;

    case UPB_TYPE_BOOL:
      if (val.bool_val) {
        txtenc_putstr(e, "true");
      } else {
        txtenc_putstr(e, "false");
      }
      break;

    case UPB_TYPE_INT32:
      txtenc_printf(e, "%" PRId32, val.int32_val);
      break;
    case UPB_TYPE_INT64:
      txtenc_printf(e, "%" PRId64, val.int64_val);
      break;
    case UPB_TYPE_UINT32:
      txtenc_printf(e, "%" PRIu32, val.uint32_val);
      break;
    case UPB_TYPE_UINT64:
      txtenc_printf(e, "%" PRIu64, val.uint64_val);
      break;

    case UPB_TYPE_FLOAT:
      txtenc_printf(e, "%f", val.float_val);
      break;
    case UPB_TYPE_DOUBLE:
      txtenc_printf(e, "%f", val.double_val);
      break;
  }

  txtenc_endfield(e);
}
173
174 /*
175 * Arrays print as simple repeated elements, eg.
176 *
177 * foo_field: 1
178 * foo_field: 2
179 * foo_field: 3
180 */
static void txtenc_array(txtenc *e, const upb_array *arr,
                         const upb_fielddef *f) {
  /* Each element is emitted as its own occurrence of field `f`. */
  const size_t count = upb_array_size(arr);
  size_t idx = 0;

  while (idx < count) {
    txtenc_field(e, upb_array_get(arr, idx), f);
    idx++;
  }
}
190
/* Prints a single map entry as a braced block under the map field's name.
 * The block holds the key and the value, printed using fields 0 and 1 of the
 * map-entry message definition respectively. */
static void txtenc_mapentry(txtenc *e, upb_msgval key, upb_msgval val,
                            const upb_fielddef *f) {
  const upb_msgdef *entry_def = upb_fielddef_msgsubdef(f);
  const upb_fielddef *key_field = upb_msgdef_field(entry_def, 0);
  const upb_fielddef *value_field = upb_msgdef_field(entry_def, 1);

  /* Opening line: "<map field name>: {". */
  txtenc_indent(e);
  txtenc_printf(e, "%s: {", upb_fielddef_name(f));
  txtenc_endfield(e);

  e->indent_depth += 1;
  txtenc_field(e, key, key_field);
  txtenc_field(e, val, value_field);
  e->indent_depth -= 1;

  /* Closing brace at the outer indentation level. */
  txtenc_indent(e);
  txtenc_putstr(e, "}");
  txtenc_endfield(e);
}
209
210 /*
 * Maps print as messages of key/value, e.g.
212 *
213 * foo_map: {
214 * key: "abc"
215 * value: 123
216 * }
217 * foo_map: {
218 * key: "def"
219 * value: 456
220 * }
221 */
static void txtenc_map(txtenc *e, const upb_map *map, const upb_fielddef *f) {
  if (!(e->options & UPB_TXTENC_NOSORT)) {
    /* Default: hand the map to the sorter and emit entries in the order it
     * yields them. */
    const upb_fielddef *key_field =
        upb_msgdef_field(upb_fielddef_msgsubdef(f), 0);
    _upb_sortedmap sorted;
    upb_map_entry ent;

    _upb_mapsorter_pushmap(&e->sorter, upb_fielddef_descriptortype(key_field),
                           map, &sorted);
    while (_upb_sortedmap_next(&e->sorter, map, &sorted, &ent)) {
      upb_msgval entry_key;
      upb_msgval entry_val;
      memcpy(&entry_key, &ent.k, sizeof(entry_key));
      memcpy(&entry_val, &ent.v, sizeof(entry_val));
      txtenc_mapentry(e, entry_key, entry_val, f);
    }
    _upb_mapsorter_popmap(&e->sorter, &sorted);
  } else {
    /* UPB_TXTENC_NOSORT: emit entries in the map's own iteration order. */
    size_t pos = UPB_MAP_BEGIN;

    while (upb_mapiter_next(map, &pos)) {
      txtenc_mapentry(e, upb_mapiter_key(map, pos),
                      upb_mapiter_value(map, pos), f);
    }
  }
}
247
/* Bails out of the enclosing function when `x` is false/NULL.  Every user of
 * this macro returns `const char *`, so the failure value is NULL rather than
 * a boolean. */
#define CHK(x) do { if (!(x)) { return NULL; } } while(0)

/* Decodes one base-128 varint starting at `ptr` and stores it in `*val`.
 *
 * Returns the position just past the varint, or NULL if the data ends before
 * the varint is terminated or the varint runs to more than ten bytes.
 * `*val` may hold a partial value when NULL is returned. */
static const char *txtenc_parsevarint(const char *ptr, const char *limit,
                                      uint64_t *val) {
  uint8_t byte;
  int bitpos = 0;
  *val = 0;

  do {
    /* Ten groups of 7 bits (bitpos 0..63) is the most a varint may carry. */
    CHK(bitpos < 70 && ptr < limit);
    byte = *ptr;
    *val |= (uint64_t)(byte & 0x7F) << bitpos;
    ptr++;
    bitpos += 7;
  } while (byte & 0x80); /* High bit set means another byte follows. */

  return ptr;
}
266
267 /*
268 * Unknown fields are printed by number.
269 *
270 * 1001: 123
271 * 1002: "hello"
272 * 1006: 0xdeadbeef
273 * 1003: {
274 * 1: 111
275 * }
276 */
/* Prints the unknown-field wire data in [ptr, end), one field per record.
 *
 * `groupnum` is the field number of the enclosing group, or -1 when not
 * inside a group.  Returns the position after the consumed data on success
 * (for a group, just past its matching END_GROUP tag), or NULL if the data is
 * malformed.  On failure, partial output may already have been written;
 * callers that need to discard it must restore the encoder state. */
static const char *txtenc_unknown(txtenc *e, const char *ptr, const char *end,
                                  int groupnum) {
  while (ptr < end) {
    uint64_t tag_64;
    uint32_t tag;
    CHK(ptr = txtenc_parsevarint(ptr, end, &tag_64));
    CHK(tag_64 < UINT32_MAX);
    tag = (uint32_t)tag_64;

    if ((tag & 7) == UPB_WIRE_TYPE_END_GROUP) {
      /* An END_GROUP tag must close the group we are currently inside. */
      CHK((tag >> 3) == (uint32_t)groupnum);
      return ptr;
    }

    txtenc_indent(e);
    txtenc_printf(e, "%d: ", (int)(tag >> 3));

    switch (tag & 7) {
      case UPB_WIRE_TYPE_VARINT: {
        uint64_t val;
        CHK(ptr = txtenc_parsevarint(ptr, end, &val));
        txtenc_printf(e, "%" PRIu64, val);
        break;
      }
      case UPB_WIRE_TYPE_32BIT: {
        /* NOTE(review): memcpy yields the host byte order; the wire format is
         * little-endian -- confirm big-endian hosts are handled elsewhere. */
        uint32_t val;
        CHK(end - ptr >= 4);
        memcpy(&val, ptr, 4);
        ptr += 4;
        /* Hex conversion, so the digits agree with the "0x" prefix. */
        txtenc_printf(e, "0x%08" PRIx32, val);
        break;
      }
      case UPB_WIRE_TYPE_64BIT: {
        uint64_t val;
        CHK(end - ptr >= 8);
        memcpy(&val, ptr, 8);
        ptr += 8;
        txtenc_printf(e, "0x%016" PRIx64, val);
        break;
      }
      case UPB_WIRE_TYPE_DELIMITED: {
        uint64_t len;
        char *start = e->ptr;
        size_t start_overflow = e->overflow;
        int start_depth = e->indent_depth;
        CHK(ptr = txtenc_parsevarint(ptr, end, &len));
        /* Check the payload against what remains *after* the length prefix;
         * measuring before the prefix would permit reading past `end`. */
        CHK((uint64_t)(end - ptr) >= len);

        /* Speculatively try to parse as message, confined to the payload. */
        txtenc_putstr(e, "{");
        txtenc_endfield(e);
        e->indent_depth++;
        if (txtenc_unknown(e, ptr, ptr + len, -1)) {
          e->indent_depth--;
          txtenc_indent(e);
          txtenc_putstr(e, "}");
        } else {
          /* Didn't work out, print as raw bytes.  Undo everything the failed
           * attempt changed; nested groups may have left indent_depth more
           * than one level deeper. */
          upb_strview str;
          e->indent_depth = start_depth;
          e->ptr = start;
          e->overflow = start_overflow;
          str.data = ptr;
          str.size = (size_t)len;
          txtenc_string(e, str, true);
        }
        ptr += len;
        break;
      }
      case UPB_WIRE_TYPE_START_GROUP:
        txtenc_putstr(e, "{");
        txtenc_endfield(e);
        e->indent_depth++;
        CHK(ptr = txtenc_unknown(e, ptr, end, tag >> 3));
        e->indent_depth--;
        txtenc_indent(e);
        txtenc_putstr(e, "}");
        break;
      default:
        /* Wire types 6 and 7 are not defined: the data is malformed. */
        return NULL;
    }
    txtenc_endfield(e);
  }

  /* Running out of data is only valid outside a group; inside one, the
   * matching END_GROUP tag must have been seen first. */
  return groupnum == -1 ? ptr : NULL;
}
361
362 #undef CHK
363
/* Prints every field that upb_msg_next() yields for `msg` and then, unless
 * UPB_TXTENC_SKIPUNKNOWN is set, the message's unknown fields.  Unknown
 * fields that fail to parse are omitted entirely. */
static void txtenc_msg(txtenc *e, const upb_msg *msg,
                       const upb_msgdef *m) {
  size_t iter = UPB_MSG_BEGIN;
  const upb_fielddef *f;
  upb_msgval val;

  while (upb_msg_next(msg, m, e->ext_pool, &f, &val, &iter)) {
    if (upb_fielddef_ismap(f)) {
      txtenc_map(e, val.map_val, f);
    } else if (upb_fielddef_isseq(f)) {
      txtenc_array(e, val.array_val, f);
    } else {
      txtenc_field(e, val, f);
    }
  }

  if ((e->options & UPB_TXTENC_SKIPUNKNOWN) == 0) {
    size_t len;
    const char *ptr = upb_msg_getunknown(msg, &len);
    /* Snapshot everything txtenc_unknown() may modify so that a failed parse
     * can be undone completely. */
    char *start = e->ptr;
    size_t start_overflow = e->overflow;
    int start_depth = e->indent_depth;
    if (ptr) {
      if (!txtenc_unknown(e, ptr, ptr + len, -1)) {
        /* Unknown failed to parse, back up and don't print it at all.  The
         * overflow count and indent depth must be rewound too, otherwise the
         * reported size and later indentation would be wrong. */
        e->ptr = start;
        e->overflow = start_overflow;
        e->indent_depth = start_depth;
      }
    }
  }
}
392
/* NUL-terminates the output and returns the total encoded length.
 *
 * The returned length counts the bytes written plus the bytes that
 * overflowed, and excludes the terminator.  When `size` is nonzero a
 * terminator is always stored; if the buffer is completely full, the last
 * written byte is overwritten to make room for it.
 *
 * File-local helper: it operates on the private txtenc type, so it is given
 * internal linkage. */
static size_t txtenc_nullz(txtenc *e, size_t size) {
  size_t ret = e->ptr - e->buf + e->overflow;

  if (size > 0) {
    if (e->ptr == e->end) e->ptr--;
    *e->ptr = '\0';
  }

  return ret;
}
403
upb_text_encode(const upb_msg * msg,const upb_msgdef * m,const upb_symtab * ext_pool,int options,char * buf,size_t size)404 size_t upb_text_encode(const upb_msg *msg, const upb_msgdef *m,
405 const upb_symtab *ext_pool, int options, char *buf,
406 size_t size) {
407 txtenc e;
408
409 e.buf = buf;
410 e.ptr = buf;
411 e.end = buf + size;
412 e.overflow = 0;
413 e.indent_depth = 0;
414 e.options = options;
415 e.ext_pool = ext_pool;
416 _upb_mapsorter_init(&e.sorter);
417
418 txtenc_msg(&e, msg, m);
419 _upb_mapsorter_destroy(&e.sorter);
420 return txtenc_nullz(&e, size);
421 }
422