1 /*
2  *
3  * An exhaustive set of tests for parsing both valid and invalid protobuf
4  * input, with buffer breaks in arbitrary places.
5  *
6  * Tests to add:
7  * - string/bytes
8  * - unknown field handler called appropriately
9  * - unknown fields can be inserted in random places
10  * - fuzzing of valid input
11  * - resource limits (max stack depth, max string len)
12  * - testing of groups
 * - more thorough testing of sequences
14  * - test skipping of submessages
15  * - test suspending the decoder
16  * - buffers that are close enough to the end of the address space that
17  *   pointers overflow (this might be difficult).
 * - a few "kitchen sink" examples (one proto that uses all types, lots
 *   of submsg/sequences, etc.)
20  * - test different handlers at every level and whether handlers fire at
21  *   the correct field path.
22  * - test skips that extend past the end of current buffer (where decoder
23  *   returns value greater than the size param).
24  */
25 
26 #ifndef __STDC_FORMAT_MACROS
27 #define __STDC_FORMAT_MACROS  // For PRIuS, etc.
28 #endif
29 
30 #include <inttypes.h>
31 #include <stdarg.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <sstream>
36 
37 #include "tests/test_util.h"
38 #include "tests/upb_test.h"
39 #include "tests/pb/test_decoder.upbdefs.h"
40 
41 #ifdef AMALGAMATED
42 #include "upb.h"
43 #else  // AMALGAMATED
44 #include "upb/handlers.h"
45 #include "upb/pb/decoder.h"
46 #include "upb/upb.h"
47 #endif  // !AMALGAMATED
48 
49 #include "upb/port_def.inc"
50 
// Replaces any earlier definition of PRINT_FAILURE with one that writes the
// file/line of the failure and the stringified expression to stderr.  The
// macro expands to two bare statements, so it must be used where a statement
// sequence is valid.
#undef PRINT_FAILURE
#define PRINT_FAILURE(expr)                                           \
  fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__);   \
  fprintf(stderr, "expr: %s\n", #expr);                               \

// Size of the closures[] array and the nesting limit handed to the decoder in
// CreateDecoder().
#define MAX_NESTING 64

// Appends a newline to a string literal via literal concatenation; used to
// write multi-line expected output one line at a time.
#define LINE(x) x "\n"
59 
// When nonzero, run_decoder() builds a verbose parser environment and
// do_run_decoder() prints a description of every test case to stderr.
// NOTE(review): presumably a hash selecting a single test case -- confirm
// against the code that sets it.
uint32_t filter_hash = 0;
// NOTE(review): not referenced in the visible part of this file; presumably
// progress counters -- confirm.
double completed;
double total;
// Points at the counter that do_run_decoder() increments once per run.
double *count;
64 
// Selects what do_run_decoder() and callback() do for each test case.
enum TestMode {
  // do_run_decoder() resets the environment and bumps *count without parsing.
  COUNT_ONLY = 1,
  // callback() registers no handlers; the input is parsed but the parse result
  // and output text are not compared against expectations.
  NO_HANDLERS = 2,
  // callback() registers every handler and do_run_decoder() checks both the
  // parse result and the generated output text.
  ALL_HANDLERS = 3
} test_mode;
70 
// Copied from decoder.c, since this is not a public interface.
typedef struct {
  uint8_t native_wire_type;  // Wire type used when the field is not packed.
  bool is_numeric;
} upb_decoder_typeinfo;

// Indexed by upb_descriptortype_t (see the lookups in the test functions
// below); the trailing comment on each row names the descriptor type it
// describes.  Row 0 is the ENDGROUP placeholder.
static const upb_decoder_typeinfo upb_decoder_types[] = {
  {UPB_WIRE_TYPE_END_GROUP,   false},  // ENDGROUP
  {UPB_WIRE_TYPE_64BIT,       true},   // DOUBLE
  {UPB_WIRE_TYPE_32BIT,       true},   // FLOAT
  {UPB_WIRE_TYPE_VARINT,      true},   // INT64
  {UPB_WIRE_TYPE_VARINT,      true},   // UINT64
  {UPB_WIRE_TYPE_VARINT,      true},   // INT32
  {UPB_WIRE_TYPE_64BIT,       true},   // FIXED64
  {UPB_WIRE_TYPE_32BIT,       true},   // FIXED32
  {UPB_WIRE_TYPE_VARINT,      true},   // BOOL
  {UPB_WIRE_TYPE_DELIMITED,   false},  // STRING
  {UPB_WIRE_TYPE_START_GROUP, false},  // GROUP
  {UPB_WIRE_TYPE_DELIMITED,   false},  // MESSAGE
  {UPB_WIRE_TYPE_DELIMITED,   false},  // BYTES
  {UPB_WIRE_TYPE_VARINT,      true},   // UINT32
  {UPB_WIRE_TYPE_VARINT,      true},   // ENUM
  {UPB_WIRE_TYPE_32BIT,       true},   // SFIXED32
  {UPB_WIRE_TYPE_64BIT,       true},   // SFIXED64
  {UPB_WIRE_TYPE_VARINT,      true},   // SINT32
  {UPB_WIRE_TYPE_VARINT,      true},   // SINT64
};
98 
99 #ifndef USE_GOOGLE
100 using std::string;
101 #endif
102 
vappendf(string * str,const char * format,va_list args)103 void vappendf(string* str, const char *format, va_list args) {
104   va_list copy;
105   va_copy(copy, args);
106 
107   int count = vsnprintf(NULL, 0, format, args);
108   if (count >= 0)
109   {
110     UPB_ASSERT(count < 32768);
111     char *buffer = new char[count + 1];
112     UPB_ASSERT(buffer);
113     count = vsnprintf(buffer, count + 1, format, copy);
114     UPB_ASSERT(count >= 0);
115     str->append(buffer, count);
116     delete [] buffer;
117   }
118   va_end(copy);
119 }
120 
appendf(string * str,const char * fmt,...)121 void appendf(string* str, const char *fmt, ...) {
122   va_list args;
123   va_start(args, fmt);
124   vappendf(str, fmt, args);
125   va_end(args);
126 }
127 
// Writes `str` to stderr, printing printable bytes verbatim and every other
// byte as a \xNN hex escape.
void PrintBinary(const string& str) {
  for (size_t i = 0; i < str.size(); i++) {
    // isprint() is only defined for values representable as unsigned char (or
    // EOF); a plain char may be negative for bytes >= 0x80, so convert first.
    const unsigned char byte = static_cast<unsigned char>(str[i]);
    if (isprint(byte)) {
      fprintf(stderr, "%c", byte);
    } else {
      fprintf(stderr, "\\x%02x", static_cast<unsigned>(byte));
    }
  }
}
137 
// Size of the scratch buffer varint() hands to upb_vencode64().
#define UPB_PB_VARINT_MAX_LEN 10

// Writes `val` to `buf` as a base-128 varint (7 payload bits per byte, least
// significant group first, high bit set on every byte except the last) and
// returns the number of bytes written.  Zero is encoded as a single 0 byte.
static size_t upb_vencode64(uint64_t val, char *buf) {
  size_t written = 0;
  do {
    uint8_t group = val & 0x7fU;
    val >>= 7;
    if (val != 0) group |= 0x80U;  // More groups follow.
    buf[written++] = group;
  } while (val != 0);
  return written;
}
152 
// ZigZag-encodes a 32-bit signed integer: the value shifted left by one,
// XORed with the result of shifting the signed value right by 31.
static uint32_t upb_zzenc_32(int32_t n) {
  const uint32_t doubled = (uint32_t)n << 1;
  const int32_t sign_fill = n >> 31;
  return doubled ^ sign_fill;
}
156 
// ZigZag-encodes a 64-bit signed integer: the value shifted left by one,
// XORed with the result of shifting the signed value right by 63.
static uint64_t upb_zzenc_64(int64_t n) {
  const uint64_t doubled = (uint64_t)n << 1;
  const int64_t sign_fill = n >> 63;
  return doubled ^ sign_fill;
}
160 
161 /* Routines for building arbitrary protos *************************************/
162 
// Default value for the optional arguments of cat().
const string empty;

// Concatenates between two and twelve strings, in argument order, into one
// newly built string.  Omitted arguments default to the empty string.
string cat(const string& a, const string& b,
           const string& c = empty,
           const string& d = empty,
           const string& e = empty,
           const string& f = empty,
           const string& g = empty,
           const string& h = empty,
           const string& i = empty,
           const string& j = empty,
           const string& k = empty,
           const string& l = empty) {
  const string* const pieces[] = {&a, &b, &c, &d, &e, &f,
                                  &g, &h, &i, &j, &k, &l};
  const size_t num_pieces = sizeof(pieces) / sizeof(pieces[0]);

  // Size the result up front so the appends below never reallocate.
  size_t combined_len = 0;
  for (size_t n = 0; n < num_pieces; n++) {
    combined_len += pieces[n]->size();
  }

  string joined;
  joined.reserve(combined_len);
  for (size_t n = 0; n < num_pieces; n++) {
    joined.append(*pieces[n]);
  }
  return joined;
}
193 
// Returns the text produced by streaming `num` into a std::ostringstream.
template <typename T>
string num2string(T num) {
  std::ostringstream formatter;
  formatter << num;
  const string text = formatter.str();
  return text;
}
200 
varint(uint64_t x)201 string varint(uint64_t x) {
202   char buf[UPB_PB_VARINT_MAX_LEN];
203   size_t len = upb_vencode64(x, buf);
204   return string(buf, len);
205 }
206 
// TODO: proper byte-swapping for big-endian machines.
// Returns the four bytes starting at `data`, copied in memory order.
string fixed32(void *data) {
  const char* first = static_cast<char*>(data);
  return string(first, first + 4);
}
// Returns the eight bytes starting at `data`, copied in memory order (no
// byte-swapping is performed).
string fixed64(void *data) {
  const char* first = static_cast<char*>(data);
  return string(first, first + 8);
}
210 
delim(const string & buf)211 string delim(const string& buf) { return cat(varint(buf.size()), buf); }
uint32(uint32_t u32)212 string uint32(uint32_t u32) { return fixed32(&u32); }
uint64(uint64_t u64)213 string uint64(uint64_t u64) { return fixed64(&u64); }
flt(float f)214 string flt(float f) { return fixed32(&f); }
dbl(double d)215 string dbl(double d) { return fixed64(&d); }
zz32(int32_t x)216 string zz32(int32_t x) { return varint(upb_zzenc_32(x)); }
zz64(int64_t x)217 string zz64(int64_t x) { return varint(upb_zzenc_64(x)); }
218 
tag(uint32_t fieldnum,char wire_type)219 string tag(uint32_t fieldnum, char wire_type) {
220   return varint((fieldnum << 3) | wire_type);
221 }
222 
submsg(uint32_t fn,const string & buf)223 string submsg(uint32_t fn, const string& buf) {
224   return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) );
225 }
226 
group(uint32_t fn,const string & buf)227 string group(uint32_t fn, const string& buf) {
228   return cat(tag(fn, UPB_WIRE_TYPE_START_GROUP), buf,
229              tag(fn, UPB_WIRE_TYPE_END_GROUP));
230 }
231 
232 // Like delim()/submsg(), but intentionally encodes an incorrect length.
233 // These help test when a delimited boundary doesn't land in the right place.
badlen_delim(int err,const string & buf)234 string badlen_delim(int err, const string& buf) {
235   return cat(varint(buf.size() + err), buf);
236 }
237 
badlen_submsg(int err,uint32_t fn,const string & buf)238 string badlen_submsg(int err, uint32_t fn, const string& buf) {
239   return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), badlen_delim(err, buf) );
240 }
241 
242 
243 /* A set of handlers that covers all .proto types *****************************/
244 
// The handlers simply append to a string indicating what handlers were called.
// This string is similar to protobuf text format but fields are referred to by
// number instead of name and sequences are explicitly delimited.  We indent
// using the closure depth to test that the stack of closures is properly
// handled.

// Closure storage: &closures[0] is the top-level closure given to the sink,
// and the start* handlers return `depth + 1` as the closure for the next
// nesting level.  Each handler reads *depth as its indentation level.
// NOTE(review): presumably filled with closures[i] = i elsewhere -- confirm.
int closures[MAX_NESTING];
// Text accumulated by the handlers for the current test case; cleared at the
// start of each run in do_run_decoder().
string output;
253 
// Appends two spaces per nesting level to *buf.
void indentbuf(string *buf, int depth) {
  const int num_spaces = 2 * depth;
  buf->append(num_spaces, ' ');
}
257 
// Defines value_<member>(), a value handler for fields of C type `ctype`.
// The generated handler appends "<field number>:<value>\n" to `output`,
// indented by *depth, using printf conversion `fmt` for the value, and always
// returns true.
#define NUMERIC_VALUE_HANDLER(member, ctype, fmt)                   \
  bool value_##member(int* depth, const uint32_t* num, ctype val) { \
    indentbuf(&output, *depth);                                     \
    appendf(&output, "%" PRIu32 ":%" fmt "\n", *num, val);          \
    return true;                                                    \
  }

// One handler per value type registered in callback().
NUMERIC_VALUE_HANDLER(uint32, uint32_t, PRIu32)
NUMERIC_VALUE_HANDLER(uint64, uint64_t, PRIu64)
NUMERIC_VALUE_HANDLER(int32,  int32_t,  PRId32)
NUMERIC_VALUE_HANDLER(int64,  int64_t,  PRId64)
NUMERIC_VALUE_HANDLER(float,  float,    "g")
NUMERIC_VALUE_HANDLER(double, double,   "g")
271 
272 bool value_bool(int* depth, const uint32_t* num, bool val) {
273   indentbuf(&output, *depth);
274   appendf(&output, "%" PRIu32 ":%s\n", *num, val ? "true" : "false");
275   return true;
276 }
277 
startstr(int * depth,const uint32_t * num,size_t size_hint)278 int* startstr(int* depth, const uint32_t* num, size_t size_hint) {
279   indentbuf(&output, *depth);
280   appendf(&output, "%" PRIu32 ":(%zu)\"", *num, size_hint);
281   return depth + 1;
282 }
283 
value_string(int * depth,const uint32_t * num,const char * buf,size_t n,const upb_bufhandle * handle)284 size_t value_string(int* depth, const uint32_t* num, const char* buf,
285                     size_t n, const upb_bufhandle* handle) {
286   UPB_UNUSED(num);
287   UPB_UNUSED(depth);
288   output.append(buf, n);
289   ASSERT(handle == &global_handle);
290   return n;
291 }
292 
endstr(int * depth,const uint32_t * num)293 bool endstr(int* depth, const uint32_t* num) {
294   UPB_UNUSED(num);
295   output.append("\n");
296   indentbuf(&output, *depth);
297   appendf(&output, "%" PRIu32 ":\"\n", *num);
298   return true;
299 }
300 
startsubmsg(int * depth,const uint32_t * num)301 int* startsubmsg(int* depth, const uint32_t* num) {
302   indentbuf(&output, *depth);
303   appendf(&output, "%" PRIu32 ":{\n", *num);
304   return depth + 1;
305 }
306 
endsubmsg(int * depth,const uint32_t * num)307 bool endsubmsg(int* depth, const uint32_t* num) {
308   UPB_UNUSED(num);
309   indentbuf(&output, *depth);
310   output.append("}\n");
311   return true;
312 }
313 
startseq(int * depth,const uint32_t * num)314 int* startseq(int* depth, const uint32_t* num) {
315   indentbuf(&output, *depth);
316   appendf(&output, "%" PRIu32 ":[\n", *num);
317   return depth + 1;
318 }
319 
endseq(int * depth,const uint32_t * num)320 bool endseq(int* depth, const uint32_t* num) {
321   UPB_UNUSED(num);
322   indentbuf(&output, *depth);
323   output.append("]\n");
324   return true;
325 }
326 
startmsg(int * depth)327 bool startmsg(int* depth) {
328   indentbuf(&output, *depth);
329   output.append("<\n");
330   return true;
331 }
332 
endmsg(int * depth,upb_status * status)333 bool endmsg(int* depth, upb_status* status) {
334   UPB_UNUSED(status);
335   indentbuf(&output, *depth);
336   output.append(">\n");
337   return true;
338 }
339 
// Deletes a heap-allocated uint32_t that is passed in as an untyped pointer.
void free_uint32(void *val) {
  delete static_cast<uint32_t*>(val);
}
344 
345 template<class T, bool F(int*, const uint32_t*, T)>
doreg(upb::HandlersPtr h,uint32_t num)346 void doreg(upb::HandlersPtr h, uint32_t num) {
347   upb::FieldDefPtr f = h.message_def().FindFieldByNumber(num);
348   ASSERT(f);
349   ASSERT(h.SetValueHandler<T>(f, UpbBind(F, new uint32_t(num))));
350   if (f.IsSequence()) {
351     ASSERT(h.SetStartSequenceHandler(f, UpbBind(startseq, new uint32_t(num))));
352     ASSERT(h.SetEndSequenceHandler(f, UpbBind(endseq, new uint32_t(num))));
353   }
354 }
355 
356 // The repeated field number to correspond to the given non-repeated field
357 // number.
rep_fn(uint32_t fn)358 uint32_t rep_fn(uint32_t fn) {
359   return (UPB_MAX_FIELDNUMBER - 1000) + fn;
360 }
361 
// Field number for which callback() registers no handlers, so it can be used
// to pad a proto without changing the expected output.
#define NOP_FIELD 40
// Field number used by the tests wherever an unknown field is needed.
#define UNKNOWN_FIELD 666
364 
365 template <class T, bool F(int*, const uint32_t*, T)>
reg(upb::HandlersPtr h,upb_descriptortype_t type)366 void reg(upb::HandlersPtr h, upb_descriptortype_t type) {
367   // We register both a repeated and a non-repeated field for every type.
368   // For the non-repeated field we make the field number the same as the
369   // type.  For the repeated field we make it a function of the type.
370   doreg<T, F>(h, type);
371   doreg<T, F>(h, rep_fn(type));
372 }
373 
// Registers the startseq/endseq handlers on field `f`, each bound to its own
// heap-allocated copy of `num`.
void regseq(upb::HandlersPtr h, upb::FieldDefPtr f, uint32_t num) {
  ASSERT(h.SetStartSequenceHandler(f, UpbBind(startseq, new uint32_t(num))));
  ASSERT(h.SetEndSequenceHandler(f, UpbBind(endseq, new uint32_t(num))));
}
378 
// Registers the startsubmsg/endsubmsg handlers on field number `num` of the
// message that `h` describes, plus the sequence handlers when the field is a
// sequence.  The field must exist (asserted).
void reg_subm(upb::HandlersPtr h, uint32_t num) {
  upb::FieldDefPtr f = h.message_def().FindFieldByNumber(num);
  ASSERT(f);
  if (f.IsSequence()) regseq(h, f, num);
  ASSERT(
      h.SetStartSubMessageHandler(f, UpbBind(startsubmsg, new uint32_t(num))));
  ASSERT(h.SetEndSubMessageHandler(f, UpbBind(endsubmsg, new uint32_t(num))));
}
387 
// Registers the startstr/value_string/endstr handlers on field number `num`
// of the message that `h` describes, plus the sequence handlers when the
// field is a sequence.  The field must exist (asserted).
void reg_str(upb::HandlersPtr h, uint32_t num) {
  upb::FieldDefPtr f = h.message_def().FindFieldByNumber(num);
  ASSERT(f);
  if (f.IsSequence()) regseq(h, f, num);
  ASSERT(h.SetStartStringHandler(f, UpbBind(startstr, new uint32_t(num))));
  ASSERT(h.SetEndStringHandler(f, UpbBind(endstr, new uint32_t(num))));
  ASSERT(h.SetStringHandler(f, UpbBind(value_string, new uint32_t(num))));
}
396 
// Closure data that callback() receives when handlers are being registered.
struct HandlerRegisterData {
  // Handlers are only registered when this is ALL_HANDLERS.
  TestMode mode;
};
400 
callback(const void * closure,upb::Handlers * h_ptr)401 void callback(const void *closure, upb::Handlers* h_ptr) {
402   upb::HandlersPtr h(h_ptr);
403   const HandlerRegisterData* data =
404       static_cast<const HandlerRegisterData*>(closure);
405   if (data->mode == ALL_HANDLERS) {
406     h.SetStartMessageHandler(UpbMakeHandler(startmsg));
407     h.SetEndMessageHandler(UpbMakeHandler(endmsg));
408 
409     // Register handlers for each type.
410     reg<double,   value_double>(h, UPB_DESCRIPTOR_TYPE_DOUBLE);
411     reg<float,    value_float> (h, UPB_DESCRIPTOR_TYPE_FLOAT);
412     reg<int64_t,  value_int64> (h, UPB_DESCRIPTOR_TYPE_INT64);
413     reg<uint64_t, value_uint64>(h, UPB_DESCRIPTOR_TYPE_UINT64);
414     reg<int32_t,  value_int32> (h, UPB_DESCRIPTOR_TYPE_INT32);
415     reg<uint64_t, value_uint64>(h, UPB_DESCRIPTOR_TYPE_FIXED64);
416     reg<uint32_t, value_uint32>(h, UPB_DESCRIPTOR_TYPE_FIXED32);
417     reg<bool,     value_bool>  (h, UPB_DESCRIPTOR_TYPE_BOOL);
418     reg<uint32_t, value_uint32>(h, UPB_DESCRIPTOR_TYPE_UINT32);
419     reg<int32_t,  value_int32> (h, UPB_DESCRIPTOR_TYPE_ENUM);
420     reg<int32_t,  value_int32> (h, UPB_DESCRIPTOR_TYPE_SFIXED32);
421     reg<int64_t,  value_int64> (h, UPB_DESCRIPTOR_TYPE_SFIXED64);
422     reg<int32_t,  value_int32> (h, UPB_DESCRIPTOR_TYPE_SINT32);
423     reg<int64_t,  value_int64> (h, UPB_DESCRIPTOR_TYPE_SINT64);
424 
425     reg_str(h, UPB_DESCRIPTOR_TYPE_STRING);
426     reg_str(h, UPB_DESCRIPTOR_TYPE_BYTES);
427     reg_str(h, rep_fn(UPB_DESCRIPTOR_TYPE_STRING));
428     reg_str(h, rep_fn(UPB_DESCRIPTOR_TYPE_BYTES));
429 
430     // Register submessage/group handlers that are self-recursive
431     // to this type, eg: message M { optional M m = 1; }
432     reg_subm(h, UPB_DESCRIPTOR_TYPE_MESSAGE);
433     reg_subm(h, rep_fn(UPB_DESCRIPTOR_TYPE_MESSAGE));
434 
435     if (h.message_def().full_name() == std::string("DecoderTest")) {
436       reg_subm(h, UPB_DESCRIPTOR_TYPE_GROUP);
437       reg_subm(h, rep_fn(UPB_DESCRIPTOR_TYPE_GROUP));
438     }
439 
440     // For NOP_FIELD we register no handlers, so we can pad a proto freely without
441     // changing the output.
442   }
443 }
444 
445 /* Running of test cases ******************************************************/
446 
// Handlers that run_decoder() attaches to the sink of every decoder it
// creates.  NOTE(review): presumably initialized by the test driver before
// any test runs -- confirm.
const upb::Handlers *global_handlers;
// Decoder method that run_decoder() passes to CreateDecoder().
upb::pb::DecoderMethodPtr global_method;
449 
CreateDecoder(upb::Arena * arena,upb::pb::DecoderMethodPtr method,upb::Sink sink,upb::Status * status)450 upb::pb::DecoderPtr CreateDecoder(upb::Arena* arena,
451                                   upb::pb::DecoderMethodPtr method,
452                                   upb::Sink sink, upb::Status* status) {
453   upb::pb::DecoderPtr ret =
454       upb::pb::DecoderPtr::Create(arena, method, sink, status);
455   ret.set_max_nesting(MAX_NESTING);
456   return ret;
457 }
458 
// Checks that the decoder's BytesParsed() is consistent with `ofs`, the
// offset the caller has fed it up to: it may lag behind `ofs`, but by no more
// than UPB_DECODER_MAX_RESIDUAL_BYTES.
void CheckBytesParsed(upb::pb::DecoderPtr decoder, size_t ofs) {
  // We can't have parsed more data than the decoder callback is telling us it
  // parsed.
  ASSERT(decoder.BytesParsed() <= ofs);

  // The difference between what we've decoded and what the decoder has accepted
  // represents the internally buffered amount.  This amount should not exceed
  // this value which comes from decoder.int.h.
  ASSERT(ofs <= (decoder.BytesParsed() + UPB_DECODER_MAX_RESIDUAL_BYTES));
}
469 
parse(VerboseParserEnvironment * env,upb::pb::DecoderPtr decoder,int bytes)470 static bool parse(VerboseParserEnvironment* env,
471                   upb::pb::DecoderPtr decoder, int bytes) {
472   CheckBytesParsed(decoder, env->ofs());
473   bool ret = env->ParseBuffer(bytes);
474   if (ret) {
475     CheckBytesParsed(decoder, env->ofs());
476   }
477 
478   return ret;
479 }
480 
do_run_decoder(VerboseParserEnvironment * env,upb::pb::DecoderPtr decoder,const string & proto,const string * expected_output,size_t i,size_t j,bool may_skip)481 void do_run_decoder(VerboseParserEnvironment* env, upb::pb::DecoderPtr decoder,
482                     const string& proto, const string* expected_output,
483                     size_t i, size_t j, bool may_skip) {
484   env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL);
485   decoder.Reset();
486 
487   if (test_mode != COUNT_ONLY) {
488     output.clear();
489 
490     if (filter_hash) {
491       fprintf(stderr, "RUNNING TEST CASE\n");
492       fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size());
493       PrintBinary(proto);
494       fprintf(stderr, "\n");
495       if (expected_output) {
496         if (test_mode == ALL_HANDLERS) {
497           fprintf(stderr, "Expected output: %s\n", expected_output->c_str());
498         } else if (test_mode == NO_HANDLERS) {
499           fprintf(stderr,
500                   "No handlers are registered, BUT if they were "
501                   "the expected output would be: %s\n",
502                   expected_output->c_str());
503         }
504       } else {
505         fprintf(stderr, "Expected to FAIL\n");
506       }
507     }
508 
509     bool ok = env->Start() &&
510               parse(env, decoder, (int)i) &&
511               parse(env, decoder, (int)(j - i)) &&
512               parse(env, decoder, -1) &&
513               env->End();
514 
515     ASSERT(env->CheckConsistency());
516 
517     if (test_mode == ALL_HANDLERS) {
518       if (expected_output) {
519         if (output != *expected_output) {
520           fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
521                   output.c_str(), expected_output->c_str());
522         }
523         ASSERT(ok);
524         ASSERT(output == *expected_output);
525       } else {
526         if (ok) {
527           fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n",
528                   output.c_str());
529         }
530         ASSERT(!ok);
531       }
532     }
533   }
534   (*count)++;
535 }
536 
run_decoder(const string & proto,const string * expected_output)537 void run_decoder(const string& proto, const string* expected_output) {
538   VerboseParserEnvironment env(filter_hash != 0);
539   upb::Sink sink(global_handlers, &closures[0]);
540   upb::pb::DecoderPtr decoder = CreateDecoder(env.arena(), global_method, sink, env.status());
541   env.ResetBytesSink(decoder.input());
542   for (size_t i = 0; i < proto.size(); i++) {
543     for (size_t j = i; j < UPB_MIN(proto.size(), i + 5); j++) {
544       do_run_decoder(&env, decoder, proto, expected_output, i, j, true);
545       if (env.SkippedWithNull()) {
546         do_run_decoder(&env, decoder, proto, expected_output, i, j, false);
547       }
548     }
549   }
550 }
551 
// Padding: NOP_FIELD as a length-delimited field holding thirty 'X' bytes.
// No handlers are registered for NOP_FIELD, so appending this to a proto does
// not change the expected output.
const static string thirty_byte_nop = cat(
    tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(string(30, 'X')) );
554 
555 // Indents and wraps text as if it were a submessage with this field number
wrap_text(int32_t fn,const string & text)556 string wrap_text(int32_t fn, const string& text) {
557   string wrapped_text = text;
558   size_t pos = 0;
559   string replace_with = "\n  ";
560   while ((pos = wrapped_text.find("\n", pos)) != string::npos &&
561          pos != wrapped_text.size() - 1) {
562     wrapped_text.replace(pos, 1, replace_with);
563     pos += replace_with.size();
564   }
565   wrapped_text = cat(
566       LINE("<"),
567       num2string(fn), LINE(":{")
568       "  ", wrapped_text,
569       LINE("  }")
570       LINE(">"));
571   return wrapped_text;
572 }
573 
assert_successful_parse(const string & proto,const char * expected_fmt,...)574 void assert_successful_parse(const string& proto,
575                              const char *expected_fmt, ...) {
576   string expected_text;
577   va_list args;
578   va_start(args, expected_fmt);
579   vappendf(&expected_text, expected_fmt, args);
580   va_end(args);
581   // To test both middle-of-buffer and end-of-buffer code paths,
582   // repeat once with no-op padding data at the end of buffer.
583   run_decoder(proto, &expected_text);
584   run_decoder(cat( proto, thirty_byte_nop ), &expected_text);
585 
586   // Test that this also works when wrapped in a submessage or group.
587   // Indent the expected text one level and wrap it.
588   string wrapped_text1 = wrap_text(UPB_DESCRIPTOR_TYPE_MESSAGE, expected_text);
589   string wrapped_text2 = wrap_text(UPB_DESCRIPTOR_TYPE_GROUP, expected_text);
590 
591   run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), &wrapped_text1);
592   run_decoder(group(UPB_DESCRIPTOR_TYPE_GROUP, proto), &wrapped_text2);
593 }
594 
assert_does_not_parse_at_eof(const string & proto)595 void assert_does_not_parse_at_eof(const string& proto) {
596   run_decoder(proto, NULL);
597 
598   // Also test that we fail to parse at end-of-submessage, not just
599   // end-of-message.  But skip this if we have no handlers, because in that
600   // case we won't descend into the submessage.
601   if (test_mode != NO_HANDLERS) {
602     run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
603     run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto),
604                     thirty_byte_nop), NULL);
605   }
606 }
607 
assert_does_not_parse(const string & proto)608 void assert_does_not_parse(const string& proto) {
609   // Test that the error is caught both at end-of-buffer and middle-of-buffer.
610   assert_does_not_parse_at_eof(proto);
611   assert_does_not_parse_at_eof(cat( proto, thirty_byte_nop ));
612 }
613 
614 
615 /* The actual tests ***********************************************************/
616 
// Checks that input which ends prematurely is rejected for fields of
// descriptor type `type`.  Covers EOF right after the tag and EOF inside the
// value, each for the known non-repeated field, the known repeated field and
// an unknown field, plus truncated delimited data (for delimited wire types)
// or truncated packed regions (for all other wire types).
void test_premature_eof_for_type(upb_descriptortype_t type) {
  // Incomplete values for each wire type.
  // Indexed by wire type value, in the order given by the comments below.
  static const string incompletes[6] = {
    string("\x80"),     // UPB_WIRE_TYPE_VARINT
    string("abcdefg"),  // UPB_WIRE_TYPE_64BIT
    string("\x80"),     // UPB_WIRE_TYPE_DELIMITED (partial length)
    string(),           // UPB_WIRE_TYPE_START_GROUP (no value required)
    string(),           // UPB_WIRE_TYPE_END_GROUP (no value required)
    string("abc")       // UPB_WIRE_TYPE_32BIT
  };

  // The non-repeated field number equals the type value; see reg().
  uint32_t fieldnum = type;
  uint32_t rep_fieldnum = rep_fn(type);
  int wire_type = upb_decoder_types[type].native_wire_type;
  const string& incomplete = incompletes[wire_type];

  // EOF before a known non-repeated value.
  assert_does_not_parse_at_eof(tag(fieldnum, wire_type));

  // EOF before a known repeated value.
  assert_does_not_parse_at_eof(tag(rep_fieldnum, wire_type));

  // EOF before an unknown value.
  assert_does_not_parse_at_eof(tag(UNKNOWN_FIELD, wire_type));

  // EOF inside a known non-repeated value.
  assert_does_not_parse_at_eof(
      cat( tag(fieldnum, wire_type), incomplete ));

  // EOF inside a known repeated value.
  assert_does_not_parse_at_eof(
      cat( tag(rep_fieldnum, wire_type), incomplete ));

  // EOF inside an unknown value.
  assert_does_not_parse_at_eof(
      cat( tag(UNKNOWN_FIELD, wire_type), incomplete ));

  if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
    // EOF in the middle of delimited data for known non-repeated value.
    assert_does_not_parse_at_eof(
        cat( tag(fieldnum, wire_type), varint(1) ));

    // EOF in the middle of delimited data for known repeated value.
    assert_does_not_parse_at_eof(
        cat( tag(rep_fieldnum, wire_type), varint(1) ));

    // EOF in the middle of delimited data for unknown value.
    assert_does_not_parse_at_eof(
        cat( tag(UNKNOWN_FIELD, wire_type), varint(1) ));

    if (type == UPB_DESCRIPTOR_TYPE_MESSAGE) {
      // Submessage ends in the middle of a value.
      string incomplete_submsg =
          cat ( tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT),
                incompletes[UPB_WIRE_TYPE_VARINT] );
      assert_does_not_parse(
          cat( tag(fieldnum, UPB_WIRE_TYPE_DELIMITED),
               varint(incomplete_submsg.size()),
               incomplete_submsg ));
    }
  } else {
    // Packed region ends in the middle of a value.
    assert_does_not_parse(
        cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
             varint(incomplete.size()),
             incomplete ));

    // EOF in the middle of packed region.
    assert_does_not_parse_at_eof(
        cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1) ));
  }
}
689 
690 // "33" and "66" are just two random values that all numeric types can
691 // represent.
test_valid_data_for_type(upb_descriptortype_t type,const string & enc33,const string & enc66)692 void test_valid_data_for_type(upb_descriptortype_t type,
693                               const string& enc33, const string& enc66) {
694   uint32_t fieldnum = type;
695   uint32_t rep_fieldnum = rep_fn(type);
696   int wire_type = upb_decoder_types[type].native_wire_type;
697 
698   // Non-repeated
699   assert_successful_parse(
700       cat( tag(fieldnum, wire_type), enc33,
701            tag(fieldnum, wire_type), enc66 ),
702       LINE("<")
703       LINE("%u:33")
704       LINE("%u:66")
705       LINE(">"), fieldnum, fieldnum);
706 
707   // Non-packed repeated.
708   assert_successful_parse(
709       cat( tag(rep_fieldnum, wire_type), enc33,
710            tag(rep_fieldnum, wire_type), enc66 ),
711       LINE("<")
712       LINE("%u:[")
713       LINE("  %u:33")
714       LINE("  %u:66")
715       LINE("]")
716       LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
717 
718   // Packed repeated.
719   assert_successful_parse(
720       cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
721            delim(cat( enc33, enc66 )) ),
722       LINE("<")
723       LINE("%u:[")
724       LINE("  %u:33")
725       LINE("  %u:66")
726       LINE("]")
727       LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
728 }
729 
test_valid_data_for_signed_type(upb_descriptortype_t type,const string & enc33,const string & enc66)730 void test_valid_data_for_signed_type(upb_descriptortype_t type,
731                                      const string& enc33, const string& enc66) {
732   uint32_t fieldnum = type;
733   uint32_t rep_fieldnum = rep_fn(type);
734   int wire_type = upb_decoder_types[type].native_wire_type;
735 
736   // Non-repeated
737   assert_successful_parse(
738       cat( tag(fieldnum, wire_type), enc33,
739            tag(fieldnum, wire_type), enc66 ),
740       LINE("<")
741       LINE("%u:33")
742       LINE("%u:-66")
743       LINE(">"), fieldnum, fieldnum);
744 
745   // Non-packed repeated.
746   assert_successful_parse(
747       cat( tag(rep_fieldnum, wire_type), enc33,
748            tag(rep_fieldnum, wire_type), enc66 ),
749       LINE("<")
750       LINE("%u:[")
751       LINE("  %u:33")
752       LINE("  %u:-66")
753       LINE("]")
754       LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
755 
756   // Packed repeated.
757   assert_successful_parse(
758       cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
759            delim(cat( enc33, enc66 )) ),
760       LINE("<")
761       LINE("%u:[")
762       LINE("  %u:33")
763       LINE("  %u:-66")
764       LINE("]")
765       LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
766 }
767 
// Test that invalid protobufs are properly detected (without crashing) and
// have an error reported.  Field numbers match registered handlers above.
//
// Runs the premature-EOF checks for every scalar, string and bytes type, then
// a series of individually malformed inputs (bad tags, mismatched groups,
// fields overrunning their enclosing submessage, excessive nesting).
void test_invalid() {
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_DOUBLE);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_FLOAT);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_INT64);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_UINT64);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_INT32);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_FIXED64);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_FIXED32);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_BOOL);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_STRING);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_BYTES);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_UINT32);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_ENUM);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SFIXED32);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SFIXED64);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SINT32);
  test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SINT64);

  // EOF inside a tag's varint.
  assert_does_not_parse_at_eof( string("\x80") );

  // EOF inside a known group.
  // TODO(haberman): add group to decoder test schema.
  //assert_does_not_parse_at_eof( tag(4, UPB_WIRE_TYPE_START_GROUP) );

  // EOF inside an unknown group.
  assert_does_not_parse_at_eof( tag(UNKNOWN_FIELD, UPB_WIRE_TYPE_START_GROUP) );

  // End group that we are not currently in.
  assert_does_not_parse( tag(4, UPB_WIRE_TYPE_END_GROUP) );

  // Field number is 0.
  assert_does_not_parse(
      cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0) ));
  // The previous test alone did not catch this particular pattern which could
  // corrupt the internal state.
  assert_does_not_parse(
      cat( tag(0, UPB_WIRE_TYPE_64BIT), uint64(0) ));

  // Field number is too large.
  assert_does_not_parse(
      cat( tag(UPB_MAX_FIELDNUMBER + 1, UPB_WIRE_TYPE_DELIMITED),
           varint(0) ));

  // Known group inside a submessage has ENDGROUP tag AFTER submessage end.
  assert_does_not_parse(
      cat ( submsg(UPB_DESCRIPTOR_TYPE_MESSAGE,
                   tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_START_GROUP)),
            tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_END_GROUP)));

  // Unknown string extends past enclosing submessage.
  assert_does_not_parse(
      cat (badlen_submsg(-1, UPB_DESCRIPTOR_TYPE_MESSAGE,
                         submsg(12345, string("   "))),
           submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string("     "))));

  // Unknown fixed-length field extends past enclosing submessage.
  assert_does_not_parse(
      cat (badlen_submsg(-1, UPB_DESCRIPTOR_TYPE_MESSAGE,
                         cat( tag(12345, UPB_WIRE_TYPE_64BIT), uint64(0))),
           submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string("     "))));

  // Test exceeding the resource limit of stack depth.
  // Skipped without handlers: per the note in assert_does_not_parse_at_eof(),
  // the decoder does not descend into submessages in that mode.
  if (test_mode != NO_HANDLERS) {
    string buf;
    // Wraps an empty message in MAX_NESTING + 1 levels of submessage.
    for (int i = 0; i <= MAX_NESTING; i++) {
      buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
    }
    assert_does_not_parse(buf);
  }
}
841 
test_valid()842 void test_valid() {
843   // Empty protobuf.
844   assert_successful_parse(string(""), "<\n>\n");
845 
846   // Empty protobuf where we never call PutString between
847   // StartString/EndString.
848 
849   upb::Status status;
850   upb::Arena arena;
851   upb::Sink sink(global_handlers, &closures[0]);
852   upb::pb::DecoderPtr decoder =
853       CreateDecoder(&arena, global_method, sink, &status);
854   output.clear();
855   bool ok = upb::PutBuffer(std::string(), decoder.input());
856   ASSERT(ok);
857   ASSERT(status.ok());
858   if (test_mode == ALL_HANDLERS) {
859     ASSERT(output == string("<\n>\n"));
860   }
861 
862   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_DOUBLE,
863                                   dbl(33),
864                                   dbl(-66));
865   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_FLOAT, flt(33), flt(-66));
866   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_INT64,
867                                   varint(33),
868                                   varint(-66));
869   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_INT32,
870                                   varint(33),
871                                   varint(-66));
872   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_ENUM,
873                                   varint(33),
874                                   varint(-66));
875   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SFIXED32,
876                                   uint32(33),
877                                   uint32(-66));
878   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SFIXED64,
879                                   uint64(33),
880                                   uint64(-66));
881   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SINT32,
882                                   zz32(33),
883                                   zz32(-66));
884   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SINT64,
885                                   zz64(33),
886                                   zz64(-66));
887 
888   test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_UINT64, varint(33), varint(66));
889   test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_UINT32, varint(33), varint(66));
890   test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_FIXED64, uint64(33), uint64(66));
891   test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_FIXED32, uint32(33), uint32(66));
892 
893   // Unknown fields.
894   int int32_type = UPB_DESCRIPTOR_TYPE_INT32;
895   int msg_type = UPB_DESCRIPTOR_TYPE_MESSAGE;
896   assert_successful_parse(
897       cat( tag(12345, UPB_WIRE_TYPE_VARINT), varint(2345678) ),
898       "<\n>\n");
899   assert_successful_parse(
900       cat( tag(12345, UPB_WIRE_TYPE_32BIT), uint32(2345678) ),
901       "<\n>\n");
902   assert_successful_parse(
903       cat( tag(12345, UPB_WIRE_TYPE_64BIT), uint64(2345678) ),
904       "<\n>\n");
905   assert_successful_parse(
906       submsg(12345, string("                ")),
907       "<\n>\n");
908 
909   // Unknown field inside a known submessage.
910   assert_successful_parse(
911       submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string("   "))),
912       LINE("<")
913       LINE("%u:{")
914       LINE("  <")
915       LINE("  >")
916       LINE("  }")
917       LINE(">"), UPB_DESCRIPTOR_TYPE_MESSAGE);
918 
919   assert_successful_parse(
920       cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string("   "))),
921            tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT),
922            varint(5)),
923       LINE("<")
924       LINE("%u:{")
925       LINE("  <")
926       LINE("  >")
927       LINE("  }")
928       LINE("%u:5")
929       LINE(">"), UPB_DESCRIPTOR_TYPE_MESSAGE, UPB_DESCRIPTOR_TYPE_INT32);
930 
931   // This triggered a previous bug in the decoder.
932   assert_successful_parse(
933       cat( tag(UPB_DESCRIPTOR_TYPE_SFIXED32, UPB_WIRE_TYPE_VARINT),
934            varint(0) ),
935       "<\n>\n");
936 
937   assert_successful_parse(
938       cat(
939         submsg(UPB_DESCRIPTOR_TYPE_MESSAGE,
940           submsg(UPB_DESCRIPTOR_TYPE_MESSAGE,
941             cat( tag(int32_type, UPB_WIRE_TYPE_VARINT), varint(2345678),
942                  tag(12345, UPB_WIRE_TYPE_VARINT), varint(2345678) ))),
943         tag(int32_type, UPB_WIRE_TYPE_VARINT), varint(22222)),
944       LINE("<")
945       LINE("%u:{")
946       LINE("  <")
947       LINE("  %u:{")
948       LINE("    <")
949       LINE("    %u:2345678")
950       LINE("    >")
951       LINE("    }")
952       LINE("  >")
953       LINE("  }")
954       LINE("%u:22222")
955       LINE(">"), msg_type, msg_type, int32_type, int32_type);
956 
957   assert_successful_parse(
958       cat( tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT), varint(1),
959            tag(12345, UPB_WIRE_TYPE_VARINT), varint(2345678) ),
960       LINE("<")
961       LINE("%u:1")
962       LINE(">"), UPB_DESCRIPTOR_TYPE_INT32);
963 
964   // String inside submsg.
965   uint32_t msg_fn = UPB_DESCRIPTOR_TYPE_MESSAGE;
966   assert_successful_parse(
967       submsg(msg_fn,
968              cat ( tag(UPB_DESCRIPTOR_TYPE_STRING, UPB_WIRE_TYPE_DELIMITED),
969                    delim(string("abcde"))
970                  )
971              ),
972       LINE("<")
973       LINE("%u:{")
974       LINE("  <")
975       LINE("  %u:(5)\"abcde")
976       LINE("    %u:\"")
977       LINE("  >")
978       LINE("  }")
979       LINE(">"), msg_fn, UPB_DESCRIPTOR_TYPE_STRING,
980                  UPB_DESCRIPTOR_TYPE_STRING);
981 
982   // Test implicit startseq/endseq.
983   uint32_t repfl_fn = rep_fn(UPB_DESCRIPTOR_TYPE_FLOAT);
984   uint32_t repdb_fn = rep_fn(UPB_DESCRIPTOR_TYPE_DOUBLE);
985   assert_successful_parse(
986       cat( tag(repfl_fn, UPB_WIRE_TYPE_32BIT), flt(33),
987            tag(repdb_fn, UPB_WIRE_TYPE_64BIT), dbl(66) ),
988       LINE("<")
989       LINE("%u:[")
990       LINE("  %u:33")
991       LINE("]")
992       LINE("%u:[")
993       LINE("  %u:66")
994       LINE("]")
995       LINE(">"), repfl_fn, repfl_fn, repdb_fn, repdb_fn);
996 
997   // Submessage tests.
998   assert_successful_parse(
999       submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, string()))),
1000       LINE("<")
1001       LINE("%u:{")
1002       LINE("  <")
1003       LINE("  %u:{")
1004       LINE("    <")
1005       LINE("    %u:{")
1006       LINE("      <")
1007       LINE("      >")
1008       LINE("      }")
1009       LINE("    >")
1010       LINE("    }")
1011       LINE("  >")
1012       LINE("  }")
1013       LINE(">"), msg_fn, msg_fn, msg_fn);
1014 
1015   uint32_t repm_fn = rep_fn(UPB_DESCRIPTOR_TYPE_MESSAGE);
1016   assert_successful_parse(
1017       submsg(repm_fn, submsg(repm_fn, string())),
1018       LINE("<")
1019       LINE("%u:[")
1020       LINE("  %u:{")
1021       LINE("    <")
1022       LINE("    %u:[")
1023       LINE("      %u:{")
1024       LINE("        <")
1025       LINE("        >")
1026       LINE("        }")
1027       LINE("    ]")
1028       LINE("    >")
1029       LINE("    }")
1030       LINE("]")
1031       LINE(">"), repm_fn, repm_fn, repm_fn, repm_fn);
1032 
1033   // Test unknown group.
1034   uint32_t unknown_group_fn = 12321;
1035   assert_successful_parse(
1036       cat( tag(unknown_group_fn, UPB_WIRE_TYPE_START_GROUP),
1037            tag(unknown_group_fn, UPB_WIRE_TYPE_END_GROUP) ),
1038       LINE("<")
1039       LINE(">")
1040   );
1041 
1042   // Test some unknown fields inside an unknown group.
1043   const string unknown_group_with_data =
1044       cat(
1045           tag(unknown_group_fn, UPB_WIRE_TYPE_START_GROUP),
1046           tag(12345, UPB_WIRE_TYPE_VARINT), varint(2345678),
1047           tag(123456789, UPB_WIRE_TYPE_32BIT), uint32(2345678),
1048           tag(123477, UPB_WIRE_TYPE_64BIT), uint64(2345678),
1049           tag(123, UPB_WIRE_TYPE_DELIMITED), varint(0),
1050           tag(unknown_group_fn, UPB_WIRE_TYPE_END_GROUP)
1051          );
1052 
1053   // Nested unknown group with data.
1054   assert_successful_parse(
1055       cat(
1056            tag(unknown_group_fn, UPB_WIRE_TYPE_START_GROUP),
1057            unknown_group_with_data,
1058            tag(unknown_group_fn, UPB_WIRE_TYPE_END_GROUP),
1059            tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT), varint(1)
1060          ),
1061       LINE("<")
1062       LINE("%u:1")
1063       LINE(">"),
1064       UPB_DESCRIPTOR_TYPE_INT32
1065   );
1066 
1067   assert_successful_parse(
1068       cat( tag(unknown_group_fn, UPB_WIRE_TYPE_START_GROUP),
1069            tag(unknown_group_fn + 1, UPB_WIRE_TYPE_START_GROUP),
1070            tag(unknown_group_fn + 1, UPB_WIRE_TYPE_END_GROUP),
1071            tag(unknown_group_fn, UPB_WIRE_TYPE_END_GROUP) ),
1072       LINE("<")
1073       LINE(">")
1074   );
1075 
1076   // Staying within the stack limit should work properly.
1077   string buf;
1078   string textbuf;
1079   int total = MAX_NESTING - 1;
1080   for (int i = 0; i < total; i++) {
1081     buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
1082     indentbuf(&textbuf, i);
1083     textbuf.append("<\n");
1084     indentbuf(&textbuf, i);
1085     appendf(&textbuf, "%u:{\n", UPB_DESCRIPTOR_TYPE_MESSAGE);
1086   }
1087   indentbuf(&textbuf, total);
1088   textbuf.append("<\n");
1089   indentbuf(&textbuf, total);
1090   textbuf.append(">\n");
1091   for (int i = 0; i < total; i++) {
1092     indentbuf(&textbuf, total - i - 1);
1093     textbuf.append("  }\n");
1094     indentbuf(&textbuf, total - i - 1);
1095     textbuf.append(">\n");
1096   }
1097   // Have to use run_decoder directly, because we are at max nesting and can't
1098   // afford the extra nesting that assert_successful_parse() will do.
1099   run_decoder(buf, &textbuf);
1100 }
1101 
// Handlers callback that does nothing with either argument.
// test_emptyhandlers() passes it to upb::HandlerCache so that the resulting
// handlers have nothing registered by this callback.
void empty_callback(const void* /* closure */, upb::Handlers* /* h_ptr */) {
  // Intentionally empty.
}
1103 
test_emptyhandlers(upb::SymbolTable * symtab)1104 void test_emptyhandlers(upb::SymbolTable* symtab) {
1105   // Create an empty handlers to make sure that the decoder can handle empty
1106   // messages.
1107   HandlerRegisterData handlerdata;
1108   handlerdata.mode = test_mode;
1109 
1110   upb::HandlerCache handler_cache(empty_callback, &handlerdata);
1111   upb::pb::CodeCache pb_code_cache(&handler_cache);
1112 
1113   upb::MessageDefPtr md = upb::MessageDefPtr(Empty_getmsgdef(symtab->ptr()));
1114   global_handlers = handler_cache.Get(md);
1115   global_method = pb_code_cache.Get(md);
1116 
1117   // TODO: also test the case where a message has fields, but the fields are
1118   // submessage fields and have no handlers. This also results in a decoder
1119   // method with no field-handling code.
1120 
1121   // Ensure that the method can run with empty and non-empty input.
1122   string test_unknown_field_msg =
1123     cat(tag(1, UPB_WIRE_TYPE_VARINT), varint(42),
1124         tag(2, UPB_WIRE_TYPE_DELIMITED), delim("My test data"));
1125   const struct {
1126     const char* data;
1127     size_t length;
1128   } testdata[] = {
1129     { "", 0 },
1130     { test_unknown_field_msg.data(), test_unknown_field_msg.size() },
1131     { NULL, 0 },
1132   };
1133   for (int i = 0; testdata[i].data; i++) {
1134     VerboseParserEnvironment env(filter_hash != 0);
1135     upb::Sink sink(global_method.dest_handlers(), &closures[0]);
1136     upb::pb::DecoderPtr decoder =
1137         CreateDecoder(env.arena(), global_method, sink, env.status());
1138     env.ResetBytesSink(decoder.input());
1139     env.Reset(testdata[i].data, testdata[i].length, true, false);
1140     ASSERT(env.Start());
1141     ASSERT(env.ParseBuffer(-1));
1142     ASSERT(env.End());
1143     ASSERT(env.CheckConsistency());
1144   }
1145 }
1146 
run_tests()1147 void run_tests() {
1148   HandlerRegisterData handlerdata;
1149   handlerdata.mode = test_mode;
1150 
1151   upb::SymbolTable symtab;
1152   upb::HandlerCache handler_cache(callback, &handlerdata);
1153   upb::pb::CodeCache pb_code_cache(&handler_cache);
1154 
1155   upb::MessageDefPtr md(DecoderTest_getmsgdef(symtab.ptr()));
1156   global_handlers = handler_cache.Get(md);
1157   global_method = pb_code_cache.Get(md);
1158   completed = 0;
1159 
1160   test_invalid();
1161   test_valid();
1162 
1163   test_emptyhandlers(&symtab);
1164 }
1165 
1166 extern "C" {
1167 
run_tests(int argc,char * argv[])1168 int run_tests(int argc, char *argv[]) {
1169   if (argc > 1)
1170     filter_hash = (uint32_t)strtol(argv[1], NULL, 16);
1171   for (int i = 0; i < MAX_NESTING; i++) {
1172     closures[i] = i;
1173   }
1174 
1175   // Count tests.
1176   count = &total;
1177   total = 0;
1178   test_mode = COUNT_ONLY;
1179   run_tests();
1180   count = &completed;
1181 
1182   total *= 2;  // NO_HANDLERS, ALL_HANDLERS.
1183 
1184   test_mode = NO_HANDLERS;
1185   run_tests();
1186 
1187   test_mode = ALL_HANDLERS;
1188   run_tests();
1189 
1190   printf("All tests passed, %d assertions.\n", num_assertions);
1191   return 0;
1192 }
1193 
1194 }
1195