1 /*
2 ** Internal implementation details of the decoder that are shared between
3 ** decode.c and decode_fast.c.
4 */
5 
6 #ifndef UPB_DECODE_INT_H_
7 #define UPB_DECODE_INT_H_
8 
9 #include <setjmp.h>
10 
11 #include "upb/msg.h"
12 #include "upb/upb.int.h"
13 
14 /* Must be last. */
15 #include "upb/port_def.inc"
16 
/* Sentinel stored in upb_decstate.end_group when there is no END_GROUP field
 * number to record.  Parenthesized so the negative literal always expands as a
 * single operand, whatever expression the macro is used in. */
#define DECODE_NOGROUP (-1)
18 
/* Decoder state shared between decode.c and decode_fast.c.  Holds the bounds
 * of the buffer currently being read, the active submessage limit, and the
 * bookkeeping needed to preserve unknown data across a buffer flip. */
typedef struct upb_decstate {
  const char *end;         /* Can read up to 16 bytes slop beyond this. */
  const char *limit_ptr;   /* = end + UPB_MIN(limit, 0) */
  upb_msg *unknown_msg;    /* If non-NULL, add unknown data at buffer flip. */
  const char *unknown;     /* Start of unknown data. */
  int limit;               /* Submessage limit relative to end. */
  int depth;               /* NOTE(review): presumably nesting-depth budget;
                            * not used in this header -- confirm. */
  uint32_t end_group;   /* field number of END_GROUP tag, else DECODE_NOGROUP */
  bool alias;              /* Set to false once input is switched to `patch`.
                            * NOTE(review): presumably "may alias the input
                            * buffer" -- confirm against decode.c. */
  char patch[32];          /* Scratch buffer: on a flip, the 16 bytes at `end`
                            * are copied to patch[0..15] and patch[16..31] is
                            * zeroed; `end` then points at patch[16]. */
  upb_arena arena;         /* Arena passed to _upb_msg_addunknown(). */
  jmp_buf err;             /* NOTE(review): presumably the longjmp() target
                            * used when decoding is aborted -- confirm. */
} upb_decstate;
32 
/* Error function that will abort decoding with longjmp(). We can't declare this
 * UPB_NORETURN, even though it is appropriate, because if we do then compilers
 * will "helpfully" refuse to tailcall to it
 * (see: https://stackoverflow.com/a/55657013), which will defeat a major goal
 * of our optimizations. That is also why we must define it in a separate file,
 * otherwise the compiler will see that it calls longjmp() and deduce that it is
 * noreturn.
 *
 * NOTE(review): the const char* return type presumably exists only so parsers
 * can write `return fastdecode_err(d);` in tail position -- confirm. */
const char *fastdecode_err(upb_decstate *d);
41 
/* Lookup table indexed by a byte value (defined elsewhere).  As used by
 * decode_verifyutf8_inl(), the entry for a lead byte is the total length in
 * bytes of the sequence that byte starts, and 0 marks a byte that cannot start
 * a sequence. */
extern const uint8_t upb_utf8_offsets[];
43 
44 UPB_INLINE
decode_verifyutf8_inl(const char * buf,int len)45 bool decode_verifyutf8_inl(const char *buf, int len) {
46   int i, j;
47   uint8_t offset;
48 
49   i = 0;
50   while (i < len) {
51     offset = upb_utf8_offsets[(uint8_t)buf[i]];
52     if (offset == 0 || i + offset > len) {
53       return false;
54     }
55     for (j = i + 1; j < i + offset; j++) {
56       if ((buf[j] & 0xc0) != 0x80) {
57         return false;
58       }
59     }
60     i += offset;
61   }
62   return i == len;
63 }
64 
65 /* x86-64 pointers always have the high 16 bits matching. So we can shift
66  * left 8 and right 8 without loss of information. */
decode_totable(const upb_msglayout * tablep)67 UPB_INLINE intptr_t decode_totable(const upb_msglayout *tablep) {
68   return ((intptr_t)tablep << 8) | tablep->table_mask;
69 }
70 
decode_totablep(intptr_t table)71 UPB_INLINE const upb_msglayout *decode_totablep(intptr_t table) {
72   return (const upb_msglayout*)(table >> 8);
73 }
74 
75 UPB_INLINE
decode_isdonefallback_inl(upb_decstate * d,const char * ptr,int overrun)76 const char *decode_isdonefallback_inl(upb_decstate *d, const char *ptr,
77                                       int overrun) {
78   if (overrun < d->limit) {
79     /* Need to copy remaining data into patch buffer. */
80     UPB_ASSERT(overrun < 16);
81     if (d->unknown_msg) {
82       if (!_upb_msg_addunknown(d->unknown_msg, d->unknown, ptr - d->unknown,
83                                &d->arena)) {
84         return NULL;
85       }
86       d->unknown = &d->patch[0] + overrun;
87     }
88     memset(d->patch + 16, 0, 16);
89     memcpy(d->patch, d->end, 16);
90     ptr = &d->patch[0] + overrun;
91     d->end = &d->patch[16];
92     d->limit -= 16;
93     d->limit_ptr = d->end + d->limit;
94     d->alias = false;
95     UPB_ASSERT(ptr < d->limit_ptr);
96     return ptr;
97   } else {
98     return NULL;
99   }
100 }
101 
/* Out-of-line fallback used by decode_isdone() when the read pointer has
 * reached d->limit_ptr but `overrun` does not equal d->limit.  The result
 * replaces the caller's read pointer.
 * NOTE(review): presumably wraps decode_isdonefallback_inl() and reports an
 * error instead of returning NULL -- defined elsewhere, confirm. */
const char *decode_isdonefallback(upb_decstate *d, const char *ptr,
                                  int overrun);
104 
105 UPB_INLINE
decode_isdone(upb_decstate * d,const char ** ptr)106 bool decode_isdone(upb_decstate *d, const char **ptr) {
107   int overrun = *ptr - d->end;
108   if (UPB_LIKELY(*ptr < d->limit_ptr)) {
109     return false;
110   } else if (UPB_LIKELY(overrun == d->limit)) {
111     return true;
112   } else {
113     *ptr = decode_isdonefallback(d, *ptr, overrun);
114     return false;
115   }
116 }
117 
118 UPB_INLINE
fastdecode_tagdispatch(upb_decstate * d,const char * ptr,upb_msg * msg,intptr_t table,uint64_t hasbits,uint32_t tag)119 const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr,
120                                     upb_msg *msg, intptr_t table,
121                                     uint64_t hasbits, uint32_t tag) {
122   const upb_msglayout *table_p = decode_totablep(table);
123   uint8_t mask = table;
124   uint64_t data;
125   size_t idx = tag & mask;
126   UPB_ASSUME((idx & 7) == 0);
127   idx >>= 3;
128   data = table_p->fasttable[idx].field_data ^ tag;
129   return table_p->fasttable[idx].field_parser(d, ptr, msg, table, hasbits, data);
130 }
131 
fastdecode_loadtag(const char * ptr)132 UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr) {
133   uint16_t tag;
134   memcpy(&tag, ptr, 2);
135   return tag;
136 }
137 
decode_checklimit(upb_decstate * d)138 UPB_INLINE void decode_checklimit(upb_decstate *d) {
139   UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
140 }
141 
decode_pushlimit(upb_decstate * d,const char * ptr,int size)142 UPB_INLINE int decode_pushlimit(upb_decstate *d, const char *ptr, int size) {
143   int limit = size + (int)(ptr - d->end);
144   int delta = d->limit - limit;
145   decode_checklimit(d);
146   d->limit = limit;
147   d->limit_ptr = d->end + UPB_MIN(0, limit);
148   decode_checklimit(d);
149   return delta;
150 }
151 
decode_poplimit(upb_decstate * d,const char * ptr,int saved_delta)152 UPB_INLINE void decode_poplimit(upb_decstate *d, const char *ptr,
153                                 int saved_delta) {
154   UPB_ASSERT(ptr - d->end == d->limit);
155   decode_checklimit(d);
156   d->limit += saved_delta;
157   d->limit_ptr = d->end + UPB_MIN(0, d->limit);
158   decode_checklimit(d);
159 }
160 
161 #include "upb/port_undef.inc"
162 
163 #endif  /* UPB_DECODE_INT_H_ */
164