1 /*
2 ** A number of routines for varint manipulation (we keep them all around to
3 ** have multiple approaches available for benchmarking).
4 */
5 
6 #ifndef UPB_VARINT_DECODER_H_
7 #define UPB_VARINT_DECODER_H_
8 
9 #include <assert.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include "upb/upb.h"
13 
14 #include "upb/port_def.inc"
15 
16 #ifdef __cplusplus
17 extern "C" {
18 #endif
19 
/* Highest wire type number.  NOTE(review): presumably this matches the largest
 * value of the wire-type enum, which is not visible in this header -- confirm. */
#define UPB_MAX_WIRE_TYPE 5

/* The maximum number of bytes that it takes to encode a 64-bit varint
 * (ceil(64 / 7) = 10, since each varint byte carries 7 payload bits). */
#define UPB_PB_VARINT_MAX_LEN 10
24 
/* Array of the "native" (i.e. non-packed-repeated) wire type for the given
 * descriptor type (upb_descriptortype_t).  Only declared here; the definition
 * lives outside this header. */
extern const uint8_t upb_pb_native_wire_types[];
28 
/* Reverses the byte order of a 64-bit value (byte 0 <-> byte 7, byte 1 <->
 * byte 6, and so on).
 *
 * Each term isolates one byte with a mask and then moves it to its mirrored
 * position.  The inner parentheses are required: shifts bind tighter than
 * bitwise AND, so without them the shift would be applied to the mask instead
 * of to the masked value and the function would return val unchanged. */
UPB_INLINE uint64_t byteswap64(uint64_t val) {
  const uint64_t byte = 0xff;
  return ((val & (byte << 56)) >> 56)
    | ((val & (byte << 48)) >> 40)
    | ((val & (byte << 40)) >> 24)
    | ((val & (byte << 32)) >> 8)
    | ((val & (byte << 24)) << 8)
    | ((val & (byte << 16)) << 24)
    | ((val & (byte <<  8)) << 40)
    | ((val & (byte <<  0)) << 56);
}
40 
41 /* Zig-zag encoding/decoding **************************************************/
42 
/* Decodes a 32-bit zig-zag value: 0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...
 * Only the low 32 bits of _n are used. */
UPB_INLINE int32_t upb_zzdec_32(uint64_t _n) {
  uint32_t bits = (uint32_t)_n;
  uint32_t magnitude = bits >> 1;
  /* All ones when the low (sign) bit is set, zero otherwise. */
  int32_t sign_mask = -(int32_t)(bits & 1);
  return magnitude ^ sign_mask;
}
/* Decodes a 64-bit zig-zag value: 0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ... */
UPB_INLINE int64_t upb_zzdec_64(uint64_t n) {
  uint64_t magnitude = n >> 1;
  /* All ones when the low (sign) bit is set, zero otherwise. */
  int64_t sign_mask = -(int64_t)(n & 1);
  return magnitude ^ sign_mask;
}
/* Zig-zag encodes a signed 32-bit value so that small magnitudes map to small
 * unsigned numbers: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 * The sign mask is built with a comparison rather than n >> 31, because
 * right-shifting a negative signed value is implementation-defined in C. */
UPB_INLINE uint32_t upb_zzenc_32(int32_t n) {
  uint32_t sign_mask = n < 0 ? UINT32_MAX : 0;
  return ((uint32_t)n << 1) ^ sign_mask;
}
/* Zig-zag encodes a signed 64-bit value so that small magnitudes map to small
 * unsigned numbers: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 * The sign mask is built with a comparison rather than n >> 63, because
 * right-shifting a negative signed value is implementation-defined in C. */
UPB_INLINE uint64_t upb_zzenc_64(int64_t n) {
  uint64_t sign_mask = n < 0 ? UINT64_MAX : 0;
  return ((uint64_t)n << 1) ^ sign_mask;
}
56 
57 /* Decoding *******************************************************************/
58 
/* All decoding functions return this struct by value: the position reached in
 * the input together with the value decoded so far. */
typedef struct {
  const char *p;  /* NULL if the varint was unterminated; otherwise points just
                   * past the last byte consumed. */
  uint64_t val;   /* The decoded value. */
} upb_decoderet;
64 
upb_decoderet_make(const char * p,uint64_t val)65 UPB_INLINE upb_decoderet upb_decoderet_make(const char *p, uint64_t val) {
66   upb_decoderet ret;
67   ret.p = p;
68   ret.val = val;
69   return ret;
70 }
71 
/* Out-of-line continuations for varints longer than two bytes (defined outside
 * this header).  The inline check2 decoders call them with r.p already
 * advanced past the first two bytes and r.val holding the 14 bits decoded so
 * far, and return whatever they return. */
upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r);
upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r);
74 
/* Template for an inline decoder that handles one- and two-byte varints with
 * branches and hands anything longer to an out-of-line function.
 *
 *   name:                 suffix of the generated upb_vdecode_check2_<name>().
 *   decode_max8_function: continuation that receives the partial result
 *                         (pointer advanced past the first two bytes, value
 *                         holding the low 14 bits) and finishes the decode.
 *
 * Note that this may read up to 10 bytes, so it must not be used unless there
 * are at least ten bytes left in the buffer! */
#define UPB_VARINT_DECODER_CHECK2(name, decode_max8_function)                  \
UPB_INLINE upb_decoderet upb_vdecode_check2_ ## name(const char *_p) {         \
  /* View the input as unsigned bytes without dropping its const. */           \
  const uint8_t *p = (const uint8_t*)_p;                                       \
  upb_decoderet r;                                                             \
  if ((*p & 0x80) == 0) {                                                      \
    /* Common case: one-byte varint. */                                        \
    return upb_decoderet_make(_p + 1, *p & 0x7fU);                             \
  }                                                                            \
  /* Combine the low 7 bits of the first two bytes. */                         \
  r = upb_decoderet_make(_p + 2, (*p & 0x7fU) | ((*(p + 1) & 0x7fU) << 7));    \
  if ((*(p + 1) & 0x80) == 0) {                                                \
    /* Two-byte varint. */                                                     \
    return r;                                                                  \
  }                                                                            \
  /* Longer varint, fallback to out-of-line function. */                       \
  return decode_max8_function(r);                                              \
}
95 
/* Instantiate the template twice, producing upb_vdecode_check2_branch32() and
 * upb_vdecode_check2_branch64(); each forwards varints longer than two bytes
 * to the matching upb_vdecode_max8_* function.  The template macro is then
 * undefined so it does not leak out of this header. */
UPB_VARINT_DECODER_CHECK2(branch32, upb_vdecode_max8_branch32)
UPB_VARINT_DECODER_CHECK2(branch64, upb_vdecode_max8_branch64)
#undef UPB_VARINT_DECODER_CHECK2
99 
100 /* Our canonical functions for decoding varints, based on the currently
101  * favored best-performing implementations. */
102 UPB_INLINE upb_decoderet upb_vdecode_fast(const char *p) {
103   if (sizeof(long) == 8)
104     return upb_vdecode_check2_branch64(p);
105   else
106     return upb_vdecode_check2_branch32(p);
107 }
108 
109 
110 /* Encoding *******************************************************************/
111 
/* Returns the number of whole bytes needed to hold val: 1 for values up to
 * 0xff (including 0), 2 for values up to 0xffff, and so on up to 8. */
UPB_INLINE int upb_value_size(uint64_t val) {
  int top_bit = 0;  /* 0-based index of the highest set bit. */
  if (val == 0) return 1;
#ifdef __GNUC__
  /* The builtin is undefined for 0, which was handled above. */
  top_bit = 63 - __builtin_clzll(val);
#else
  {
    uint64_t rest;
    for (rest = val >> 1; rest != 0; rest >>= 1) top_bit++;
  }
#endif
  return top_bit / 8 + 1;
}
123 
/* Encodes a 64-bit varint into buf (which must be >=UPB_PB_VARINT_MAX_LEN
 * bytes long), returning how many bytes were used.
 *
 * The value is emitted 7 bits at a time, least significant group first; every
 * byte except the last has its high bit set.  Zero encodes as a single 0 byte.
 *
 * TODO: benchmark and optimize if necessary. */
UPB_INLINE size_t upb_vencode64(uint64_t val, char *buf) {
  size_t len = 0;
  do {
    uint8_t byte = (uint8_t)(val & 0x7fU);
    val >>= 7;
    /* Continuation bit: more groups follow. */
    if (val != 0) byte |= 0x80U;
    buf[len++] = (char)byte;
  } while (val != 0);
  return len;
}
140 
/* Returns how many bytes the varint encoding of val occupies: one byte per 7
 * bits of payload, with a minimum of one byte (so 1 through 10).
 *
 * The groups are counted directly, so nothing is encoded into a scratch buffer
 * just to learn its length. */
UPB_INLINE size_t upb_varint_size(uint64_t val) {
  size_t len = 1;
  for (val >>= 7; val != 0; val >>= 7) len++;
  return len;
}
145 
146 /* Encodes a 32-bit varint, *not* sign-extended. */
upb_vencode32(uint32_t val)147 UPB_INLINE uint64_t upb_vencode32(uint32_t val) {
148   char buf[UPB_PB_VARINT_MAX_LEN];
149   size_t bytes = upb_vencode64(val, buf);
150   uint64_t ret = 0;
151   UPB_ASSERT(bytes <= 5);
152   memcpy(&ret, buf, bytes);
153   ret = _upb_be_swap64(ret);
154   UPB_ASSERT(ret <= 0xffffffffffU);
155   return ret;
156 }
157 
158 #ifdef __cplusplus
159 }  /* extern "C" */
160 #endif
161 
162 #include "upb/port_undef.inc"
163 
164 #endif  /* UPB_VARINT_DECODER_H_ */
165