1 #ifndef MSGPACK_H
2 #define MSGPACK_H
3 
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
5 
6 namespace msgpack {
7 
// The MessagePack format is dynamically typed and schema-less. Format is:
// message: [type][header][payload]
// where type is one byte, the header length is a fixed function of the type,
// and the payload is zero to N bytes, with its length encoded in [type][header]
12 
13 // Scalar fields include boolean, signed integer, float, string etc
14 // Composite types are sequences of messages
15 // Array field is [header][element][element]...
16 // Map field is [header][key][value][key][value]...
17 
18 // Multibyte integer fields are big endian encoded
19 // The map key can be any message type
20 // Maps may contain duplicate keys
21 // Data is not uniquely encoded, e.g. integer "8" may be stored as one byte or
22 // in as many as nine, as signed or unsigned. Implementation defined.
23 // Similarly "foo" may embed the length in the type field or in multiple bytes
24 
25 // This parser is structured as an iterator over a sequence of bytes.
26 // It calls a user provided function on each message in order to extract fields
27 // The default implementation for each scalar type is to do nothing. For map or
28 // arrays, the default implementation returns just after that message to support
29 // iterating to the next message, but otherwise has no effect.
30 
// A half-open span of bytes, [start, end). The parsing functions in this
// header treat start == end as "no message available". The struct only stores
// the two pointers; it does not own the bytes they refer to.
struct byte_range {
  const unsigned char *start; // First byte of the range
  const unsigned char *end;   // One past the last byte of the range
};
35 
// Only declared here. Callers in this header pass the start of a message and
// the end of the available bytes, treat a non-null result as the position
// just past that message, and treat a null result as failure (they stop
// iterating and return nullptr themselves).
const unsigned char *skip_next_message(const unsigned char *start,
                                       const unsigned char *end);
38 
39 template <typename Derived> class functors_defaults {
40 public:
cb_string(size_t N,const unsigned char * str)41   void cb_string(size_t N, const unsigned char *str) {
42     derived().handle_string(N, str);
43   }
cb_boolean(bool x)44   void cb_boolean(bool x) { derived().handle_boolean(x); }
cb_signed(int64_t x)45   void cb_signed(int64_t x) { derived().handle_signed(x); }
cb_unsigned(uint64_t x)46   void cb_unsigned(uint64_t x) { derived().handle_unsigned(x); }
cb_array_elements(byte_range bytes)47   void cb_array_elements(byte_range bytes) {
48     derived().handle_array_elements(bytes);
49   }
cb_map_elements(byte_range key,byte_range value)50   void cb_map_elements(byte_range key, byte_range value) {
51     derived().handle_map_elements(key, value);
52   }
cb_array(uint64_t N,byte_range bytes)53   const unsigned char *cb_array(uint64_t N, byte_range bytes) {
54     return derived().handle_array(N, bytes);
55   }
cb_map(uint64_t N,byte_range bytes)56   const unsigned char *cb_map(uint64_t N, byte_range bytes) {
57     return derived().handle_map(N, bytes);
58   }
59 
60 private:
derived()61   Derived &derived() { return *static_cast<Derived *>(this); }
62 
63   // Default implementations for scalar ops are no-ops
handle_string(size_t,const unsigned char *)64   void handle_string(size_t, const unsigned char *) {}
handle_boolean(bool)65   void handle_boolean(bool) {}
handle_signed(int64_t)66   void handle_signed(int64_t) {}
handle_unsigned(uint64_t)67   void handle_unsigned(uint64_t) {}
handle_array_elements(byte_range)68   void handle_array_elements(byte_range) {}
handle_map_elements(byte_range,byte_range)69   void handle_map_elements(byte_range, byte_range) {}
70 
71   // Default implementation for sequences is to skip over the messages
handle_array(uint64_t N,byte_range bytes)72   const unsigned char *handle_array(uint64_t N, byte_range bytes) {
73     for (uint64_t i = 0; i < N; i++) {
74       const unsigned char *next = skip_next_message(bytes.start, bytes.end);
75       if (!next) {
76         return nullptr;
77       }
78       cb_array_elements(bytes);
79       bytes.start = next;
80     }
81     return bytes.start;
82   }
handle_map(uint64_t N,byte_range bytes)83   const unsigned char *handle_map(uint64_t N, byte_range bytes) {
84     for (uint64_t i = 0; i < N; i++) {
85       const unsigned char *start_key = bytes.start;
86       const unsigned char *end_key = skip_next_message(start_key, bytes.end);
87       if (!end_key) {
88         return nullptr;
89       }
90       const unsigned char *start_value = end_key;
91       const unsigned char *end_value =
92           skip_next_message(start_value, bytes.end);
93       if (!end_value) {
94         return nullptr;
95       }
96       cb_map_elements({start_key, end_key}, {start_value, end_value});
97       bytes.start = end_value;
98     }
99     return bytes.start;
100   }
101 };
102 
103 typedef enum : uint8_t {
104 #define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER) NAME,
105 #include "msgpack.def"
106 #undef X
107 } type;
108 
109 [[noreturn]] void internal_error();
110 type parse_type(unsigned char x);
111 unsigned bytes_used_fixed(type ty);
112 
113 typedef uint64_t (*payload_info_t)(const unsigned char *);
114 payload_info_t payload_info(msgpack::type ty);
115 
116 template <typename T, typename R> R bitcast(T x);
117 
118 template <typename F, msgpack::type ty>
handle_msgpack_given_type(byte_range bytes,F f)119 const unsigned char *handle_msgpack_given_type(byte_range bytes, F f) {
120   const unsigned char *start = bytes.start;
121   const unsigned char *end = bytes.end;
122   const uint64_t available = end - start;
123   assert(available != 0);
124   assert(ty == parse_type(*start));
125 
126   const uint64_t bytes_used = bytes_used_fixed(ty);
127   if (available < bytes_used) {
128     return 0;
129   }
130   const uint64_t available_post_header = available - bytes_used;
131 
132   const payload_info_t info = payload_info(ty);
133   const uint64_t N = info(start);
134 
135   switch (ty) {
136   case msgpack::t:
137   case msgpack::f: {
138     // t is 0b11000010, f is 0b11000011, masked with 0x1
139     f.cb_boolean(N);
140     return start + bytes_used;
141   }
142 
143   case msgpack::posfixint:
144   case msgpack::uint8:
145   case msgpack::uint16:
146   case msgpack::uint32:
147   case msgpack::uint64: {
148     f.cb_unsigned(N);
149     return start + bytes_used;
150   }
151 
152   case msgpack::negfixint:
153   case msgpack::int8:
154   case msgpack::int16:
155   case msgpack::int32:
156   case msgpack::int64: {
157     f.cb_signed(bitcast<uint64_t, int64_t>(N));
158     return start + bytes_used;
159   }
160 
161   case msgpack::fixstr:
162   case msgpack::str8:
163   case msgpack::str16:
164   case msgpack::str32: {
165     if (available_post_header < N) {
166       return 0;
167     } else {
168       f.cb_string(N, start + bytes_used);
169       return start + bytes_used + N;
170     }
171   }
172 
173   case msgpack::fixarray:
174   case msgpack::array16:
175   case msgpack::array32: {
176     return f.cb_array(N, {start + bytes_used, end});
177   }
178 
179   case msgpack::fixmap:
180   case msgpack::map16:
181   case msgpack::map32: {
182     return f.cb_map(N, {start + bytes_used, end});
183   }
184 
185   case msgpack::nil:
186   case msgpack::bin8:
187   case msgpack::bin16:
188   case msgpack::bin32:
189   case msgpack::float32:
190   case msgpack::float64:
191   case msgpack::ext8:
192   case msgpack::ext16:
193   case msgpack::ext32:
194   case msgpack::fixext1:
195   case msgpack::fixext2:
196   case msgpack::fixext4:
197   case msgpack::fixext8:
198   case msgpack::fixext16:
199   case msgpack::never_used: {
200     if (available_post_header < N) {
201       return 0;
202     }
203     return start + bytes_used + N;
204   }
205   }
206   internal_error();
207 }
208 
209 template <typename F>
handle_msgpack(byte_range bytes,F f)210 const unsigned char *handle_msgpack(byte_range bytes, F f) {
211   const unsigned char *start = bytes.start;
212   const unsigned char *end = bytes.end;
213   const uint64_t available = end - start;
214   if (available == 0) {
215     return 0;
216   }
217   const type ty = parse_type(*start);
218 
219   switch (ty) {
220 #define X(NAME, WIDTH, PAYLOAD, LOWER, UPPER)                                  \
221   case msgpack::NAME:                                                          \
222     return handle_msgpack_given_type<F, msgpack::NAME>(bytes, f);
223 #include "msgpack.def"
224 #undef X
225   }
226 
227   internal_error();
228 }
229 
// Only declared here. Presumably reports whether the message at the front of
// bytes is a string equal to str - confirm against the definition.
bool message_is_string(byte_range bytes, const char *str);
231 
foronly_string(byte_range bytes,C callback)232 template <typename C> void foronly_string(byte_range bytes, C callback) {
233   struct inner : functors_defaults<inner> {
234     inner(C &cb) : cb(cb) {}
235     C &cb;
236     void handle_string(size_t N, const unsigned char *str) { cb(N, str); }
237   };
238   handle_msgpack<inner>(bytes, {callback});
239 }
240 
foronly_unsigned(byte_range bytes,C callback)241 template <typename C> void foronly_unsigned(byte_range bytes, C callback) {
242   struct inner : functors_defaults<inner> {
243     inner(C &cb) : cb(cb) {}
244     C &cb;
245     void handle_unsigned(uint64_t x) { cb(x); }
246   };
247   handle_msgpack<inner>(bytes, {callback});
248 }
249 
foreach_array(byte_range bytes,C callback)250 template <typename C> void foreach_array(byte_range bytes, C callback) {
251   struct inner : functors_defaults<inner> {
252     inner(C &cb) : cb(cb) {}
253     C &cb;
254     void handle_array_elements(byte_range element) { cb(element); }
255   };
256   handle_msgpack<inner>(bytes, {callback});
257 }
258 
foreach_map(byte_range bytes,C callback)259 template <typename C> void foreach_map(byte_range bytes, C callback) {
260   struct inner : functors_defaults<inner> {
261     inner(C &cb) : cb(cb) {}
262     C &cb;
263     void handle_map_elements(byte_range key, byte_range value) {
264       cb(key, value);
265     }
266   };
267   handle_msgpack<inner>(bytes, {callback});
268 }
269 
// Crude approximation to json.
// Only declared here; presumably writes a human-readable rendering of the
// message(s) in the given range - confirm against the definition.
void dump(byte_range);
272 
273 } // namespace msgpack
274 
275 #endif
276