1 /*
2  * libwebsockets - small server side websockets and web server implementation
3  *
4  * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
/*
 * Option bits for the tokenizer's flags member.  Each one is a distinct bit,
 * so they may be ORed together; 0 selects the default behaviour.
 */

/* '-' does not end a token, so "my-token" is reported as one token */
#define LWS_TOKENIZE_F_MINUS_NONTERM	0x0001
/* Report aggregate colon-delimited tokens separately */
#define LWS_TOKENIZE_F_AGG_COLON	0x0002
/* Enforce the ordering of a simple "token , token , token ..." list */
#define LWS_TOKENIZE_F_COMMA_SEP_LIST	0x0004
/* Accept more characters inside tokens and treat fewer as delimiters; without
 * this flag only alphanumerics and underscore are allowed in tokens */
#define LWS_TOKENIZE_F_RFC7230_DELIMS	0x0008
/* '.' does not end a token, so "warmcat.com" is reported as one token */
#define LWS_TOKENIZE_F_DOT_NONTERM	0x0010
/* Anything that starts to look like a float, eg 1.2, is forced to be a string
 * token instead.  Dotted-quads like 192.168.0.1 then arrive as string tokens,
 * and things like 1.myserver.com are not rejected as malformed floats */
#define LWS_TOKENIZE_F_NO_FLOATS	0x0020
/* Integers are reported like any other string token rather than as
 * LWS_TOKZE_INTEGER */
#define LWS_TOKENIZE_F_NO_INTEGERS	0x0040
/* '#' turns the remainder of the line into a comment */
#define LWS_TOKENIZE_F_HASH_COMMENT	0x0080
/* '/' does not end a token, so "multipart/related" is reported as one token */
#define LWS_TOKENIZE_F_SLASH_NONTERM	0x0100
46 
/*
 * Result of one lws_tokenize() call: errors are negative, 0 means there is
 * no more content, and every kind of identified element has ordinal 1+.
 */
typedef enum {

	/* errors, all < 0 */

	LWS_TOKZE_ERR_BROKEN_UTF8	= -5,	/* malformed or partial utf8 */
	LWS_TOKZE_ERR_UNTERM_STRING	= -4,	/* ended while we were in "" */
	LWS_TOKZE_ERR_MALFORMED_FLOAT	= -3,	/* like 0..1 or 0.1.1 */
	LWS_TOKZE_ERR_NUM_ON_LHS	= -2,	/* like 123= or 0.1= */
	LWS_TOKZE_ERR_COMMA_LIST	= -1,	/* like ",tok", or, "tok,," */

	/* end of input */

	LWS_TOKZE_ENDED			=  0,	/* no more content */

	/* identified elements, all > 0 */

	LWS_TOKZE_DELIMITER		=  1,	/* a delimiter appeared */
	LWS_TOKZE_TOKEN			=  2,	/* a token appeared */
	LWS_TOKZE_INTEGER		=  3,	/* an integer appeared */
	LWS_TOKZE_FLOAT			=  4,	/* a float appeared */
	LWS_TOKZE_TOKEN_NAME_EQUALS	=  5,	/* token [whitespace] = */
	LWS_TOKZE_TOKEN_NAME_COLON	=  6,	/* token [whitespace] : (only
						   with the
						   LWS_TOKENIZE_F_AGG_COLON
						   flag) */
	LWS_TOKZE_QUOTED_STRING		=  7,	/* "*", where * may have any
						   char */

	/* the number of errors defined above */

	LWS_TOKZE_ERRS			=  5,

} lws_tokenize_elem;
71 
/*
 * Helper states a caller can use to enforce legal delimiter sequencing, for
 * example to reject "token,,token", "token," and ",token".
 */

enum lws_tokenize_delimiter_tracking {
	LWSTZ_DT_NEED_FIRST_CONTENT	= 0,
	LWSTZ_DT_NEED_DELIM		= 1,
	LWSTZ_DT_NEED_NEXT_CONTENT	= 2,
};
82 
/*
 * Tokenizer state: the caller describes the input with start, len and flags,
 * and each lws_tokenize() call reports its result through token and token_len.
 */
typedef struct lws_tokenize {
	const char *start;
	/**< set to the start of the string to tokenize */
	const char *token;
	/**< where the identified token or delimiter begins */
	size_t len;
	/**< set to the length of the string to tokenize */
	size_t token_len;
	/**< the length of the identified token or delimiter */

	uint16_t flags;
	/**< optional LWS_TOKENIZE_F_ flags ORed together, or 0 */
	uint8_t delim;
	/**< delimiter sequencing state; lws_tokenize_init() sets it to
	 *   LWSTZ_DT_NEED_FIRST_CONTENT */

	int8_t e;
	/**< convenient place to store the lws_tokenize() return value */
} lws_tokenize_t;
94 
95 /**
96  * lws_tokenize() - breaks down a string into tokens and delimiters in-place
97  *
98  * \param ts: the lws_tokenize struct to init
99  * \param start: the string to tokenize
100  * \param flags: LWS_TOKENIZE_F_ option flags
101  *
102  * This initializes the tokenize struct to point to the given string, and
103  * sets the length to 2GiB - 1 (so there must be a terminating NUL)... you can
104  * override this requirement by setting ts.len yourself before using it.
105  *
106  * .delim is also initialized to LWSTZ_DT_NEED_FIRST_CONTENT.
107  */
108 
109 LWS_VISIBLE LWS_EXTERN void
110 lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags);
111 
112 /**
113  * lws_tokenize() - breaks down a string into tokens and delimiters in-place
114  *
115  * \param ts: the lws_tokenize struct with information and state on what to do
116  *
117  * The \p ts struct should have its start, len and flags members initialized to
118  * reflect the string to be tokenized and any options.
119  *
120  * Then `lws_tokenize()` may be called repeatedly on the struct, returning one
121  * of `lws_tokenize_elem` each time, and with the struct's `token` and
122  * `token_len` members set to describe the content of the delimiter or token
123  * payload each time.
124  *
125  * There are no allocations during the process.
126  *
127  * returns lws_tokenize_elem that was identified (LWS_TOKZE_ENDED means reached
128  * the end of the string).
129  */
130 
131 LWS_VISIBLE LWS_EXTERN lws_tokenize_elem
132 lws_tokenize(struct lws_tokenize *ts);
133 
134 /**
135  * lws_tokenize_cstr() - copy token string to NUL-terminated buffer
136  *
137  * \param ts: pointer to lws_tokenize struct to operate on
138  * \param str: destination buffer
139  * \pparam max: bytes in destination buffer
140  *
141  * returns 0 if OK or nonzero if the string + NUL won't fit.
142  */
143 
144 LWS_VISIBLE LWS_EXTERN int
145 lws_tokenize_cstr(struct lws_tokenize *ts, char *str, size_t max);
146 
147 
148 /*
149  * lws_strexp: flexible string expansion helper api
150  *
151  * This stateful helper can handle multiple separate input chunks and multiple
152  * output buffer loads with arbitrary boundaries between literals and expanded
153  * symbols.  This allows it to handle fragmented input as well as arbitrarily
154  * long symbol expansions that are bigger than the output buffer itself.
155  *
156  * A user callback is used to convert symbol names to the symbol value.
157  *
 * This means even a single-byte input buffer and a single-byte output buffer
 * are enough to process a substitution of any length.  The state object is
 * under 100 bytes on a 64-bit system (a 32-byte name buffer, six pointer-sized
 * members and two state bytes) and it only uses 8 bytes stack.
161  */
162 
163 
/*
 * User callback that converts the symbol called name into its value.
 * priv is the user's object pointer that was given to lws_strexp_init().
 * For expansions too big for the output buffer, the callback returns
 * LSTRX_FILLED_OUT and is called again to continue, with *exp_ofs set
 * appropriately.
 * NOTE(review): out / pos / olen look like the output buffer, the write
 * position in it and its size -- confirm against the implementation.
 */
typedef int (*lws_strexp_expand_cb)(void *priv, const char *name,
				    char *out, size_t *pos, size_t olen,
				    size_t *exp_ofs);

/* State carried between lws_strexp_expand() calls */
typedef struct lws_strexp {
	char name[32];			/* storage for a symbol name */
	lws_strexp_expand_cb cb;	/* callback to expand named objects */
	void *priv;			/* user pointer passed to cb */
	char *out;			/* start of the output buffer */
	size_t olen;			/* output buffer length in bytes */
	size_t pos;			/* position of the next write in out */

	size_t exp_ofs;			/* offset handed to cb via *exp_ofs */

	uint8_t name_pos;		/* presumably the fill level of name[] */
	char state;			/* internal state -- values not shown
					 * in this header */
} lws_strexp_t;
180 
/* lws_strexp result codes: fatal errors are negative */
enum {
	LSTRX_FATAL_NAME_UNKNOWN	= -2,
	LSTRX_FATAL_NAME_TOO_LONG	= -1,	/* fatal */
	LSTRX_DONE			=  0,	/* it completed OK */
	LSTRX_FILLED_OUT		=  1,	/* out buf filled and needs
						 * resetting */
};
187 
188 
189 /**
190  * lws_strexp_init() - initialize an lws_strexp_t for use
191  *
192  * \p exp: the exp object to init
193  * \p priv: the user's object pointer to pass to callback
194  * \p cb: the callback to expand named objects
195  * \p out: the start of the output buffer
196  * \p olen: the length of the output buffer in bytes
197  *
198  * Prepares an lws_strexp_t for use and sets the initial output buffer
199  */
200 LWS_VISIBLE LWS_EXTERN void
201 lws_strexp_init(lws_strexp_t *exp, void *priv, lws_strexp_expand_cb cb,
202 		char *out, size_t olen);
203 
204 /**
205  * lws_strexp_reset_out() - reset the output buffer on an existing strexp
206  *
207  * \p exp: the exp object to init
208  * \p out: the start of the output buffer
209  * \p olen: the length of the output buffer in bytes
210  *
211  * Provides a new output buffer for lws_strexp_expand() to continue to write
212  * into.  It can be the same as the old one if it has been copied out or used.
213  * The position of the next write will be reset to the start of the given buf.
214  */
215 LWS_VISIBLE LWS_EXTERN void
216 lws_strexp_reset_out(lws_strexp_t *exp, char *out, size_t olen);
217 
218 /**
219  * lws_strexp_expand() - copy / expand a string into the output buffer
220  *
221  * \p exp: the exp object for the copy / expansion
222  * \p in: the start of the next input data
223  * \p len: the length of the input data
224  * \p pused_in: pointer to write the amount of input used
225  * \p pused_out: pointer to write the amount of output used
226  *
227  * Copies in to the output buffer set in exp, expanding any ${name} tokens using
228  * the callback.  \p *pused_in is set to the number of input chars used and
229  * \p *pused_out the number of output characters used
230  *
231  * May return LSTRX_FILLED_OUT early with *pused < len if the output buffer is
232  * filled.  Handle the output buffer and reset it with lws_strexp_reset_out()
233  * before calling again with adjusted in / len to continue.
234  *
235  * In the case of large expansions, the expansion itself may fill the output
236  * buffer, in which case the expansion callback returns the LSTRX_FILLED_OUT
237  * and will be called again to continue with its *exp_ofs parameter set
238  * appropriately.
239  */
240 LWS_VISIBLE LWS_EXTERN int
241 lws_strexp_expand(lws_strexp_t *exp, const char *in, size_t len,
242 		  size_t *pused_in, size_t *pused_out);
243 
244