1 /**
2  * Copyright (c) 2019, The Linux Foundation. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *    * Redistributions of source code must retain the above copyright
8  *      notice, this list of conditions and the following disclaimer.
9  *    * Redistributions in binary form must reproduce the above
10  *      copyright notice, this list of conditions and the following
11  *      disclaimer in the documentation and/or other materials provided
12  *      with the distribution.
13  *    * Neither the name of The Linux Foundation nor the names of its
14  *      contributors may be used to endorse or promote products derived
15  *      from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
18  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
21  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
27  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #ifndef SBUF_PARSER_H
31 #define SBUF_PARSER_H
32 
33 #include "sbuf.h"
34 
35 /**
36  * Greedy Recursive Descent Parser in C
37  *
38  * Stop using strstr or regular expressions.  This simple Recursive Descent Parser can be
39  * used to handle complex grammars.
40  *
41  * For example:
42  *   parsing a query string form a uri
43  *   input: "file:///foo/bar_far.so.1?_blah1&_bar=barval5&_barfar"
44  *   expected output:
45  *      parsed query: _blah1 =
46  *      parsed query: _bar = barval5
47  *      parsed query: _barfar =
48  *
49  *   static int qmark(struct sbuf *buf) {
50  *      return sbuf_char(buf, '?');
51  *   }
52  *   static int notandoreq(struct sbuf *buf) {
53  *      return sbuf_notchars(buf, "&=");
54  *   }
55  *   static int notand(struct sbuf *buf) {
56  *      return sbuf_notchar(buf, '&');
57  *   }
58  *
59  *   const char *name;
60  *   int nameLen;
61  *   const char *value;
62  *   int valueLen;
63  *   const char *data = "file:///foo/bar_far.so.1?_blah1&_bar=barval5&_barfar";
64  *
65  *   //initialize
66  *   sbuf_parser_init(&buf, data, strlen(data));
67  *
68  *   //parse until question mark
69  *   assert(sbuf_until(&buf, sbuf_any, qmark));
70  *
71  *   //parse each query
72  *   while(!sbuf_end(&buf)) {
73  *      //record where the name starts
74  *      name = sbuf_cur(&buf);
75  *
76  *      //name is valid until '=' or '&'
77  *      assert(sbuf_many1(&buf, notandoreq));
78  *      nameLen = sbuf_cur(&buf) - name;
79  *
80  *      value = 0;
81  *      valueLen = 0;
82  *      //if the next char is a '=' then we also get a value
83  *      if(sbuf_char(&buf, '=')) {
84  *         value = sbuf_cur(&buf);
85  *
86  *         //value is until the next query that starts with '&'
87  *         assert(sbuf_many1(&buf, notand));
88  *         valueLen = sbuf_cur(&buf) - value;
89  *      }
90  *      //expect '&' or end
91  *      sbuf_char(&buf, '&');
92  *      printf("parsed query: %.*s = %.*s\n", nameLen, name, valueLen, value);
93  *   }
94  *
95  */
96 
97 //! init
sbuf_parser_init(struct sbuf * buf,const char * data,int dataLen)98 static __inline void sbuf_parser_init(struct sbuf* buf, const char *data, int dataLen) {
99    sbuf_init(buf, 0, (void*)data, dataLen);
100 }
101 
102 //! current postiion
sbuf_cur(struct sbuf * buf)103 static __inline char *sbuf_cur(struct sbuf* buf) {
104    return (char*)sbuf_head(buf);
105 }
106 
107 //! look at the next character if the buffer is still valid
sbuf_peek(struct sbuf * buf,char * c)108 static __inline int sbuf_peek(struct sbuf* buf, char* c) {
109    if(!sbuf_valid(buf)) {
110       return 0;
111    }
112    *c = *sbuf_cur(buf);
113    return 1;
114 }
115 
116 //! returns true if the buffer is ended
sbuf_end(struct sbuf * buf)117 static __inline int sbuf_end(struct sbuf* buf) {
118    return sbuf_left(buf) == 0;
119 }
120 
121 //! consume 1 char if its in string chars
sbuf_chars(struct sbuf * buf,const char * chars)122 static __inline int sbuf_chars(struct sbuf *buf, const char *chars) {
123    int i = 0;
124    char c;
125    if(!sbuf_peek(buf, &c)) {
126       return 0;
127    }
128    for(i = 0; chars[i] != 0; ++i) {
129       if(c == chars[i]) {
130          sbuf_advance(buf, 1);
131          return 1;
132       }
133    }
134    return 0;
135 }
136 
137 //! consume 1 char only if its not in string chars
sbuf_notchars(struct sbuf * buf,const char * chars)138 static __inline int sbuf_notchars(struct sbuf *buf, const char *chars) {
139    int i = 0;
140    char c;
141    if(!sbuf_peek(buf, &c)) {
142       return 0;
143    }
144    for(i = 0; chars[i] != 0; ++i) {
145       if(c == chars[i]) {
146          return 0;
147       }
148    }
149    sbuf_advance(buf, 1);
150    return 1;
151 }
152 
153 //! consume only char t
sbuf_char(struct sbuf * buf,const char t)154 static __inline int sbuf_char(struct sbuf *buf, const char t) {
155    char str[2] = {t, 0};
156    return sbuf_chars(buf, str);
157 }
158 
159 //! consume any char except for t
sbuf_notchar(struct sbuf * buf,const char t)160 static __inline int sbuf_notchar(struct sbuf *buf, const char t) {
161    char str[2] = {t, 0};
162    return sbuf_notchars(buf, str);
163 }
164 
165 /**
166  * consume any char
167  */
sbuf_any(struct sbuf * buf)168 static __inline int sbuf_any(struct sbuf* buf) {
169    return sbuf_notchars(buf, "");
170 }
171 
172 
173 /**
174  * range is pairs of characters
175  *
176  * pairs are inclusive, start must be less then or equal then the end
177  *
178  * for example: AZaz09--..
179  *   matches uppercase and lowercase letters, numbers, dashes and dots
180  *
181  */
sbuf_range(struct sbuf * buf,const char * chars)182 static __inline int sbuf_range(struct sbuf *buf, const char *chars) {
183    int i, j;
184    char c;
185    if(!sbuf_peek(buf, &c)) {
186       return 0;
187    }
188    for(i = 0, j = 1; chars[i] != 0 && chars[j] != 0; i+=2,j+=2) {
189       if(c >= chars[i] && c <= chars[j]) {
190          sbuf_advance(buf, 1);
191          return 1;
192       }
193    }
194    return 0;
195 }
196 
197 
198 /**
199  * greedly consume and match the entire string
200  * empty string always succeeds without consuming any data
201  */
sbuf_string(struct sbuf * buf,const char * str)202 static __inline int sbuf_string(struct sbuf *buf, const char *str) {
203    int i = 0;
204    for(i = 0; str[i] != 0; ++i) {
205       if(!sbuf_char(buf, str[i])) {
206          return 0;
207       }
208    }
209    return 1;
210 }
211 
212 /**
213  * consumes until fails
214  */
sbuf_many(struct sbuf * buf,int (* consume)(struct sbuf * buf))215 static __inline int sbuf_many(struct sbuf *buf,
216                               int(*consume)(struct sbuf *buf))
217 {
218    if(!sbuf_valid(buf)) {
219       return 0;
220    }
221    while(consume(buf)) {;}
222    return 1;
223 }
224 /**
225  * consumes until fails, must consume at least 1
226  */
sbuf_many1(struct sbuf * buf,int (* consume)(struct sbuf * buf))227 static __inline int sbuf_many1(struct sbuf *buf,
228                                int(*consume)(struct sbuf *buf))
229 {
230    if(!consume(buf)) {
231       return 0;
232    }
233    sbuf_many(buf, consume);
234    return 1;
235 }
236 /**
237  * runs 'consume' until 'stop' succeeds
238  * 'stop' must fail in such a way that it doesn't consume any data
239  */
sbuf_until(struct sbuf * buf,int (* consume)(struct sbuf * buf),int (* stop)(struct sbuf * buf))240 static __inline int sbuf_until(struct sbuf *buf,
241                                int(*consume)(struct sbuf *buf),
242                                int(*stop)(struct sbuf *buf))
243 {
244    while(!stop(buf)) {
245       if(!consume(buf))  {
246          return 0;
247       }
248    }
249    return 1;
250 }
251 
252 /**
253  * allows for backtracking,
254  * @param parser, runs parser and only consume if it succeeds
255  */
sbuf_try(struct sbuf * buf,int (* parser)(struct sbuf * buf))256 static __inline int sbuf_try(struct sbuf *buf, int(*parser)(struct sbuf *buf))
257 {
258    struct sbuf tryp;
259    sbuf_parser_init(&tryp, sbuf_cur(buf), sbuf_left(buf));
260    if(parser(&tryp)) {
261       sbuf_advance(buf, sbuf_cur(&tryp) - sbuf_cur(buf));
262       return 1;
263    }
264    return 0;
265 }
266 #endif // SBUF_PARSER_H
267