1 #ifndef _XEXMLPARSER_HPP
2 #define _XEXMLPARSER_HPP
3 /*-------------------------------------------------------------------------
4  * drawElements Quality Program Test Executor
5  * ------------------------------------------
6  *
7  * Copyright 2014 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief XML Parser.
24  *
25  * \todo [2012-06-07 pyry] Not supported / handled properly:
26  *  - xml namespaces (<ns:Element>)
27  *  - backslash escapes in strings
28  *  - &quot; -style escapes
29  *  - utf-8
30  *//*--------------------------------------------------------------------*/
31 
32 #include "xeDefs.hpp"
33 #include "deRingBuffer.hpp"
34 
35 #include <string>
36 #include <map>
37 
38 namespace xe
39 {
40 namespace xml
41 {
42 
/*--------------------------------------------------------------------*//*!
 * \brief Kinds of tokens; this is the type returned by Tokenizer::getToken().
 *
 * Enumerators are numbered consecutively starting from zero.
 *//*--------------------------------------------------------------------*/
enum Token
{
	//! Not enough data to determine token.
	TOKEN_INCOMPLETE = 0,

	//! End of document string.
	TOKEN_END_OF_STRING,

	//! Block of data (anything outside tags).
	TOKEN_DATA,

	//! Comment: <!-- comment -->
	TOKEN_COMMENT,

	//! Identifier (in tags).
	TOKEN_IDENTIFIER,

	//! String (in tags).
	TOKEN_STRING,

	//! Tag start: <
	TOKEN_TAG_START,

	//! Tag end: >
	TOKEN_TAG_END,

	//! End tag start: </
	TOKEN_END_TAG_START,

	//! Empty element end: />
	TOKEN_EMPTY_ELEMENT_END,

	//! Processing instruction start: <?
	TOKEN_PROCESSING_INSTRUCTION_START,

	//! Processing instruction end: ?>
	TOKEN_PROCESSING_INSTRUCTION_END,

	//! Equals sign: =
	TOKEN_EQUAL,

	//! Entity reference, such as &amp;
	TOKEN_ENTITY,

	//! Final enumerator; its value equals the number of token kinds above.
	TOKEN_LAST
};
62 
/*--------------------------------------------------------------------*//*!
 * \brief Kinds of elements; this is the type returned by Parser::getElement().
 *
 * Enumerators are numbered consecutively starting from zero.
 *//*--------------------------------------------------------------------*/
enum Element
{
	//! Incomplete element.
	ELEMENT_INCOMPLETE = 0,

	//! Element start.
	ELEMENT_START,

	//! Element end.
	ELEMENT_END,

	//! Data element.
	ELEMENT_DATA,

	//! End of document string.
	ELEMENT_END_OF_STRING,

	//! Final enumerator; its value equals the number of element kinds above.
	ELEMENT_LAST
};
73 
74 const char* getTokenName (Token token);
75 
76 // \todo [2012-10-17 pyry] Add line number etc.
77 class ParseError : public xe::ParseError
78 {
79 public:
ParseError(const std::string & message)80 	ParseError (const std::string& message) : xe::ParseError(message) {}
81 };
82 
83 class Tokenizer
84 {
85 public:
86 						Tokenizer			(void);
87 						~Tokenizer			(void);
88 
89 	void				clear				(void);		//!< Resets tokenizer to initial state.
90 
91 	void				feed				(const deUint8* bytes, int numBytes);
92 	void				advance				(void);
93 
getToken(void) const94 	Token				getToken			(void) const		{ return m_curToken;	}
getTokenLen(void) const95 	int					getTokenLen			(void) const		{ return m_curTokenLen;	}
getTokenByte(int offset) const96 	deUint8				getTokenByte		(int offset) const	{ DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING); return m_buf.peekBack(offset); }
97 	void				getTokenStr			(std::string& dst) const;
98 	void				appendTokenStr		(std::string& dst) const;
99 
100 	void				getString			(std::string& dst) const;
101 
102 private:
103 						Tokenizer			(const Tokenizer& other);
104 	Tokenizer&			operator=			(const Tokenizer& other);
105 
106 	int					getChar				(int offset) const;
107 
108 	void				error				(const std::string& what);
109 
110 	enum State
111 	{
112 		STATE_DATA = 0,
113 		STATE_TAG,
114 		STATE_IDENTIFIER,
115 		STATE_VALUE,
116 		STATE_COMMENT,
117 		STATE_ENTITY,
118 
119 		STATE_LAST
120 	};
121 
122 	enum
123 	{
124 		END_OF_STRING	= 0,			//!< End of string (0).
125 		END_OF_BUFFER	= 0xffffffff	//!< End of current data buffer.
126 	};
127 
128 	Token						m_curToken;			//!< Current token.
129 	int							m_curTokenLen;		//!< Length of current token.
130 
131 	State						m_state;			//!< Tokenization state.
132 
133 	de::RingBuffer<deUint8>		m_buf;
134 };
135 
136 class Parser
137 {
138 public:
139 	typedef std::map<std::string, std::string>		AttributeMap;
140 	typedef AttributeMap::const_iterator			AttributeIter;
141 
142 						Parser				(void);
143 						~Parser				(void);
144 
145 	void				clear				(void);		//!< Resets parser to initial state.
146 
147 	void				feed				(const deUint8* bytes, int numBytes);
148 	void				advance				(void);
149 
getElement(void) const150 	Element				getElement			(void) const						{ return m_element;										}
151 
152 	// For ELEMENT_START / ELEMENT_END.
getElementName(void) const153 	const char*			getElementName		(void) const						{ return m_elementName.c_str();							}
154 
155 	// For ELEMENT_START.
hasAttribute(const char * name) const156 	bool				hasAttribute		(const char* name) const			{ return m_attributes.find(name) != m_attributes.end();	}
getAttribute(const char * name) const157 	const char*			getAttribute		(const char* name) const			{ return m_attributes.find(name)->second.c_str();		}
attributes(void) const158 	const AttributeMap&	attributes			(void) const						{ return m_attributes;									}
159 
160 	// For ELEMENT_DATA.
161 	int					getDataSize			(void) const;
162 	deUint8				getDataByte			(int offset) const;
163 	void				getDataStr			(std::string& dst) const;
164 	void				appendDataStr		(std::string& dst) const;
165 
166 private:
167 						Parser				(const Parser& other);
168 	Parser&				operator=			(const Parser& other);
169 
170 	void				parseEntityValue	(void);
171 
172 	void				error				(const std::string& what);
173 
174 	enum State
175 	{
176 		STATE_DATA = 0,						//!< Initial state - assuming data or tag open.
177 		STATE_ENTITY,						//!< Parsed entity is stored - overrides data.
178 		STATE_IN_PROCESSING_INSTRUCTION,	//!< In processing instruction.
179 		STATE_START_TAG_OPEN,				//!< Start tag open.
180 		STATE_END_TAG_OPEN,					//!< End tag open.
181 		STATE_EXPECTING_END_TAG_CLOSE,		//!< Expecting end tag close.
182 		STATE_ATTRIBUTE_LIST,				//!< Expecting attribute list.
183 		STATE_EXPECTING_ATTRIBUTE_EQ,		//!< Got attribute name, expecting =.
184 		STATE_EXPECTING_ATTRIBUTE_VALUE,	//!< Expecting attribute value.
185 		STATE_YIELD_EMPTY_ELEMENT_END,		//!< Empty element: start has been reported but not end.
186 
187 		STATE_LAST
188 	};
189 
190 	Tokenizer			m_tokenizer;
191 
192 	Element				m_element;
193 	std::string			m_elementName;
194 	AttributeMap		m_attributes;
195 
196 	State				m_state;
197 	std::string			m_attribName;
198 	std::string			m_entityValue;		//!< Data override, such as entity value.
199 };
200 
201 // Inline implementations
202 
getTokenStr(std::string & dst) const203 inline void Tokenizer::getTokenStr (std::string& dst) const
204 {
205 	DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
206 	dst.resize(m_curTokenLen);
207 	for (int ndx = 0; ndx < m_curTokenLen; ndx++)
208 		dst[ndx] = m_buf.peekBack(ndx);
209 }
210 
appendTokenStr(std::string & dst) const211 inline void Tokenizer::appendTokenStr (std::string& dst) const
212 {
213 	DE_ASSERT(m_curToken != TOKEN_INCOMPLETE && m_curToken != TOKEN_END_OF_STRING);
214 
215 	size_t oldLen = dst.size();
216 	dst.resize(oldLen+m_curTokenLen);
217 
218 	for (int ndx = 0; ndx < m_curTokenLen; ndx++)
219 		dst[oldLen+ndx] = m_buf.peekBack(ndx);
220 }
221 
getDataSize(void) const222 inline int Parser::getDataSize (void) const
223 {
224 	if (m_state != STATE_ENTITY)
225 		return m_tokenizer.getTokenLen();
226 	else
227 		return (int)m_entityValue.size();
228 }
229 
getDataByte(int offset) const230 inline deUint8 Parser::getDataByte (int offset) const
231 {
232 	if (m_state != STATE_ENTITY)
233 		return m_tokenizer.getTokenByte(offset);
234 	else
235 		return (deUint8)m_entityValue[offset];
236 }
237 
getDataStr(std::string & dst) const238 inline void Parser::getDataStr (std::string& dst) const
239 {
240 	if (m_state != STATE_ENTITY)
241 		return m_tokenizer.getTokenStr(dst);
242 	else
243 		dst = m_entityValue;
244 }
245 
appendDataStr(std::string & dst) const246 inline void Parser::appendDataStr (std::string& dst) const
247 {
248 	if (m_state != STATE_ENTITY)
249 		return m_tokenizer.appendTokenStr(dst);
250 	else
251 		dst += m_entityValue;
252 }
253 
254 } // xml
255 } // xe
256 
257 #endif // _XEXMLPARSER_HPP
258