1 /** \file
2  * Implementation of the ANTLR3 string and string factory classes
3  */
4 
5 // [The "BSD licence"]
6 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
7 // http://www.temporal-wave.com
8 // http://www.linkedin.com/in/jimidle
9 //
10 // All rights reserved.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions
14 // are met:
15 // 1. Redistributions of source code must retain the above copyright
16 //    notice, this list of conditions and the following disclaimer.
17 // 2. Redistributions in binary form must reproduce the above copyright
18 //    notice, this list of conditions and the following disclaimer in the
19 //    documentation and/or other materials provided with the distribution.
20 // 3. The name of the author may not be used to endorse or promote products
21 //    derived from this software without specific prior written permission.
22 //
23 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 
34 #include    <antlr3string.h>
35 
36 /* Factory API
37  */
38 static    pANTLR3_STRING    newRaw8	(pANTLR3_STRING_FACTORY factory);
39 static    pANTLR3_STRING    newRawUTF16	(pANTLR3_STRING_FACTORY factory);
40 static    pANTLR3_STRING    newSize8	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
41 static    pANTLR3_STRING    newSizeUTF16	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
42 static    pANTLR3_STRING    newPtr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
43 static    pANTLR3_STRING    newPtrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
44 static    pANTLR3_STRING    newPtrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
45 static    pANTLR3_STRING    newStr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
46 static    pANTLR3_STRING    newStrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
47 static    pANTLR3_STRING    newStrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
48 static    void		    destroy	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
49 static    pANTLR3_STRING    printable8	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
50 static    pANTLR3_STRING    printableUTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
51 static    void		    closeFactory(pANTLR3_STRING_FACTORY factory);
52 
53 /* String API
54  */
55 static    pANTLR3_UINT8	    set8	(pANTLR3_STRING string, const char * chars);
56 static    pANTLR3_UINT8	    setUTF16_8	(pANTLR3_STRING string, const char * chars);
57 static    pANTLR3_UINT8	    setUTF16_UTF16	(pANTLR3_STRING string, const char * chars);
58 static    pANTLR3_UINT8	    append8	(pANTLR3_STRING string, const char * newbit);
59 static    pANTLR3_UINT8	    appendUTF16_8	(pANTLR3_STRING string, const char * newbit);
60 static    pANTLR3_UINT8	    appendUTF16_UTF16	(pANTLR3_STRING string, const char * newbit);
61 static	  pANTLR3_UINT8	    insert8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
62 static	  pANTLR3_UINT8	    insertUTF16_8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
63 static	  pANTLR3_UINT8	    insertUTF16_UTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
64 
65 static    pANTLR3_UINT8	    setS	(pANTLR3_STRING string, pANTLR3_STRING chars);
66 static    pANTLR3_UINT8	    appendS	(pANTLR3_STRING string, pANTLR3_STRING newbit);
67 static	  pANTLR3_UINT8	    insertS	(pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);
68 
69 static    pANTLR3_UINT8	    addc8	(pANTLR3_STRING string, ANTLR3_UINT32 c);
70 static    pANTLR3_UINT8	    addcUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 c);
71 static    pANTLR3_UINT8	    addi8	(pANTLR3_STRING string, ANTLR3_INT32 i);
72 static    pANTLR3_UINT8	    addiUTF16	(pANTLR3_STRING string, ANTLR3_INT32 i);
73 static	  pANTLR3_UINT8	    inserti8	(pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
74 static	  pANTLR3_UINT8	    insertiUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
75 
76 static    ANTLR3_UINT32     compare8	(pANTLR3_STRING string, const char * compStr);
77 static    ANTLR3_UINT32     compareUTF16_8	(pANTLR3_STRING string, const char * compStr);
78 static    ANTLR3_UINT32     compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr);
79 static    ANTLR3_UINT32     compareS	(pANTLR3_STRING string, pANTLR3_STRING compStr);
80 static    ANTLR3_UCHAR      charAt8	(pANTLR3_STRING string, ANTLR3_UINT32 offset);
81 static    ANTLR3_UCHAR      charAtUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 offset);
82 static    pANTLR3_STRING    subString8	(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
83 static    pANTLR3_STRING    subStringUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
84 static	  ANTLR3_INT32	    toInt32_8	(pANTLR3_STRING string);
85 static	  ANTLR3_INT32	    toInt32_UTF16  (pANTLR3_STRING string);
86 static	  pANTLR3_STRING    to8_8		(pANTLR3_STRING string);
87 static	  pANTLR3_STRING    to8_UTF16		(pANTLR3_STRING string);
88 static	pANTLR3_STRING		toUTF8_8	(pANTLR3_STRING string);
89 static	pANTLR3_STRING		toUTF8_UTF16	(pANTLR3_STRING string);
90 
91 /* Local helpers
92  */
93 static	void			stringInit8	(pANTLR3_STRING string);
94 static	void			stringInitUTF16	(pANTLR3_STRING string);
95 static	void	ANTLR3_CDECL	stringFree	(pANTLR3_STRING string);
96 
97 ANTLR3_API pANTLR3_STRING_FACTORY
antlr3StringFactoryNew(ANTLR3_UINT32 encoding)98 antlr3StringFactoryNew(ANTLR3_UINT32 encoding)
99 {
100 	pANTLR3_STRING_FACTORY  factory;
101 
102 	/* Allocate memory
103 	*/
104 	factory	= (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY));
105 
106 	if	(factory == NULL)
107 	{
108 		return	NULL;
109 	}
110 
111 	/* Now we make a new list to track the strings.
112 	*/
113 	factory->strings	= antlr3VectorNew(0);
114 	factory->index	= 0;
115 
116 	if	(factory->strings == NULL)
117 	{
118 		ANTLR3_FREE(factory);
119 		return	NULL;
120 	}
121 
122     // Install the API
123     //
124     // TODO: These encodings need equivalent functions to
125     // UTF16 and 8Bit if I am going to support those encodings in the STRING stuff.
126 	// The STRING stuff was intended as a quick and dirty hack for people that did not
127 	// want to worry about memory and performance very much, but nobody ever reads the
128 	// notes or comments or uses the email list search. I want to discourage using these
129 	// interfaces as it is much more efficient to use the pointers within the tokens
130 	// directly, so I am not implementing the string stuff for the newer encodings.
131     // We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they
132 	// will not be useful beyond returning the text.
133 	//
134     switch(encoding)
135     {
136 		case    ANTLR3_ENC_UTF32:
137 			break;
138 
139 		case    ANTLR3_ENC_UTF32BE:
140 			break;
141 
142 		case    ANTLR3_ENC_UTF32LE:
143 			break;
144 
145 		case    ANTLR3_ENC_UTF16BE:
146 		case    ANTLR3_ENC_UTF16LE:
147 		case    ANTLR3_ENC_UTF16:
148 
149 			factory->newRaw	    =  newRawUTF16;
150 			factory->newSize	=  newSizeUTF16;
151 			factory->newPtr	    =  newPtrUTF16_UTF16;
152 			factory->newPtr8	=  newPtrUTF16_8;
153 			factory->newStr	    =  newStrUTF16_UTF16;
154 			factory->newStr8	=  newStrUTF16_8;
155 			factory->printable	=  printableUTF16;
156 			factory->destroy	=  destroy;
157 			factory->close	    =  closeFactory;
158 			break;
159 
160 		case    ANTLR3_ENC_UTF8:
161 		case    ANTLR3_ENC_EBCDIC:
162 		case    ANTLR3_ENC_8BIT:
163 		default:
164 
165 			factory->newRaw	    =  newRaw8;
166 			factory->newSize	=  newSize8;
167 			factory->newPtr	    =  newPtr8;
168 			factory->newPtr8	=  newPtr8;
169 			factory->newStr	    =  newStr8;
170 			factory->newStr8	=  newStr8;
171 			factory->printable	=  printable8;
172 			factory->destroy	=  destroy;
173 			factory->close	    =  closeFactory;
174 			break;
175     }
176 	return  factory;
177 }
178 
179 
180 /**
181  *
182  * \param factory
183  * \return
184  */
185 static    pANTLR3_STRING
newRaw8(pANTLR3_STRING_FACTORY factory)186 newRaw8	(pANTLR3_STRING_FACTORY factory)
187 {
188     pANTLR3_STRING  string;
189 
190     string  = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
191 
192     if	(string == NULL)
193     {
194 		return	NULL;
195     }
196 
197     /* Structure is allocated, now fill in the API etc.
198      */
199     stringInit8(string);
200     string->factory = factory;
201 
202     /* Add the string into the allocated list
203      */
204     factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
205     string->index   = factory->index++;
206 
207     return string;
208 }
209 /**
210  *
211  * \param factory
212  * \return
213  */
214 static    pANTLR3_STRING
newRawUTF16(pANTLR3_STRING_FACTORY factory)215 newRawUTF16	(pANTLR3_STRING_FACTORY factory)
216 {
217     pANTLR3_STRING  string;
218 
219     string  = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
220 
221     if	(string == NULL)
222     {
223 		return	NULL;
224     }
225 
226     /* Structure is allocated, now fill in the API etc.
227      */
228     stringInitUTF16(string);
229     string->factory = factory;
230 
231     /* Add the string into the allocated list
232      */
233     factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
234     string->index   = factory->index++;
235 
236     return string;
237 }
238 static
stringFree(pANTLR3_STRING string)239 void	ANTLR3_CDECL stringFree  (pANTLR3_STRING string)
240 {
241     /* First free the string itself if there was anything in it
242      */
243     if	(string->chars)
244     {
245 	ANTLR3_FREE(string->chars);
246     }
247 
248     /* Now free the space for this string
249      */
250     ANTLR3_FREE(string);
251 
252     return;
253 }
254 /**
255  *
256  * \param string
257  * \return
258  */
259 static	void
stringInit8(pANTLR3_STRING string)260 stringInit8  (pANTLR3_STRING string)
261 {
262     string->len			= 0;
263     string->size		= 0;
264     string->chars		= NULL;
265     string->encoding	= ANTLR3_ENC_8BIT ;
266 
267     /* API for 8 bit strings*/
268 
269     string->set		= set8;
270     string->set8	= set8;
271     string->append	= append8;
272     string->append8	= append8;
273     string->insert	= insert8;
274     string->insert8	= insert8;
275     string->addi	= addi8;
276     string->inserti	= inserti8;
277     string->addc	= addc8;
278     string->charAt	= charAt8;
279     string->compare	= compare8;
280     string->compare8	= compare8;
281     string->subString	= subString8;
282     string->toInt32	= toInt32_8;
283     string->to8		= to8_8;
284     string->toUTF8	= toUTF8_8;
285     string->compareS	= compareS;
286     string->setS	= setS;
287     string->appendS	= appendS;
288     string->insertS	= insertS;
289 
290 }
291 /**
292  *
293  * \param string
294  * \return
295  */
296 static	void
stringInitUTF16(pANTLR3_STRING string)297 stringInitUTF16  (pANTLR3_STRING string)
298 {
299     string->len		= 0;
300     string->size	= 0;
301     string->chars	= NULL;
302     string->encoding	= ANTLR3_ENC_8BIT;
303 
304     /* API for UTF16 strings */
305 
306     string->set		= setUTF16_UTF16;
307     string->set8	= setUTF16_8;
308     string->append	= appendUTF16_UTF16;
309     string->append8	= appendUTF16_8;
310     string->insert	= insertUTF16_UTF16;
311     string->insert8	= insertUTF16_8;
312     string->addi	= addiUTF16;
313     string->inserti	= insertiUTF16;
314     string->addc	= addcUTF16;
315     string->charAt	= charAtUTF16;
316     string->compare	= compareUTF16_UTF16;
317     string->compare8	= compareUTF16_8;
318     string->subString	= subStringUTF16;
319     string->toInt32	= toInt32_UTF16;
320     string->to8		= to8_UTF16;
321     string->toUTF8	= toUTF8_UTF16;
322 
323     string->compareS	= compareS;
324     string->setS	= setS;
325     string->appendS	= appendS;
326     string->insertS	= insertS;
327 }
328 /**
329  *
330  * \param string
331  * \return
332  * TODO: Implement UTF-8
333  */
334 static	void
stringInitUTF8(pANTLR3_STRING string)335 stringInitUTF8  (pANTLR3_STRING string)
336 {
337     string->len	    = 0;
338     string->size    = 0;
339     string->chars   = NULL;
340 
341     /* API */
342 
343 }
344 
345 // Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself
346 // a memcpy as we make no assumptions about the 8 bit encoding.
347 //
348 static	pANTLR3_STRING
toUTF8_8(pANTLR3_STRING string)349 toUTF8_8	(pANTLR3_STRING string)
350 {
351 	return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len);
352 }
353 
354 // Convert a UTF16 string into a UTF8 representation using the Unicode.org
355 // supplied C algorithms, which are now contained within the ANTLR3 C runtime
356 // as permitted by the Unicode license (within the source code antlr3convertutf.c/.h
357 // UCS2 has the same encoding as UTF16 so we can use UTF16 converter.
358 //
359 static	pANTLR3_STRING
toUTF8_UTF16(pANTLR3_STRING string)360 toUTF8_UTF16	(pANTLR3_STRING string)
361 {
362 
363     UTF8	      * outputEnd;
364     UTF16	      * inputEnd;
365     pANTLR3_STRING	utf8String;
366 
367     ConversionResult	cResult;
368 
369     // Allocate the output buffer, which needs to accommodate potentially
370     // 3X (in bytes) the input size (in chars).
371     //
372     utf8String	= string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");
373 
374     if	(utf8String != NULL)
375     {
376         // Free existing allocation
377         //
378         ANTLR3_FREE(utf8String->chars);
379 
380         // Reallocate according to maximum expected size
381         //
382         utf8String->size	= string->len *3;
383         utf8String->chars	= (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1);
384 
385         if	(utf8String->chars != NULL)
386         {
387             inputEnd  = (UTF16 *)	(string->chars);
388             outputEnd = (UTF8 *)	(utf8String->chars);
389 
390             // Call the Unicode converter
391             //
392             cResult =  ConvertUTF16toUTF8
393                 (
394                 (const UTF16**)&inputEnd,
395                 ((const UTF16 *)(string->chars)) + string->len,
396                 &outputEnd,
397                 outputEnd + utf8String->size - 1,
398                 lenientConversion
399                 );
400 
401             // We don't really care if things failed or not here, we just converted
402             // everything that was vaguely possible and stopped when it wasn't. It is
403             // up to the grammar programmer to verify that the input is sensible.
404             //
405             utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);
406 
407             *(outputEnd+1) = '\0';		// Always null terminate
408         }
409     }
410     return utf8String;
411 }
412 
413 /**
414  * Creates a new string with enough capacity for size 8 bit characters plus a terminator.
415  *
416  * \param[in] factory - Pointer to the string factory that owns strings
417  * \param[in] size - In characters
418  * \return pointer to the new string.
419  */
420 static    pANTLR3_STRING
newSize8(pANTLR3_STRING_FACTORY factory,ANTLR3_UINT32 size)421 newSize8	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
422 {
423     pANTLR3_STRING  string;
424 
425     string  = factory->newRaw(factory);
426 
427     if	(string == NULL)
428     {
429         return	string;
430     }
431 
432     /* Always add one more byte for a terminator ;-)
433     */
434     string->chars	= (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1)));
435     *(string->chars)	= '\0';
436     string->size	= size + 1;
437 
438 
439     return string;
440 }
441 /**
442  * Creates a new string with enough capacity for size UTF16 characters plus a terminator.
443  *
444  * \param[in] factory - Pointer to the string factory that owns strings
445  * \param[in] size - In characters (count double for surrogate pairs!!!)
446  * \return pointer to the new string.
447  */
448 static    pANTLR3_STRING
newSizeUTF16(pANTLR3_STRING_FACTORY factory,ANTLR3_UINT32 size)449 newSizeUTF16	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
450 {
451     pANTLR3_STRING  string;
452 
453     string  = factory->newRaw(factory);
454 
455     if	(string == NULL)
456     {
457         return	string;
458     }
459 
460     /* Always add one more byte for a terminator ;-)
461     */
462     string->chars	= (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1)));
463     *(string->chars)	= '\0';
464     string->size	= size+1;	/* Size is always in characters, as is len */
465 
466     return string;
467 }
468 
469 /** Creates a new 8 bit string initialized with the 8 bit characters at the
470  *  supplied ptr, of pre-determined size.
471  * \param[in] factory - Pointer to the string factory that owns the strings
472  * \param[in] ptr - Pointer to 8 bit encoded characters
473  * \return pointer to the new string
474  */
475 static    pANTLR3_STRING
newPtr8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)476 newPtr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
477 {
478 	pANTLR3_STRING  string;
479 
480 	string  = factory->newSize(factory, size);
481 
482 	if	(string == NULL)
483 	{
484 		return	NULL;
485 	}
486 
487 	if	(size <= 0)
488 	{
489 		return	string;
490 	}
491 
492 	if	(ptr != NULL)
493 	{
494 		ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size);
495 		*(string->chars + size) = '\0';	    /* Terminate, these strings are usually used for Token streams and printing etc.	*/
496 		string->len = size;
497 	}
498 
499 	return  string;
500 }
501 
502 /** Creates a new UTF16 string initialized with the 8 bit characters at the
503  *  supplied 8 bit character ptr, of pre-determined size.
504  * \param[in] factory - Pointer to the string factory that owns the strings
505  * \param[in] ptr - Pointer to 8 bit encoded characters
506  * \return pointer to the new string
507  */
508 static    pANTLR3_STRING
newPtrUTF16_8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)509 newPtrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
510 {
511 	pANTLR3_STRING  string;
512 
513 	/* newSize accepts size in characters, not bytes
514 	*/
515 	string  = factory->newSize(factory, size);
516 
517 	if	(string == NULL)
518 	{
519 		return	NULL;
520 	}
521 
522 	if	(size <= 0)
523 	{
524 		return	string;
525 	}
526 
527 	if	(ptr != NULL)
528 	{
529 		pANTLR3_UINT16	out;
530 		ANTLR3_INT32    inSize;
531 
532 		out = (pANTLR3_UINT16)(string->chars);
533 		inSize	= size;
534 
535 		while	(inSize-- > 0)
536 		{
537 			*out++ = (ANTLR3_UINT16)(*ptr++);
538 		}
539 
540 		/* Terminate, these strings are usually used for Token streams and printing etc.
541 		*/
542 		*(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
543 
544 		string->len = size;
545 	}
546 
547 	return  string;
548 }
549 
550 /** Creates a new UTF16 string initialized with the UTF16 characters at the
551  *  supplied ptr, of pre-determined size.
552  * \param[in] factory - Pointer to the string factory that owns the strings
553  * \param[in] ptr - Pointer to UTF16 encoded characters
554  * \return pointer to the new string
555  */
556 static    pANTLR3_STRING
newPtrUTF16_UTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr,ANTLR3_UINT32 size)557 newPtrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
558 {
559 	pANTLR3_STRING  string;
560 
561 	string  = factory->newSize(factory, size);
562 
563 	if	(string == NULL)
564 	{
565 		return	NULL;
566 	}
567 
568 	if	(size <= 0)
569 	{
570 		return	string;
571 	}
572 
573 	if	(ptr != NULL)
574 	{
575 		ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));
576 
577 		/* Terminate, these strings are usually used for Token streams and printing etc.
578 		*/
579 		*(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
580 		string->len = size;
581 	}
582 
583 	return  string;
584 }
585 
586 /** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
587  * \param[in] factory - Pointer to the string factory that owns strings.
588  * \param[in] ptr - Pointer to the 8 bit encoded string
589  * \return Pointer to the newly initialized string
590  */
591 static    pANTLR3_STRING
newStr8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)592 newStr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
593 {
594     return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
595 }
596 
597 /** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer.
598  * \param[in] factory - Pointer to the string factory that owns strings.
599  * \param[in] ptr - Pointer to the 8 bit encoded string
600  * \return POinter to the newly initialized string
601  */
602 static    pANTLR3_STRING
newStrUTF16_8(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)603 newStrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
604 {
605     return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
606 }
607 
608 /** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer.
609  * \param[in] factory - Pointer to the string factory that owns strings.
610  * \param[in] ptr - Pointer to the UTF16 encoded string
611  * \return Pointer to the newly initialized string
612  */
613 static    pANTLR3_STRING
newStrUTF16_UTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_UINT8 ptr)614 newStrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
615 {
616     pANTLR3_UINT16  in;
617     ANTLR3_UINT32   count;
618 
619     /** First, determine the length of the input string
620      */
621     in	    = (pANTLR3_UINT16)ptr;
622     count   = 0;
623 
624     while   (*in++ != '\0')
625     {
626 		count++;
627     }
628     return factory->newPtr(factory, ptr, count);
629 }
630 
631 static    void
destroy(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING string)632 destroy	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
633 {
634     // Record which string we are deleting
635     //
636     ANTLR3_UINT32 strIndex = string->index;
637 
638     // Ensure that the string was not factory made, or we would try
639     // to delete memory that wasn't allocated outside the factory
640     // block.
641     // Remove the specific indexed string from the vector
642     //
643     factory->strings->del(factory->strings, strIndex);
644 
645     // One less string in the vector, so decrement the factory index
646     // so that the next string allocated is indexed correctly with
647     // respect to the vector.
648     //
649     factory->index--;
650 
651     // Now we have to reindex the strings in the vector that followed
652     // the one we just deleted. We only do this if the one we just deleted
653     // was not the last one.
654     //
655     if  (strIndex< factory->index)
656     {
657         // We must reindex the strings after the one we just deleted.
658         // The one that follows the one we just deleted is also out
659         // of whack, so we start there.
660         //
661         ANTLR3_UINT32 i;
662 
663         for (i = strIndex; i < factory->index; i++)
664         {
665             // Renumber the entry
666             //
667             ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i;
668         }
669     }
670 
671     // The string has been destroyed and the elements of the factory are reindexed.
672     //
673 
674 }
675 
676 static    pANTLR3_STRING
printable8(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING instr)677 printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
678 {
679     pANTLR3_STRING  string;
680 
681     /* We don't need to be too efficient here, this is mostly for error messages and so on.
682      */
683     pANTLR3_UINT8   scannedText;
684     ANTLR3_UINT32   i;
685 
686     /* Assume we need as much as twice as much space to parse out the control characters
687      */
688     string  = factory->newSize(factory, instr->len *2 + 1);
689 
690     /* Scan through and replace unprintable (in terms of this routine)
691      * characters
692      */
693     scannedText = string->chars;
694 
695     for	(i = 0; i < instr->len; i++)
696     {
697 		if (*(instr->chars + i) == '\n')
698 		{
699 			*scannedText++ = '\\';
700 			*scannedText++ = 'n';
701 		}
702 		else if (*(instr->chars + i) == '\r')
703 		{
704 			*scannedText++ = '\\';
705 			*scannedText++ = 'r';
706 		}
707 		else if	(!isprint(*(instr->chars +i)))
708 		{
709 			*scannedText++ = '?';
710 		}
711 		else
712 		{
713 			*scannedText++ = *(instr->chars + i);
714 		}
715     }
716     *scannedText  = '\0';
717 
718     string->len	= (ANTLR3_UINT32)(scannedText - string->chars);
719 
720     return  string;
721 }
722 
723 static    pANTLR3_STRING
printableUTF16(pANTLR3_STRING_FACTORY factory,pANTLR3_STRING instr)724 printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
725 {
726     pANTLR3_STRING  string;
727 
728     /* We don't need to be too efficient here, this is mostly for error messages and so on.
729      */
730     pANTLR3_UINT16  scannedText;
731     pANTLR3_UINT16  inText;
732     ANTLR3_UINT32   i;
733     ANTLR3_UINT32   outLen;
734 
735     /* Assume we need as much as twice as much space to parse out the control characters
736      */
737     string  = factory->newSize(factory, instr->len *2 + 1);
738 
739     /* Scan through and replace unprintable (in terms of this routine)
740      * characters
741      */
742     scannedText = (pANTLR3_UINT16)(string->chars);
743     inText	= (pANTLR3_UINT16)(instr->chars);
744     outLen	= 0;
745 
746     for	(i = 0; i < instr->len; i++)
747     {
748 		if (*(inText + i) == '\n')
749 		{
750 			*scannedText++   = '\\';
751 			*scannedText++   = 'n';
752 			outLen	    += 2;
753 		}
754 		else if (*(inText + i) == '\r')
755 		{
756 			*scannedText++   = '\\';
757 			*scannedText++   = 'r';
758 			outLen	    += 2;
759 		}
760 		else if	(!isprint(*(inText +i)))
761 		{
762 			*scannedText++ = '?';
763 			outLen++;
764 		}
765 		else
766 		{
767 			*scannedText++ = *(inText + i);
768 			outLen++;
769 		}
770     }
771     *scannedText  = '\0';
772 
773     string->len	= outLen;
774 
775     return  string;
776 }
777 
778 /** Fascist Capitalist Pig function created
779  *  to oppress the workers comrade.
780  */
781 static    void
closeFactory(pANTLR3_STRING_FACTORY factory)782 closeFactory	(pANTLR3_STRING_FACTORY factory)
783 {
784     /* Delete the vector we were tracking the strings with, this will
785      * causes all the allocated strings to be deallocated too
786      */
787     factory->strings->free(factory->strings);
788 
789     /* Delete the space for the factory itself
790      */
791     ANTLR3_FREE((void *)factory);
792 }
793 
794 static    pANTLR3_UINT8
append8(pANTLR3_STRING string,const char * newbit)795 append8	(pANTLR3_STRING string, const char * newbit)
796 {
797     ANTLR3_UINT32 len;
798 
799     len	= (ANTLR3_UINT32)strlen(newbit);
800 
801     if	(string->size < (string->len + len + 1))
802     {
803 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
804 		string->size	= string->len + len + 1;
805     }
806 
807     /* Note we copy one more byte than the strlen in order to get the trailing
808      */
809     ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1));
810     string->len	+= len;
811 
812     return string->chars;
813 }
814 
815 static    pANTLR3_UINT8
appendUTF16_8(pANTLR3_STRING string,const char * newbit)816 appendUTF16_8	(pANTLR3_STRING string, const char * newbit)
817 {
818     ANTLR3_UINT32   len;
819     pANTLR3_UINT16  apPoint;
820     ANTLR3_UINT32   count;
821 
822     len	= (ANTLR3_UINT32)strlen(newbit);
823 
824     if	(string->size < (string->len + len + 1))
825     {
826 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1))));
827 		string->size	= string->len + len + 1;
828     }
829 
830     apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
831     string->len	+= len;
832 
833     for	(count = 0; count < len; count++)
834     {
835 		*apPoint++   = *(newbit + count);
836     }
837     *apPoint = '\0';
838 
839     return string->chars;
840 }
841 
842 static    pANTLR3_UINT8
appendUTF16_UTF16(pANTLR3_STRING string,const char * newbit)843 appendUTF16_UTF16	(pANTLR3_STRING string, const char * newbit)
844 {
845     ANTLR3_UINT32 len;
846     pANTLR3_UINT16  in;
847 
848     /** First, determine the length of the input string
849      */
850     in	    = (pANTLR3_UINT16)newbit;
851     len   = 0;
852 
853     while   (*in++ != '\0')
854     {
855 		len++;
856     }
857 
858     if	(string->size < (string->len + len + 1))
859     {
860 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) ));
861 		string->size	= string->len + len + 1;
862     }
863 
864     /* Note we copy one more byte than the strlen in order to get the trailing delimiter
865      */
866     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1)));
867     string->len	+= len;
868 
869     return string->chars;
870 }
871 
872 static    pANTLR3_UINT8
set8(pANTLR3_STRING string,const char * chars)873 set8	(pANTLR3_STRING string, const char * chars)
874 {
875     ANTLR3_UINT32	len;
876 
877     len = (ANTLR3_UINT32)strlen(chars);
878     if	(string->size < len + 1)
879     {
880 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1));
881 		string->size	= len + 1;
882     }
883 
884     /* Note we copy one more byte than the strlen in order to get the trailing '\0'
885      */
886     ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1));
887     string->len	    = len;
888 
889     return  string->chars;
890 
891 }
892 
893 static    pANTLR3_UINT8
setUTF16_8(pANTLR3_STRING string,const char * chars)894 setUTF16_8	(pANTLR3_STRING string, const char * chars)
895 {
896     ANTLR3_UINT32	len;
897     ANTLR3_UINT32	count;
898     pANTLR3_UINT16	apPoint;
899 
900     len = (ANTLR3_UINT32)strlen(chars);
901     if	(string->size < len + 1)
902 	{
903 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
904 		string->size	= len + 1;
905     }
906     apPoint = ((pANTLR3_UINT16)string->chars);
907     string->len	= len;
908 
909     for	(count = 0; count < string->len; count++)
910     {
911 		*apPoint++   = *(chars + count);
912     }
913     *apPoint = '\0';
914 
915     return  string->chars;
916 }
917 
918 static    pANTLR3_UINT8
setUTF16_UTF16(pANTLR3_STRING string,const char * chars)919 setUTF16_UTF16    (pANTLR3_STRING string, const char * chars)
920 {
921     ANTLR3_UINT32   len;
922     pANTLR3_UINT16  in;
923 
924     /** First, determine the length of the input string
925      */
926     in	    = (pANTLR3_UINT16)chars;
927     len   = 0;
928 
929     while   (*in++ != '\0')
930     {
931 		len++;
932     }
933 
934     if	(string->size < len + 1)
935     {
936 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
937 		string->size	= len + 1;
938     }
939 
940     /* Note we copy one more byte than the strlen in order to get the trailing '\0'
941      */
942     ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16)));
943     string->len	    = len;
944 
945     return  string->chars;
946 
947 }
948 
949 static    pANTLR3_UINT8
addc8(pANTLR3_STRING string,ANTLR3_UINT32 c)950 addc8	(pANTLR3_STRING string, ANTLR3_UINT32 c)
951 {
952     if	(string->size < string->len + 2)
953     {
954 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2));
955 		string->size	= string->len + 2;
956     }
957     *(string->chars + string->len)	= (ANTLR3_UINT8)c;
958     *(string->chars + string->len + 1)	= '\0';
959     string->len++;
960 
961     return  string->chars;
962 }
963 
964 static    pANTLR3_UINT8
addcUTF16(pANTLR3_STRING string,ANTLR3_UINT32 c)965 addcUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 c)
966 {
967     pANTLR3_UINT16  ptr;
968 
969     if	(string->size < string->len + 2)
970     {
971 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2)));
972 		string->size	= string->len + 2;
973     }
974     ptr	= (pANTLR3_UINT16)(string->chars);
975 
976     *(ptr + string->len)	= (ANTLR3_UINT16)c;
977     *(ptr + string->len + 1)	= '\0';
978     string->len++;
979 
980     return  string->chars;
981 }
982 
983 static    pANTLR3_UINT8
addi8(pANTLR3_STRING string,ANTLR3_INT32 i)984 addi8	(pANTLR3_STRING string, ANTLR3_INT32 i)
985 {
986     ANTLR3_UINT8	    newbit[32];
987 
988     sprintf((char *)newbit, "%d", i);
989 
990     return  string->append8(string, (const char *)newbit);
991 }
992 static    pANTLR3_UINT8
addiUTF16(pANTLR3_STRING string,ANTLR3_INT32 i)993 addiUTF16	(pANTLR3_STRING string, ANTLR3_INT32 i)
994 {
995     ANTLR3_UINT8	    newbit[32];
996 
997     sprintf((char *)newbit, "%d", i);
998 
999     return  string->append8(string, (const char *)newbit);
1000 }
1001 
1002 static	  pANTLR3_UINT8
inserti8(pANTLR3_STRING string,ANTLR3_UINT32 point,ANTLR3_INT32 i)1003 inserti8    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1004 {
1005     ANTLR3_UINT8	    newbit[32];
1006 
1007     sprintf((char *)newbit, "%d", i);
1008     return  string->insert8(string, point, (const char *)newbit);
1009 }
1010 static	  pANTLR3_UINT8
insertiUTF16(pANTLR3_STRING string,ANTLR3_UINT32 point,ANTLR3_INT32 i)1011 insertiUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1012 {
1013     ANTLR3_UINT8	    newbit[32];
1014 
1015     sprintf((char *)newbit, "%d", i);
1016     return  string->insert8(string, point, (const char *)newbit);
1017 }
1018 
1019 static	pANTLR3_UINT8
insert8(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1020 insert8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1021 {
1022     ANTLR3_UINT32	len;
1023 
1024     if	(point >= string->len)
1025     {
1026 		return	string->append(string, newbit);
1027     }
1028 
1029     len	= (ANTLR3_UINT32)strlen(newbit);
1030 
1031     if	(len == 0)
1032     {
1033 		return	string->chars;
1034     }
1035 
1036     if	(string->size < (string->len + len + 1))
1037     {
1038 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
1039 		string->size	= string->len + len + 1;
1040     }
1041 
1042     /* Move the characters we are inserting before, including the delimiter
1043      */
1044     ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1));
1045 
1046     /* Note we copy the exact number of bytes
1047      */
1048     ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len));
1049 
1050     string->len += len;
1051 
1052     return  string->chars;
1053 }
1054 
1055 static	pANTLR3_UINT8
insertUTF16_8(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1056 insertUTF16_8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1057 {
1058     ANTLR3_UINT32	len;
1059     ANTLR3_UINT32	count;
1060     pANTLR3_UINT16	inPoint;
1061 
1062     if	(point >= string->len)
1063     {
1064 		return	string->append8(string, newbit);
1065     }
1066 
1067     len	= (ANTLR3_UINT32)strlen(newbit);
1068 
1069     if	(len == 0)
1070     {
1071 		return	string->chars;
1072     }
1073 
1074     if	(string->size < (string->len + len + 1))
1075     {
1076 	string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1077 	string->size	= string->len + len + 1;
1078     }
1079 
1080     /* Move the characters we are inserting before, including the delimiter
1081      */
1082     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1083 
1084     string->len += len;
1085 
1086     inPoint = ((pANTLR3_UINT16)(string->chars))+point;
1087     for	(count = 0; count<len; count++)
1088     {
1089 		*(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count));
1090     }
1091 
1092     return  string->chars;
1093 }
1094 
1095 static	pANTLR3_UINT8
insertUTF16_UTF16(pANTLR3_STRING string,ANTLR3_UINT32 point,const char * newbit)1096 insertUTF16_UTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1097 {
1098     ANTLR3_UINT32	len;
1099     pANTLR3_UINT16	in;
1100 
1101     if	(point >= string->len)
1102     {
1103 		return	string->append(string, newbit);
1104     }
1105 
1106     /** First, determine the length of the input string
1107      */
1108     in	    = (pANTLR3_UINT16)newbit;
1109     len	    = 0;
1110 
1111     while   (*in++ != '\0')
1112     {
1113 		len++;
1114     }
1115 
1116     if	(len == 0)
1117     {
1118 		return	string->chars;
1119     }
1120 
1121     if	(string->size < (string->len + len + 1))
1122     {
1123 		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1124 		string->size	= string->len + len + 1;
1125     }
1126 
1127     /* Move the characters we are inserting before, including the delimiter
1128      */
1129     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1130 
1131 
1132     /* Note we copy the exact number of characters
1133      */
1134     ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len)));
1135 
1136     string->len += len;
1137 
1138     return  string->chars;
1139 }
1140 
setS(pANTLR3_STRING string,pANTLR3_STRING chars)1141 static    pANTLR3_UINT8	    setS	(pANTLR3_STRING string, pANTLR3_STRING chars)
1142 {
1143     return  string->set(string, (const char *)(chars->chars));
1144 }
1145 
appendS(pANTLR3_STRING string,pANTLR3_STRING newbit)1146 static    pANTLR3_UINT8	    appendS	(pANTLR3_STRING string, pANTLR3_STRING newbit)
1147 {
1148     /* We may be passed an empty string, in which case we just return the current pointer
1149      */
1150     if	(newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL)
1151     {
1152 		return	string->chars;
1153     }
1154     else
1155     {
1156 		return  string->append(string, (const char *)(newbit->chars));
1157     }
1158 }
1159 
insertS(pANTLR3_STRING string,ANTLR3_UINT32 point,pANTLR3_STRING newbit)1160 static	  pANTLR3_UINT8	    insertS	(pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
1161 {
1162     return  string->insert(string, point, (const char *)(newbit->chars));
1163 }
1164 
1165 /* Function that compares the text of a string to the supplied
1166  * 8 bit character string and returns a result a la strcmp()
1167  */
1168 static ANTLR3_UINT32
compare8(pANTLR3_STRING string,const char * compStr)1169 compare8	(pANTLR3_STRING string, const char * compStr)
1170 {
1171     return  strcmp((const char *)(string->chars), compStr);
1172 }
1173 
1174 /* Function that compares the text of a string with the supplied character string
1175  * (which is assumed to be in the same encoding as the string itself) and returns a result
1176  * a la strcmp()
1177  */
1178 static ANTLR3_UINT32
compareUTF16_8(pANTLR3_STRING string,const char * compStr)1179 compareUTF16_8	(pANTLR3_STRING string, const char * compStr)
1180 {
1181     pANTLR3_UINT16  ourString;
1182     ANTLR3_UINT32   charDiff;
1183 
1184     ourString	= (pANTLR3_UINT16)(string->chars);
1185 
1186     while   (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0'))
1187     {
1188 		charDiff = *ourString - *compStr;
1189 		if  (charDiff != 0)
1190 		{
1191 			return charDiff;
1192 		}
1193 		ourString++;
1194 		compStr++;
1195     }
1196 
1197     /* At this point, one of the strings was terminated
1198      */
1199     return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1200 
1201 }
1202 
1203 /* Function that compares the text of a string with the supplied character string
1204  * (which is assumed to be in the same encoding as the string itself) and returns a result
1205  * a la strcmp()
1206  */
1207 static ANTLR3_UINT32
compareUTF16_UTF16(pANTLR3_STRING string,const char * compStr8)1208 compareUTF16_UTF16	(pANTLR3_STRING string, const char * compStr8)
1209 {
1210     pANTLR3_UINT16  ourString;
1211     pANTLR3_UINT16  compStr;
1212     ANTLR3_UINT32   charDiff;
1213 
1214     ourString	= (pANTLR3_UINT16)(string->chars);
1215     compStr	= (pANTLR3_UINT16)(compStr8);
1216 
1217     while   (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0'))
1218     {
1219 		charDiff = *ourString - *compStr;
1220 		if  (charDiff != 0)
1221 		{
1222 			return charDiff;
1223 		}
1224 		ourString++;
1225 		compStr++;
1226     }
1227 
1228     /* At this point, one of the strings was terminated
1229      */
1230     return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1231 }
1232 
1233 /* Function that compares the text of a string with the supplied string
1234  * (which is assumed to be in the same encoding as the string itself) and returns a result
1235  * a la strcmp()
1236  */
1237 static ANTLR3_UINT32
compareS(pANTLR3_STRING string,pANTLR3_STRING compStr)1238 compareS    (pANTLR3_STRING string, pANTLR3_STRING compStr)
1239 {
1240     return  string->compare(string, (const char *)compStr->chars);
1241 }
1242 
1243 
1244 /* Function that returns the character indexed at the supplied
1245  * offset as a 32 bit character.
1246  */
1247 static ANTLR3_UCHAR
charAt8(pANTLR3_STRING string,ANTLR3_UINT32 offset)1248 charAt8	    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1249 {
1250     if	(offset > string->len)
1251     {
1252 		return (ANTLR3_UCHAR)'\0';
1253     }
1254     else
1255     {
1256 		return  (ANTLR3_UCHAR)(*(string->chars + offset));
1257     }
1258 }
1259 
1260 /* Function that returns the character indexed at the supplied
1261  * offset as a 32 bit character.
1262  */
1263 static ANTLR3_UCHAR
charAtUTF16(pANTLR3_STRING string,ANTLR3_UINT32 offset)1264 charAtUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1265 {
1266     if	(offset > string->len)
1267     {
1268 		return (ANTLR3_UCHAR)'\0';
1269     }
1270     else
1271     {
1272 		return  (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset));
1273     }
1274 }
1275 
1276 /* Function that returns a substring of the supplied string a la .subString(s,e)
1277  * in java runtimes.
1278  */
1279 static pANTLR3_STRING
subString8(pANTLR3_STRING string,ANTLR3_UINT32 startIndex,ANTLR3_UINT32 endIndex)1280 subString8   (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1281 {
1282     pANTLR3_STRING newStr;
1283 
1284     if	(endIndex > string->len)
1285     {
1286 		endIndex = string->len + 1;
1287     }
1288     newStr  = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
1289 
1290     return newStr;
1291 }
1292 
1293 /* Returns a substring of the supplied string a la .subString(s,e)
1294  * in java runtimes.
1295  */
1296 static pANTLR3_STRING
subStringUTF16(pANTLR3_STRING string,ANTLR3_UINT32 startIndex,ANTLR3_UINT32 endIndex)1297 subStringUTF16  (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1298 {
1299     pANTLR3_STRING newStr;
1300 
1301     if	(endIndex > string->len)
1302     {
1303 		endIndex = string->len + 1;
1304     }
1305     newStr  = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
1306 
1307     return newStr;
1308 }
1309 
1310 /* Function that can convert the characters in the string to an integer
1311  */
1312 static ANTLR3_INT32
toInt32_8(struct ANTLR3_STRING_struct * string)1313 toInt32_8	    (struct ANTLR3_STRING_struct * string)
1314 {
1315     return  atoi((const char *)(string->chars));
1316 }
1317 
1318 /* Function that can convert the characters in the string to an integer
1319  */
1320 static ANTLR3_INT32
toInt32_UTF16(struct ANTLR3_STRING_struct * string)1321 toInt32_UTF16       (struct ANTLR3_STRING_struct * string)
1322 {
1323     pANTLR3_UINT16  input;
1324     ANTLR3_INT32   value;
1325     ANTLR3_BOOLEAN  negate;
1326 
1327     value   = 0;
1328     input   = (pANTLR3_UINT16)(string->chars);
1329     negate  = ANTLR3_FALSE;
1330 
1331     if	(*input == (ANTLR3_UCHAR)'-')
1332     {
1333 		negate = ANTLR3_TRUE;
1334 		input++;
1335     }
1336     else if (*input == (ANTLR3_UCHAR)'+')
1337     {
1338 		input++;
1339     }
1340 
1341     while   (*input != '\0' && isdigit(*input))
1342     {
1343 		value	 = value * 10;
1344 		value	+= ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
1345 		input++;
1346     }
1347 
1348     return negate ? -value : value;
1349 }
1350 
1351 /* Function that returns a pointer to an 8 bit version of the string,
1352  * which in this case is just the string as this is
1353  * 8 bit encodiing anyway.
1354  */
to8_8(pANTLR3_STRING string)1355 static	  pANTLR3_STRING	    to8_8	(pANTLR3_STRING string)
1356 {
1357     return  string;
1358 }
1359 
1360 /* Function that returns an 8 bit version of the string,
1361  * which in this case is returning all the UTF16 characters
1362  * narrowed back into 8 bits, with characters that are too large
1363  * replaced with '_'
1364  */
to8_UTF16(pANTLR3_STRING string)1365 static	  pANTLR3_STRING    to8_UTF16	(pANTLR3_STRING string)
1366 {
1367 	pANTLR3_STRING  newStr;
1368 	ANTLR3_UINT32   i;
1369 
1370 	/* Create a new 8 bit string
1371 	*/
1372 	newStr  = newRaw8(string->factory);
1373 
1374 	if	(newStr == NULL)
1375 	{
1376 		return	NULL;
1377 	}
1378 
1379 	/* Always add one more byte for a terminator
1380 	*/
1381 	newStr->chars   = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));
1382 	newStr->size    = string->len + 1;
1383 	newStr->len	    = string->len;
1384 
1385 	/* Now copy each UTF16 charActer , making it an 8 bit character of
1386 	* some sort.
1387 	*/
1388 	for	(i=0; i<string->len; i++)
1389 	{
1390 		ANTLR3_UCHAR	c;
1391 
1392 		c = *(((pANTLR3_UINT16)(string->chars)) + i);
1393 
1394 		*(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
1395 	}
1396 
1397 	/* Terminate
1398 	*/
1399 	*(newStr->chars + newStr->len) = '\0';
1400 
1401 	return newStr;
1402 }
1403