1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %                                                                             %
4 %                                                                             %
5 %                                                                             %
6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
7 %                      T    O   O  K  K   E      NN  N                        %
8 %                      T    O   O  KKK    EEE    N N N                        %
9 %                      T    O   O  K  K   E      N  NN                        %
10 %                      T     OOO   K   K  EEEEE  N   N                        %
11 %                                                                             %
12 %                                                                             %
13 %                         MagickCore Token Methods                            %
14 %                                                                             %
15 %                             Software Design                                 %
16 %                                  Cristy                                     %
17 %                              January 1993                                   %
18 %                                                                             %
19 %                                                                             %
20 %  Copyright 1999-2019 ImageMagick Studio LLC, a non-profit organization      %
21 %  dedicated to making software imaging solutions freely available.           %
22 %                                                                             %
23 %  You may not use this file except in compliance with the License.  You may  %
24 %  obtain a copy of the License at                                            %
25 %                                                                             %
26 %    https://imagemagick.org/script/license.php                               %
27 %                                                                             %
28 %  Unless required by applicable law or agreed to in writing, software        %
29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
31 %  See the License for the specific language governing permissions and        %
32 %  limitations under the License.                                             %
33 %                                                                             %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35 %
36 %
37 %
38 */
39 
40 /*
41   Include declarations.
42 */
43 #include "MagickCore/studio.h"
44 #include "MagickCore/exception.h"
45 #include "MagickCore/exception-private.h"
46 #include "MagickCore/image.h"
47 #include "MagickCore/image-private.h"
48 #include "MagickCore/memory_.h"
49 #include "MagickCore/memory-private.h"
50 #include "MagickCore/string_.h"
51 #include "MagickCore/string-private.h"
52 #include "MagickCore/token.h"
53 #include "MagickCore/token-private.h"
54 #include "MagickCore/utility.h"
55 #include "MagickCore/utility-private.h"
56 
57 /*
58   Typedef declarations.
59 */
60 struct _TokenInfo
61 {
62   int
63     state;
64 
65   MagickStatusType
66     flag;
67 
68   ssize_t
69     offset;
70 
71   char
72     quote;
73 
74   size_t
75     signature;
76 };
77 
78 /*
79 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
80 %                                                                             %
81 %                                                                             %
82 %                                                                             %
83 %   A c q u i r e T o k e n I n f o                                           %
84 %                                                                             %
85 %                                                                             %
86 %                                                                             %
87 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
88 %
89 %  AcquireTokenInfo() allocates the TokenInfo structure.
90 %
91 %  The format of the AcquireTokenInfo method is:
92 %
93 %      TokenInfo *AcquireTokenInfo()
94 %
95 */
AcquireTokenInfo(void)96 MagickExport TokenInfo *AcquireTokenInfo(void)
97 {
98   TokenInfo
99     *token_info;
100 
101   token_info=(TokenInfo *) AcquireCriticalMemory(sizeof(*token_info));
102   token_info->signature=MagickCoreSignature;
103   return(token_info);
104 }
105 
106 /*
107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
108 %                                                                             %
109 %                                                                             %
110 %                                                                             %
111 %   D e s t r o y T o k e n I n f o                                           %
112 %                                                                             %
113 %                                                                             %
114 %                                                                             %
115 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
116 %
117 %  DestroyTokenInfo() deallocates memory associated with a TokenInfo
118 %  structure.
119 %
120 %  The format of the DestroyTokenInfo method is:
121 %
122 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
123 %
124 %  A description of each parameter follows:
125 %
126 %    o token_info: Specifies a pointer to a TokenInfo structure.
127 %
128 */
DestroyTokenInfo(TokenInfo * token_info)129 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
130 {
131   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
132   assert(token_info != (TokenInfo *) NULL);
133   assert(token_info->signature == MagickCoreSignature);
134   token_info->signature=(~MagickCoreSignature);
135   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
136   return(token_info);
137 }
138 
139 /*
140 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
141 %                                                                             %
142 %                                                                             %
143 %                                                                             %
144 +   G e t N e x t T o k e n                                                   %
145 %                                                                             %
146 %                                                                             %
147 %                                                                             %
148 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
149 %
150 %  GetNextToken() gets a token from the token stream.  A token is defined as
151 %  a sequence of characters delimited by whitespace (e.g. clip-path), a
152 %  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
153 %  parentheses (e.g. rgb(0,0,0)).  GetNextToken() also recognizes these
154 %  separator characters: ':', '=', ',', and ';'.
155 %
156 %  The format of the GetNextToken method is:
157 %
158 %      void GetNextToken(const char *start,const char **end,
159 %        const size_t extent,char *token)
160 %
161 %  A description of each parameter follows:
162 %
163 %    o start: the start of the token sequence.
164 %
165 %    o end: point to the end of the token sequence.
166 %
167 %    o extent: maximum extent of the token.
168 %
169 %    o token: copy the token to this buffer.
170 %
171 */
GetNextToken(const char * start,const char ** end,const size_t extent,char * token)172 MagickExport void GetNextToken(const char *start,const char **end,
173   const size_t extent,char *token)
174 {
175   double
176     value;
177 
178   register char
179     *q;
180 
181   register const char
182     *p;
183 
184   register ssize_t
185     i;
186 
187   assert(start != (const char *) NULL);
188   assert(token != (char *) NULL);
189   i=0;
190   p=start;
191   while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
192     p++;
193   switch (*p)
194   {
195     case '\0':
196       break;
197     case '"':
198     case '\'':
199     case '`':
200     case '{':
201     {
202       register char
203         escape;
204 
205       switch (*p)
206       {
207         case '"': escape='"'; break;
208         case '\'': escape='\''; break;
209         case '`': escape='\''; break;
210         case '{': escape='}'; break;
211         default: escape=(*p); break;
212       }
213       for (p++; *p != '\0'; p++)
214       {
215         if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
216           p++;
217         else
218           if (*p == escape)
219             {
220               p++;
221               break;
222             }
223         if (i < (ssize_t) (extent-1))
224           token[i++]=(*p);
225         if ((size_t) (p-start) >= (extent-1))
226           break;
227       }
228       break;
229     }
230     case '/':
231     {
232       if (i < (ssize_t) (extent-1))
233         token[i++]=(*p);
234       p++;
235       if ((*p == '>') || (*p == '/'))
236         {
237           if (i < (ssize_t) (extent-1))
238             token[i++]=(*p);
239           p++;
240         }
241       break;
242     }
243     default:
244     {
245       char
246         *q;
247 
248       value=StringToDouble(p,&q);
249       (void) value;
250       if ((p != q) && (*p != ','))
251         {
252           for ( ; (p < q) && (*p != ','); p++)
253           {
254             if (i < (ssize_t) (extent-1))
255               token[i++]=(*p);
256             if ((size_t) (p-start) >= (extent-1))
257               break;
258           }
259           if (*p == '%')
260             {
261               if (i < (ssize_t) (extent-1))
262                 token[i++]=(*p);
263               p++;
264             }
265           break;
266         }
267       if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
268           (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
269         {
270           if (i < (ssize_t) (extent-1))
271             token[i++]=(*p);
272           p++;
273           break;
274         }
275       for ( ; *p != '\0'; p++)
276       {
277         if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
278             (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
279           break;
280         if ((i > 0) && (*p == '<'))
281           break;
282         if (i < (ssize_t) (extent-1))
283           token[i++]=(*p);
284         if (*p == '>')
285           break;
286         if (*p == '(')
287           {
288             for (p++; *p != '\0'; p++)
289             {
290               if (i < (ssize_t) (extent-1))
291                 token[i++]=(*p);
292               if ((*p == ')') && (*(p-1) != '\\'))
293                 break;
294               if ((size_t) (p-start) >= (extent-1))
295                 break;
296             }
297             if (*p == '\0')
298               break;
299           }
300         if ((size_t) (p-start) >= (extent-1))
301           break;
302       }
303       break;
304     }
305   }
306   token[i]='\0';
307   q=strrchr(token,')');
308   if ((LocaleNCompare(token,"url(#",5) == 0) && (q != (char *) NULL))
309     {
310       *q='\0';
311       (void) memmove(token,token+5,(size_t) (q-token-4));
312     }
313   while (isspace((int) ((unsigned char) *p)) != 0)
314     p++;
315   if (end != (const char **) NULL)
316     *end=(const char *) p;
317 }
318 
319 /*
320 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
321 %                                                                             %
322 %                                                                             %
323 %                                                                             %
324 %   G l o b E x p r e s s i o n                                               %
325 %                                                                             %
326 %                                                                             %
327 %                                                                             %
328 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
329 %
330 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
331 %
332 %  The format of the GlobExpression function is:
333 %
334 %      MagickBooleanType GlobExpression(const char *expression,
335 %        const char *pattern,const MagickBooleanType case_insensitive)
336 %
337 %  A description of each parameter follows:
338 %
339 %    o expression: Specifies a pointer to a text string containing a file name.
340 %
341 %    o pattern: Specifies a pointer to a text string containing a pattern.
342 %
343 %    o case_insensitive: set to MagickTrue to ignore the case when matching
344 %      an expression.
345 %
346 */
GlobExpression(const char * expression,const char * pattern,const MagickBooleanType case_insensitive)347 MagickExport MagickBooleanType GlobExpression(const char *expression,
348   const char *pattern,const MagickBooleanType case_insensitive)
349 {
350   MagickBooleanType
351     done,
352     match;
353 
354   register const char
355     *p;
356 
357   /*
358     Return on empty pattern or '*'.
359   */
360   if (pattern == (char *) NULL)
361     return(MagickTrue);
362   if (GetUTFCode(pattern) == 0)
363     return(MagickTrue);
364   if (LocaleCompare(pattern,"*") == 0)
365     return(MagickTrue);
366   p=pattern+strlen(pattern)-1;
367   if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
368     {
369       ExceptionInfo
370         *exception;
371 
372       ImageInfo
373         *image_info;
374 
375       /*
376         Determine if pattern is a scene, i.e. img0001.pcd[2].
377       */
378       image_info=AcquireImageInfo();
379       (void) CopyMagickString(image_info->filename,pattern,MagickPathExtent);
380       exception=AcquireExceptionInfo();
381       (void) SetImageInfo(image_info,0,exception);
382       exception=DestroyExceptionInfo(exception);
383       if (LocaleCompare(image_info->filename,pattern) != 0)
384         {
385           image_info=DestroyImageInfo(image_info);
386           return(MagickFalse);
387         }
388       image_info=DestroyImageInfo(image_info);
389     }
390   /*
391     Evaluate glob expression.
392   */
393   done=MagickFalse;
394   while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
395   {
396     if (GetUTFCode(expression) == 0)
397       if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
398         break;
399     switch (GetUTFCode(pattern))
400     {
401       case '*':
402       {
403         MagickBooleanType
404           status;
405 
406         status=MagickFalse;
407         while (GetUTFCode(pattern) == '*')
408           pattern+=GetUTFOctets(pattern);
409         while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
410         {
411           status=GlobExpression(expression,pattern,case_insensitive);
412           expression+=GetUTFOctets(expression);
413         }
414         if (status != MagickFalse)
415           {
416             while (GetUTFCode(expression) != 0)
417               expression+=GetUTFOctets(expression);
418             while (GetUTFCode(pattern) != 0)
419               pattern+=GetUTFOctets(pattern);
420           }
421         break;
422       }
423       case '[':
424       {
425         int
426           c;
427 
428         pattern+=GetUTFOctets(pattern);
429         for ( ; ; )
430         {
431           if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
432             {
433               done=MagickTrue;
434               break;
435             }
436           if (GetUTFCode(pattern) == '\\')
437             {
438               pattern+=GetUTFOctets(pattern);
439               if (GetUTFCode(pattern) == 0)
440                 {
441                   done=MagickTrue;
442                   break;
443                 }
444              }
445           if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
446             {
447               c=GetUTFCode(pattern);
448               pattern+=GetUTFOctets(pattern);
449               pattern+=GetUTFOctets(pattern);
450               if (GetUTFCode(pattern) == ']')
451                 {
452                   done=MagickTrue;
453                   break;
454                 }
455               if (GetUTFCode(pattern) == '\\')
456                 {
457                   pattern+=GetUTFOctets(pattern);
458                   if (GetUTFCode(pattern) == 0)
459                     {
460                       done=MagickTrue;
461                       break;
462                     }
463                 }
464               if ((GetUTFCode(expression) < c) ||
465                   (GetUTFCode(expression) > GetUTFCode(pattern)))
466                 {
467                   pattern+=GetUTFOctets(pattern);
468                   continue;
469                 }
470             }
471           else
472             if (GetUTFCode(pattern) != GetUTFCode(expression))
473               {
474                 pattern+=GetUTFOctets(pattern);
475                 continue;
476               }
477           pattern+=GetUTFOctets(pattern);
478           while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
479           {
480             if ((GetUTFCode(pattern) == '\\') &&
481                 (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
482               pattern+=GetUTFOctets(pattern);
483             pattern+=GetUTFOctets(pattern);
484           }
485           if (GetUTFCode(pattern) != 0)
486             {
487               pattern+=GetUTFOctets(pattern);
488               expression+=GetUTFOctets(expression);
489             }
490           break;
491         }
492         break;
493       }
494       case '?':
495       {
496         pattern+=GetUTFOctets(pattern);
497         expression+=GetUTFOctets(expression);
498         break;
499       }
500       case '{':
501       {
502         char
503           *target;
504 
505         register char
506           *p;
507 
508         target=AcquireString(pattern);
509         p=target;
510         pattern++;
511         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
512         {
513           *p++=(*pattern++);
514           if ((GetUTFCode(pattern) == ',') || (GetUTFCode(pattern) == '}'))
515             {
516               *p='\0';
517               match=GlobExpression(expression,target,case_insensitive);
518               if (match != MagickFalse)
519                 {
520                   expression+=MagickMin(strlen(expression),strlen(target));
521                   break;
522                 }
523               p=target;
524               pattern+=GetUTFOctets(pattern);
525             }
526         }
527         while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
528           pattern+=GetUTFOctets(pattern);
529         if (GetUTFCode(pattern) != 0)
530           pattern+=GetUTFOctets(pattern);
531         target=DestroyString(target);
532         break;
533       }
534       case '\\':
535       {
536         pattern+=GetUTFOctets(pattern);
537         if (GetUTFCode(pattern) == 0)
538           break;
539       }
540       default:
541       {
542         if (case_insensitive != MagickFalse)
543           {
544             if (LocaleLowercase((int) GetUTFCode(expression)) != LocaleLowercase((int) GetUTFCode(pattern)))
545               {
546                 done=MagickTrue;
547                 break;
548               }
549           }
550         else
551           if (GetUTFCode(expression) != GetUTFCode(pattern))
552             {
553               done=MagickTrue;
554               break;
555             }
556         expression+=GetUTFOctets(expression);
557         pattern+=GetUTFOctets(pattern);
558       }
559     }
560   }
561   while (GetUTFCode(pattern) == '*')
562     pattern+=GetUTFOctets(pattern);
563   match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
564     MagickTrue : MagickFalse;
565   return(match);
566 }
567 
568 /*
569 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
570 %                                                                             %
571 %                                                                             %
572 %                                                                             %
573 +     I s G l o b                                                             %
574 %                                                                             %
575 %                                                                             %
576 %                                                                             %
577 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
578 %
579 %  IsGlob() returns MagickTrue if the path specification contains a globbing
580 %  pattern.
581 %
582 %  The format of the IsGlob method is:
583 %
584 %      MagickBooleanType IsGlob(const char *path)
585 %
586 %  A description of each parameter follows:
587 %
588 %    o path: the path.
589 %
590 */
IsGlob(const char * path)591 MagickPrivate MagickBooleanType IsGlob(const char *path)
592 {
593   MagickBooleanType
594     status = MagickFalse;
595 
596   register const char
597     *p;
598 
599   if (IsPathAccessible(path) != MagickFalse)
600     return(MagickFalse);
601   for (p=path; *p != '\0'; p++)
602   {
603     switch (*p)
604     {
605       case '*':
606       case '?':
607       case '{':
608       case '}':
609       case '[':
610       case ']':
611       {
612         status=MagickTrue;
613         break;
614       }
615       default:
616         break;
617     }
618   }
619   return(status);
620 }
621 
622 /*
623 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
624 %                                                                             %
625 %                                                                             %
626 %                                                                             %
627 %   T o k e n i z e r                                                         %
628 %                                                                             %
629 %                                                                             %
630 %                                                                             %
631 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
632 %
633 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
634 %  one at a time from a string of characters.  The characters used for white
635 %  space, for break characters, and for quotes can be specified.  Also,
636 %  characters in the string can be preceded by a specifiable escape character
637 %  which removes any special meaning the character may have.
638 %
639 %  Here is some terminology:
640 %
641 %    o token: A single unit of information in the form of a group of
642 %      characters.
643 %
644 %    o white space: Space that gets ignored (except within quotes or when
645 %      escaped), like blanks and tabs. in addition, white space terminates a
646 %      non-quoted token.
647 %
648 %    o break set: One or more characters that separates non-quoted tokens.
649 %      Commas are a common break character. The usage of break characters to
650 %      signal the end of a token is the same as that of white space, except
651 %      multiple break characters with nothing or only white space between
652 %      generate a null token for each two break characters together.
653 %
654 %      For example, if blank is set to be the white space and comma is set to
655 %      be the break character, the line
656 %
657 %        A, B, C ,  , DEF
658 %
659 %        ... consists of 5 tokens:
660 %
661 %        1)  "A"
662 %        2)  "B"
663 %        3)  "C"
664 %        4)  "" (the null string)
665 %        5)  "DEF"
666 %
667 %    o Quote character: A character that, when surrounding a group of other
668 %      characters, causes the group of characters to be treated as a single
669 %      token, no matter how many white spaces or break characters exist in
670 %      the group. Also, a token always terminates after the closing quote.
671 %      For example, if ' is the quote character, blank is white space, and
672 %      comma is the break character, the following string
673 %
674 %        A, ' B, CD'EF GHI
675 %
676 %        ... consists of 4 tokens:
677 %
678 %        1)  "A"
679 %        2)  " B, CD" (note the blanks & comma)
680 %        3)  "EF"
681 %        4)  "GHI"
682 %
683 %      The quote characters themselves do not appear in the resultant
684 %      tokens.  The double quotes are delimiters I use here for
685 %      documentation purposes only.
686 %
687 %    o Escape character: A character which itself is ignored but which
688 %      causes the next character to be used as is.  ^ and \ are often used
689 %      as escape characters. An escape in the last position of the string
690 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
691 %      and non-escape) character. For example, assume white space, break
692 %      character, and quote are the same as in the above examples, and
693 %      further, assume that ^ is the escape character. Then, in the string
694 %
695 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
696 %
697 %        ... there are 7 tokens:
698 %
699 %        1)  "ABC"
700 %        2)  " DEF ' GH"
701 %        3)  "I"
702 %        4)  " "     (a lone blank)
703 %        5)  "J"
704 %        6)  "K L"
705 %        7)  "^"     (passed as is at end of line)
706 %
707 %  The format of the Tokenizer method is:
708 %
709 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
710 %        const size_t max_token_length,const char *line,const char *white,
711 %        const char *break_set,const char *quote,const char escape,
712 %        char *breaker,int *next,char *quoted)
713 %
714 %  A description of each parameter follows:
715 %
716 %    o flag: right now, only the low order 2 bits are used.
717 %
718 %        1 => convert non-quoted tokens to upper case
719 %        2 => convert non-quoted tokens to lower case
720 %        0 => do not convert non-quoted tokens
721 %
722 %    o token: a character string containing the returned next token
723 %
724 %    o max_token_length: the maximum size of "token".  Characters beyond
725 %      "max_token_length" are truncated.
726 %
727 %    o line: the string to be parsed.
728 %
729 %    o white: a string of the valid white spaces.  example:
730 %
731 %        char whitesp[]={" \t"};
732 %
733 %      blank and tab will be valid white space.
734 %
735 %    o break: a string of the valid break characters. example:
736 %
737 %        char breakch[]={";,"};
738 %
739 %      semicolon and comma will be valid break characters.
740 %
741 %    o quote: a string of the valid quote characters. An example would be
742 %
743 %        char quote[]={"'\""};
744 %
745 %      (this causes single and double quotes to be valid) Note that a
746 %      token starting with one of these characters needs the same quote
747 %      character to terminate it.
748 %
749 %      for example:
750 %
751 %        "ABC '
752 %
753 %      is unterminated, but
754 %
755 %        "DEF" and 'GHI'
756 %
757 %      are properly terminated.  Note that different quote characters
758 %      can appear on the same line; only for a given token do the quote
759 %      characters have to be the same.
760 %
761 %    o escape: the escape character (NOT a string ... only one
762 %      allowed). Use zero if none is desired.
763 %
764 %    o breaker: the break character used to terminate the current
765 %      token.  If the token was quoted, this will be the quote used.  If
766 %      the token is the last one on the line, this will be zero.
767 %
768 %    o next: this variable points to the first character of the
769 %      next token.  it gets reset by "tokenizer" as it steps through the
770 %      string.  Set it to 0 upon initialization, and leave it alone
771 %      after that.  You can change it if you want to jump around in the
772 %      string or re-parse from the beginning, but be careful.
773 %
774 %    o quoted: set to True if the token was quoted and MagickFalse
775 %      if not.  You may need this information (for example:  in C, a
776 %      string with quotes around it is a character string, while one
777 %      without is an identifier).
778 %
779 %    o result: 0 if we haven't reached EOS (end of string), and 1
780 %      if we have.
781 %
782 */
783 
/*
  Tokenizer() parser states.
*/
#define IN_WHITE  0  /* before a token: leading white space is skipped */
#define IN_TOKEN  1  /* inside an unquoted token */
#define IN_QUOTE  2  /* inside a quoted token */
#define IN_OZONE  3  /* after a closing quote, or white space after a token */
788 
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    offset;

  /*
    Return the offset of the first character of string equal to c, or -1
    when there is none.  The terminating NUL is never compared, so c == 0 is
    always reported as absent.
  */
  offset=0;
  while (string[offset] != '\0')
  {
    if ((int) string[offset] == c)
      return(offset);
    offset++;
  }
  return(-1);
}
799 
static void StoreToken(TokenInfo *token_info,char *string,
  size_t max_token_length,int c)
{
  register ssize_t
    i;

  /*
    Append c to string at token_info->offset.  The character is dropped when
    the buffer is full; one byte stays free for the terminator that
    Tokenizer() writes.  The bound is expressed as offset+1 >= max so that a
    zero max_token_length rejects every character, where max_token_length-1
    would wrap around and disable the check.
  */
  if ((token_info->offset < 0) ||
      (((size_t) token_info->offset+1) >= max_token_length))
    return;
  i=token_info->offset++;
  string[i]=(char) c;
  /*
    Quoted text is stored verbatim; otherwise the low two flag bits select
    upper case (1), lower case (2) or no conversion.
  */
  if (token_info->state == IN_QUOTE)
    return;
  switch (token_info->flag & 0x03)
  {
    case 1:
    {
      string[i]=(char) LocaleUppercase(c);
      break;
    }
    case 2:
    {
      string[i]=(char) LocaleLowercase(c);
      break;
    }
    default:
      break;
  }
}
829 
Tokenizer(TokenInfo * token_info,const unsigned flag,char * token,const size_t max_token_length,const char * line,const char * white,const char * break_set,const char * quote,const char escape,char * breaker,int * next,char * quoted)830 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
831   char *token,const size_t max_token_length,const char *line,const char *white,
832   const char *break_set,const char *quote,const char escape,char *breaker,
833   int *next,char *quoted)
834 {
835   int
836     c;
837 
838   register ssize_t
839     i;
840 
841   *breaker='\0';
842   *quoted='\0';
843   if (line[*next] == '\0')
844     return(1);
845   token_info->state=IN_WHITE;
846   token_info->quote=(char) MagickFalse;
847   token_info->flag=flag;
848   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
849   {
850     c=(int) line[*next];
851     i=sindex(c,break_set);
852     if (i >= 0)
853       {
854         switch (token_info->state)
855         {
856           case IN_WHITE:
857           case IN_TOKEN:
858           case IN_OZONE:
859           {
860             (*next)++;
861             *breaker=break_set[i];
862             token[token_info->offset]='\0';
863             return(0);
864           }
865           case IN_QUOTE:
866           {
867             StoreToken(token_info,token,max_token_length,c);
868             break;
869           }
870         }
871         continue;
872       }
873     i=sindex(c,quote);
874     if (i >= 0)
875       {
876         switch (token_info->state)
877         {
878           case IN_WHITE:
879           {
880             token_info->state=IN_QUOTE;
881             token_info->quote=quote[i];
882             *quoted=(char) MagickTrue;
883             break;
884           }
885           case IN_QUOTE:
886           {
887             if (quote[i] != token_info->quote)
888               StoreToken(token_info,token,max_token_length,c);
889             else
890               {
891                 token_info->state=IN_OZONE;
892                 token_info->quote='\0';
893               }
894             break;
895           }
896           case IN_TOKEN:
897           case IN_OZONE:
898           {
899             *breaker=(char) c;
900             token[token_info->offset]='\0';
901             return(0);
902           }
903         }
904         continue;
905       }
906     i=sindex(c,white);
907     if (i >= 0)
908       {
909         switch (token_info->state)
910         {
911           case IN_WHITE:
912           case IN_OZONE:
913             break;
914           case IN_TOKEN:
915           {
916             token_info->state=IN_OZONE;
917             break;
918           }
919           case IN_QUOTE:
920           {
921             StoreToken(token_info,token,max_token_length,c);
922             break;
923           }
924         }
925         continue;
926       }
927     if (c == (int) escape)
928       {
929         if (line[(*next)+1] == '\0')
930           {
931             *breaker='\0';
932             StoreToken(token_info,token,max_token_length,c);
933             (*next)++;
934             token[token_info->offset]='\0';
935             return(0);
936           }
937         switch (token_info->state)
938         {
939           case IN_WHITE:
940           {
941             (*next)--;
942             token_info->state=IN_TOKEN;
943             break;
944           }
945           case IN_TOKEN:
946           case IN_QUOTE:
947           {
948             (*next)++;
949             c=(int) line[*next];
950             StoreToken(token_info,token,max_token_length,c);
951             break;
952           }
953           case IN_OZONE:
954           {
955             token[token_info->offset]='\0';
956             return(0);
957           }
958         }
959         continue;
960       }
961     switch (token_info->state)
962     {
963       case IN_WHITE:
964       {
965         token_info->state=IN_TOKEN;
966         StoreToken(token_info,token,max_token_length,c);
967         break;
968       }
969       case IN_TOKEN:
970       case IN_QUOTE:
971       {
972         StoreToken(token_info,token,max_token_length,c);
973         break;
974       }
975       case IN_OZONE:
976       {
977         token[token_info->offset]='\0';
978         return(0);
979       }
980     }
981   }
982   token[token_info->offset]='\0';
983   return(0);
984 }
985