1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %                                                                             %
4 %                                                                             %
5 %                                                                             %
6 %                    TTTTT   OOO   K   K  EEEEE  N   N                        %
7 %                      T    O   O  K  K   E      NN  N                        %
8 %                      T    O   O  KKK    EEE    N N N                        %
9 %                      T    O   O  K  K   E      N  NN                        %
10 %                      T     OOO   K   K  EEEEE  N   N                        %
11 %                                                                             %
12 %                                                                             %
13 %                         MagickCore Token Methods                            %
14 %                                                                             %
15 %                             Software Design                                 %
16 %                                  Cristy                                     %
17 %                              January 1993                                   %
18 %                                                                             %
19 %                                                                             %
20 %  Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization      %
21 %  dedicated to making software imaging solutions freely available.           %
22 %                                                                             %
23 %  You may not use this file except in compliance with the License.  You may  %
24 %  obtain a copy of the License at                                            %
25 %                                                                             %
26 %    http://www.imagemagick.org/script/license.php                            %
27 %                                                                             %
28 %  Unless required by applicable law or agreed to in writing, software        %
29 %  distributed under the License is distributed on an "AS IS" BASIS,          %
30 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
31 %  See the License for the specific language governing permissions and        %
32 %  limitations under the License.                                             %
33 %                                                                             %
34 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
35 %
36 %
37 %
38 */
39 
40 /*
41   Include declarations.
42 */
43 #include "MagickCore/studio.h"
44 #include "MagickCore/exception.h"
45 #include "MagickCore/exception-private.h"
46 #include "MagickCore/image.h"
47 #include "MagickCore/memory_.h"
48 #include "MagickCore/string_.h"
49 #include "MagickCore/string-private.h"
50 #include "MagickCore/token.h"
51 #include "MagickCore/token-private.h"
52 #include "MagickCore/utility.h"
53 #include "MagickCore/utility-private.h"
54 
55 /*
  Typedef declarations.
57 */
58 struct _TokenInfo
59 {
60   int
61     state;
62 
63   MagickStatusType
64     flag;
65 
66   ssize_t
67     offset;
68 
69   char
70     quote;
71 
72   size_t
73     signature;
74 };
75 
76 /*
77 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
78 %                                                                             %
79 %                                                                             %
80 %                                                                             %
81 %   A c q u i r e T o k e n I n f o                                           %
82 %                                                                             %
83 %                                                                             %
84 %                                                                             %
85 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
86 %
87 %  AcquireTokenInfo() allocates the TokenInfo structure.
88 %
89 %  The format of the AcquireTokenInfo method is:
90 %
91 %      TokenInfo *AcquireTokenInfo()
92 %
93 */
AcquireTokenInfo(void)94 MagickExport TokenInfo *AcquireTokenInfo(void)
95 {
96   TokenInfo
97     *token_info;
98 
99   token_info=(TokenInfo *) AcquireMagickMemory(sizeof(*token_info));
100   if (token_info == (TokenInfo *) NULL)
101     ThrowFatalException(ResourceLimitFatalError,"MemoryAllocationFailed");
102   token_info->signature=MagickCoreSignature;
103   return(token_info);
104 }
105 
106 /*
107 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
108 %                                                                             %
109 %                                                                             %
110 %                                                                             %
111 %   D e s t r o y T o k e n I n f o                                           %
112 %                                                                             %
113 %                                                                             %
114 %                                                                             %
115 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
116 %
%  DestroyTokenInfo() deallocates memory associated with a TokenInfo
118 %  structure.
119 %
120 %  The format of the DestroyTokenInfo method is:
121 %
122 %      TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
123 %
124 %  A description of each parameter follows:
125 %
%    o token_info: Specifies a pointer to a TokenInfo structure.
127 %
128 */
DestroyTokenInfo(TokenInfo * token_info)129 MagickExport TokenInfo *DestroyTokenInfo(TokenInfo *token_info)
130 {
131   (void) LogMagickEvent(TraceEvent,GetMagickModule(),"...");
132   assert(token_info != (TokenInfo *) NULL);
133   assert(token_info->signature == MagickCoreSignature);
134   token_info->signature=(~MagickCoreSignature);
135   token_info=(TokenInfo *) RelinquishMagickMemory(token_info);
136   return(token_info);
137 }
138 
139 /*
140 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
141 %                                                                             %
142 %                                                                             %
143 %                                                                             %
144 +   G e t N e x t T o k e n                                                   %
145 %                                                                             %
146 %                                                                             %
147 %                                                                             %
148 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
149 %
150 %  GetNextToken() gets a token from the token stream.  A token is defined as
151 %  a sequence of characters delimited by whitespace (e.g. clip-path), a
%  sequence delimited with quotes (e.g. "Quote me"), or a sequence enclosed in
153 %  parenthesis (e.g. rgb(0,0,0)).  GetNextToken() also recognizes these
154 %  separator characters: ':', '=', ',', and ';'.
155 %
156 %  The format of the GetNextToken method is:
157 %
158 %      void GetNextToken(const char *start,const char **end,
159 %        const size_t extent,char *token)
160 %
161 %  A description of each parameter follows:
162 %
163 %    o start: the start of the token sequence.
164 %
165 %    o end: point to the end of the token sequence.
166 %
167 %    o extent: maximum extent of the token.
168 %
169 %    o token: copy the token to this buffer.
170 %
171 */
GetNextToken(const char * start,const char ** end,const size_t extent,char * token)172 MagickExport void GetNextToken(const char *start,const char **end,
173   const size_t extent,char *token)
174 {
175   double
176     value;
177 
178   register const char
179     *p;
180 
181   register ssize_t
182     i;
183 
184   assert(start != (const char *) NULL);
185   assert(token != (char *) NULL);
186   i=0;
187   p=start;
188   while ((isspace((int) ((unsigned char) *p)) != 0) && (*p != '\0'))
189     p++;
190   switch (*p)
191   {
192     case '\0':
193       break;
194     case '"':
195     case '\'':
196     case '`':
197     case '{':
198     {
199       register char
200         escape;
201 
202       switch (*p)
203       {
204         case '"': escape='"'; break;
205         case '\'': escape='\''; break;
206         case '`': escape='\''; break;
207         case '{': escape='}'; break;
208         default: escape=(*p); break;
209       }
210       for (p++; *p != '\0'; p++)
211       {
212         if ((*p == '\\') && ((*(p+1) == escape) || (*(p+1) == '\\')))
213           p++;
214         else
215           if (*p == escape)
216             {
217               p++;
218               break;
219             }
220         if (i < (ssize_t) (extent-1))
221           token[i++]=(*p);
222       }
223       break;
224     }
225     case '/':
226     {
227       if (i < (ssize_t) (extent-1))
228         token[i++]=(*p++);
229       if ((*p == '>') || (*p == '/'))
230         if (i < (ssize_t) (extent-1))
231           token[i++]=(*p++);
232       break;
233     }
234     default:
235     {
236       char
237         *q;
238 
239       value=StringToDouble(p,&q);
240       (void) value;
241       if ((p != q) && (*p != ','))
242         {
243           for ( ; (p < q) && (*p != ','); p++)
244             if (i < (ssize_t) (extent-1))
245               token[i++]=(*p);
246           if (*p == '%')
247             if (i < (ssize_t) (extent-1))
248               token[i++]=(*p++);
249           break;
250         }
251       if ((*p != '\0') && (isalpha((int) ((unsigned char) *p)) == 0) &&
252           (*p != *DirectorySeparator) && (*p != '#') && (*p != '<'))
253         {
254           if (i < (ssize_t) (extent-1))
255             token[i++]=(*p++);
256           break;
257         }
258       for ( ; *p != '\0'; p++)
259       {
260         if (((isspace((int) ((unsigned char) *p)) != 0) || (*p == '=') ||
261             (*p == ',') || (*p == ':') || (*p == ';')) && (*(p-1) != '\\'))
262           break;
263         if ((i > 0) && (*p == '<'))
264           break;
265         if (i < (ssize_t) (extent-1))
266           token[i++]=(*p);
267         if (*p == '>')
268           break;
269         if (*p == '(')
270           for (p++; *p != '\0'; p++)
271           {
272             if (i < (ssize_t) (extent-1))
273               token[i++]=(*p);
274             if ((*p == ')') && (*(p-1) != '\\'))
275               break;
276           }
277       }
278       break;
279     }
280   }
281   token[i]='\0';
282   if (LocaleNCompare(token,"url(",4) == 0)
283     {
284       ssize_t
285         offset;
286 
287       offset=4;
288       if (token[offset] == '#')
289         offset++;
290       i=(ssize_t) strlen(token);
291       (void) CopyMagickString(token,token+offset,MagickPathExtent);
292       token[i-offset-1]='\0';
293     }
294   while (isspace((int) ((unsigned char) *p)) != 0)
295     p++;
296   if (end != (const char **) NULL)
297     *end=(const char *) p;
298 }
299 
300 /*
301 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
302 %                                                                             %
303 %                                                                             %
304 %                                                                             %
305 %   G l o b E x p r e s s i o n                                               %
306 %                                                                             %
307 %                                                                             %
308 %                                                                             %
309 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
310 %
311 %  GlobExpression() returns MagickTrue if the expression matches the pattern.
312 %
313 %  The format of the GlobExpression function is:
314 %
315 %      MagickBooleanType GlobExpression(const char *expression,
316 %        const char *pattern,const MagickBooleanType case_insensitive)
317 %
318 %  A description of each parameter follows:
319 %
320 %    o expression: Specifies a pointer to a text string containing a file name.
321 %
322 %    o pattern: Specifies a pointer to a text string containing a pattern.
323 %
324 %    o case_insensitive: set to MagickTrue to ignore the case when matching
325 %      an expression.
326 %
327 */
/*
  Match `expression' against the glob `pattern'.

  Returns MagickTrue when the pattern is NULL, empty or "*", or when the
  matching loop below consumes both strings completely; MagickFalse
  otherwise.  The pattern syntax handled here is '*', '?', '[...]' (with
  a-b ranges), '{a,b,...}' and '\' as an escape.  `case_insensitive' is
  consulted only when comparing literal characters (the default case of the
  switch).  The function calls itself to resolve '*'.
*/
MagickExport MagickBooleanType GlobExpression(const char *expression,
  const char *pattern,const MagickBooleanType case_insensitive)
{
  MagickBooleanType
    done,
    match;

  register const char
    *p;

  /*
    Return on empty pattern or '*'.
  */
  if (pattern == (char *) NULL)
    return(MagickTrue);
  if (GetUTFCode(pattern) == 0)
    return(MagickTrue);
  if (LocaleCompare(pattern,"*") == 0)
    return(MagickTrue);
  /* p addresses the last byte of the pattern. */
  p=pattern+strlen(pattern)-1;
  if ((GetUTFCode(p) == ']') && (strchr(pattern,'[') != (char *) NULL))
    {
      ExceptionInfo
        *exception;

      ImageInfo
        *image_info;

      /*
        Determine if pattern is a scene, i.e. img0001.pcd[2].

        The pattern is copied into a scratch ImageInfo and passed through
        SetImageInfo(); if the filename left behind differs from the
        pattern, the function returns MagickFalse.  Presumably
        SetImageInfo() strips a trailing scene specification -- confirm
        against its implementation.
      */
      image_info=AcquireImageInfo();
      (void) CopyMagickString(image_info->filename,pattern,MagickPathExtent);
      exception=AcquireExceptionInfo();
      (void) SetImageInfo(image_info,0,exception);
      exception=DestroyExceptionInfo(exception);
      if (LocaleCompare(image_info->filename,pattern) != 0)
        {
          image_info=DestroyImageInfo(image_info);
          return(MagickFalse);
        }
      image_info=DestroyImageInfo(image_info);
    }
  /*
    Evaluate glob expression.
  */
  done=MagickFalse;
  while ((GetUTFCode(pattern) != 0) && (done == MagickFalse))
  {
    /*
      Once the expression is exhausted only '{' and '*' are still processed.
    */
    if (GetUTFCode(expression) == 0)
      if ((GetUTFCode(pattern) != '{') && (GetUTFCode(pattern) != '*'))
        break;
    switch (GetUTFCode(pattern))
    {
      case '*':
      {
        MagickBooleanType
          status;

        /*
          Try the remainder of the pattern against each successive suffix of
          the expression until one matches.
        */
        status=MagickFalse;
        pattern+=GetUTFOctets(pattern);
        while ((GetUTFCode(expression) != 0) && (status == MagickFalse))
        {
          status=GlobExpression(expression,pattern,case_insensitive);
          expression+=GetUTFOctets(expression);
        }
        if (status != MagickFalse)
          {
            /*
              The recursive call matched everything that remained: move both
              cursors to their terminators.
            */
            while (GetUTFCode(expression) != 0)
              expression+=GetUTFOctets(expression);
            while (GetUTFCode(pattern) != 0)
              pattern+=GetUTFOctets(pattern);
          }
        break;
      }
      case '[':
      {
        int
          c;

        /*
          Character class: test the current expression character against
          each member in turn.
        */
        pattern+=GetUTFOctets(pattern);
        for ( ; ; )
        {
          /*
            End of class (or of pattern) reached with no member matching.
          */
          if ((GetUTFCode(pattern) == 0) || (GetUTFCode(pattern) == ']'))
            {
              done=MagickTrue;
              break;
            }
          /*
            A backslash makes the following character a literal member.
          */
          if (GetUTFCode(pattern) == '\\')
            {
              pattern+=GetUTFOctets(pattern);
              if (GetUTFCode(pattern) == 0)
                {
                  done=MagickTrue;
                  break;
                }
             }
          if (GetUTFCode(pattern+GetUTFOctets(pattern)) == '-')
            {
              /*
                Range member "c-x": c is the lower bound; step over it and
                the '-' to reach the upper bound.
              */
              c=GetUTFCode(pattern);
              pattern+=GetUTFOctets(pattern);
              pattern+=GetUTFOctets(pattern);
              if (GetUTFCode(pattern) == ']')
                {
                  done=MagickTrue;
                  break;
                }
              if (GetUTFCode(pattern) == '\\')
                {
                  pattern+=GetUTFOctets(pattern);
                  if (GetUTFCode(pattern) == 0)
                    {
                      done=MagickTrue;
                      break;
                    }
                }
              /*
                Outside the range: try the next member.
              */
              if ((GetUTFCode(expression) < c) ||
                  (GetUTFCode(expression) > GetUTFCode(pattern)))
                {
                  pattern+=GetUTFOctets(pattern);
                  continue;
                }
            }
          else
            if (GetUTFCode(pattern) != GetUTFCode(expression))
              {
                pattern+=GetUTFOctets(pattern);
                continue;
              }
          /*
            A member matched: skip the rest of the class up to ']', stepping
            over escaped characters.
          */
          pattern+=GetUTFOctets(pattern);
          while ((GetUTFCode(pattern) != ']') && (GetUTFCode(pattern) != 0))
          {
            if ((GetUTFCode(pattern) == '\\') &&
                (GetUTFCode(pattern+GetUTFOctets(pattern)) > 0))
              pattern+=GetUTFOctets(pattern);
            pattern+=GetUTFOctets(pattern);
          }
          /*
            Step over ']' and consume the matched expression character.
          */
          if (GetUTFCode(pattern) != 0)
            {
              pattern+=GetUTFOctets(pattern);
              expression+=GetUTFOctets(expression);
            }
          break;
        }
        break;
      }
      case '?':
      {
        /*
          Any single character.
        */
        pattern+=GetUTFOctets(pattern);
        expression+=GetUTFOctets(expression);
        break;
      }
      case '{':
      {
        /*
          Brace group: comma-separated alternatives.
        */
        pattern+=GetUTFOctets(pattern);
        while ((GetUTFCode(pattern) != '}') && (GetUTFCode(pattern) != 0))
        {
          /*
            Compare the expression with the current alternative character by
            character until either string ends, a ',' or '}' is reached, or
            a character differs.

            NOTE(review): when the loop stops because the expression ran out,
            match is still MagickTrue even if the alternative was not fully
            consumed -- confirm this is intended.
          */
          p=expression;
          match=MagickTrue;
          while ((GetUTFCode(p) != 0) && (GetUTFCode(pattern) != 0) &&
                 (GetUTFCode(pattern) != ',') && (GetUTFCode(pattern) != '}') &&
                 (match != MagickFalse))
          {
            if (GetUTFCode(pattern) == '\\')
              pattern+=GetUTFOctets(pattern);
            match=(GetUTFCode(pattern) == GetUTFCode(p)) ? MagickTrue :
              MagickFalse;
            p+=GetUTFOctets(p);
            pattern+=GetUTFOctets(pattern);
          }
          /*
            Pattern ended before the closing '}': no match, stop scanning.
          */
          if (GetUTFCode(pattern) == 0)
            {
              match=MagickFalse;
              done=MagickTrue;
              break;
            }
          else
            if (match != MagickFalse)
              {
                /*
                  Alternative matched: commit the consumed characters and
                  skip ahead to the closing '}'.
                */
                expression=p;
                while ((GetUTFCode(pattern) != '}') &&
                       (GetUTFCode(pattern) != 0))
                {
                  pattern+=GetUTFOctets(pattern);
                  if (GetUTFCode(pattern) == '\\')
                    {
                      pattern+=GetUTFOctets(pattern);
                      if (GetUTFCode(pattern) == '}')
                        pattern+=GetUTFOctets(pattern);
                    }
                }
              }
            else
              {
                /*
                  Alternative failed: skip ahead to the next ',' or '}'.
                */
                while ((GetUTFCode(pattern) != '}') &&
                       (GetUTFCode(pattern) != ',') &&
                       (GetUTFCode(pattern) != 0))
                {
                  pattern+=GetUTFOctets(pattern);
                  if (GetUTFCode(pattern) == '\\')
                    {
                      pattern+=GetUTFOctets(pattern);
                      if ((GetUTFCode(pattern) == '}') ||
                          (GetUTFCode(pattern) == ','))
                        pattern+=GetUTFOctets(pattern);
                    }
                }
              }
            /*
              Step over the ',' or '}' the cursor stopped on.

              NOTE(review): this statement is inside the enclosing while, so
              after stepping over '}' the loop condition is re-tested on the
              character that follows the group; pattern text after the group
              appears to be scanned as a further alternative -- confirm.
            */
            if (GetUTFCode(pattern) != 0)
              pattern+=GetUTFOctets(pattern);
          }
        break;
      }
      case '\\':
      {
        /*
          Escape: skip the backslash and compare the next character
          literally; a trailing backslash ends the switch.
        */
        pattern+=GetUTFOctets(pattern);
        if (GetUTFCode(pattern) == 0)
          break;
      }
      /* fall through */
      default:
      {
        /*
          Literal character; a mismatch ends the scan.
        */
        if (case_insensitive != MagickFalse)
          {
            if (tolower((int) GetUTFCode(expression)) !=
                tolower((int) GetUTFCode(pattern)))
              {
                done=MagickTrue;
                break;
              }
          }
        else
          if (GetUTFCode(expression) != GetUTFCode(pattern))
            {
              done=MagickTrue;
              break;
            }
        expression+=GetUTFOctets(expression);
        pattern+=GetUTFOctets(pattern);
      }
    }
  }
  /*
    Trailing '*' characters match the empty string.  The result is a match
    only when both the expression and the pattern are fully consumed.
  */
  while (GetUTFCode(pattern) == '*')
    pattern+=GetUTFOctets(pattern);
  match=(GetUTFCode(expression) == 0) && (GetUTFCode(pattern) == 0) ?
    MagickTrue : MagickFalse;
  return(match);
}
575 
576 /*
577 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
578 %                                                                             %
579 %                                                                             %
580 %                                                                             %
581 +     I s G l o b                                                             %
582 %                                                                             %
583 %                                                                             %
584 %                                                                             %
585 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
586 %
587 %  IsGlob() returns MagickTrue if the path specification contains a globbing
588 %  pattern.
589 %
590 %  The format of the IsGlob method is:
591 %
%      MagickBooleanType IsGlob(const char *path)
593 %
594 %  A description of each parameter follows:
595 %
596 %    o path: the path.
597 %
598 */
IsGlob(const char * path)599 MagickPrivate MagickBooleanType IsGlob(const char *path)
600 {
601   MagickBooleanType
602     status = MagickFalse;
603 
604   register const char
605     *p;
606 
607   if (IsPathAccessible(path) != MagickFalse)
608     return(MagickFalse);
609   for (p=path; *p != '\0'; p++)
610   {
611     switch (*p)
612     {
613       case '*':
614       case '?':
615       case '{':
616       case '}':
617       case '[':
618       case ']':
619       {
620         status=MagickTrue;
621         break;
622       }
623       default:
624         break;
625     }
626   }
627   return(status);
628 }
629 
630 /*
631 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
632 %                                                                             %
633 %                                                                             %
634 %                                                                             %
635 %   T o k e n i z e r                                                         %
636 %                                                                             %
637 %                                                                             %
638 %                                                                             %
639 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
640 %
641 %  Tokenizer() is a generalized, finite state token parser.  It extracts tokens
642 %  one at a time from a string of characters.  The characters used for white
643 %  space, for break characters, and for quotes can be specified.  Also,
644 %  characters in the string can be preceded by a specifiable escape character
645 %  which removes any special meaning the character may have.
646 %
647 %  Here is some terminology:
648 %
649 %    o token: A single unit of information in the form of a group of
650 %      characters.
651 %
%    o white space: Space that gets ignored (except within quotes or when
653 %      escaped), like blanks and tabs. in addition, white space terminates a
654 %      non-quoted token.
655 %
656 %    o break set: One or more characters that separates non-quoted tokens.
657 %      Commas are a common break character. The usage of break characters to
658 %      signal the end of a token is the same as that of white space, except
659 %      multiple break characters with nothing or only white space between
660 %      generate a null token for each two break characters together.
661 %
662 %      For example, if blank is set to be the white space and comma is set to
663 %      be the break character, the line
664 %
665 %        A, B, C ,  , DEF
666 %
667 %        ... consists of 5 tokens:
668 %
669 %        1)  "A"
670 %        2)  "B"
671 %        3)  "C"
672 %        4)  "" (the null string)
673 %        5)  "DEF"
674 %
675 %    o Quote character: A character that, when surrounding a group of other
676 %      characters, causes the group of characters to be treated as a single
677 %      token, no matter how many white spaces or break characters exist in
678 %      the group. Also, a token always terminates after the closing quote.
679 %      For example, if ' is the quote character, blank is white space, and
680 %      comma is the break character, the following string
681 %
682 %        A, ' B, CD'EF GHI
683 %
684 %        ... consists of 4 tokens:
685 %
686 %        1)  "A"
687 %        2)  " B, CD" (note the blanks & comma)
688 %        3)  "EF"
689 %        4)  "GHI"
690 %
691 %      The quote characters themselves do not appear in the resultant
692 %      tokens.  The double quotes are delimiters i use here for
693 %      documentation purposes only.
694 %
695 %    o Escape character: A character which itself is ignored but which
696 %      causes the next character to be used as is.  ^ and \ are often used
697 %      as escape characters. An escape in the last position of the string
698 %      gets treated as a "normal" (i.e., non-quote, non-white, non-break,
699 %      and non-escape) character. For example, assume white space, break
700 %      character, and quote are the same as in the above examples, and
701 %      further, assume that ^ is the escape character. Then, in the string
702 %
703 %        ABC, ' DEF ^' GH' I ^ J K^ L ^
704 %
705 %        ... there are 7 tokens:
706 %
707 %        1)  "ABC"
708 %        2)  " DEF ' GH"
709 %        3)  "I"
710 %        4)  " "     (a lone blank)
711 %        5)  "J"
712 %        6)  "K L"
713 %        7)  "^"     (passed as is at end of line)
714 %
715 %  The format of the Tokenizer method is:
716 %
717 %      int Tokenizer(TokenInfo *token_info,const unsigned flag,char *token,
718 %        const size_t max_token_length,const char *line,const char *white,
719 %        const char *break_set,const char *quote,const char escape,
720 %        char *breaker,int *next,char *quoted)
721 %
722 %  A description of each parameter follows:
723 %
%    o flag: right now, only the low order 2 bits are used.
725 %
726 %        1 => convert non-quoted tokens to upper case
727 %        2 => convert non-quoted tokens to lower case
728 %        0 => do not convert non-quoted tokens
729 %
730 %    o token: a character string containing the returned next token
731 %
732 %    o max_token_length: the maximum size of "token".  Characters beyond
733 %      "max_token_length" are truncated.
734 %
735 %    o string: the string to be parsed.
736 %
737 %    o white: a string of the valid white spaces.  example:
738 %
739 %        char whitesp[]={" \t"};
740 %
741 %      blank and tab will be valid white space.
742 %
743 %    o break: a string of the valid break characters. example:
744 %
745 %        char breakch[]={";,"};
746 %
747 %      semicolon and comma will be valid break characters.
748 %
749 %    o quote: a string of the valid quote characters. An example would be
750 %
%        char quotes[]={"'\""};
752 %
753 %      (this causes single and double quotes to be valid) Note that a
754 %      token starting with one of these characters needs the same quote
755 %      character to terminate it.
756 %
757 %      for example:
758 %
759 %        "ABC '
760 %
761 %      is unterminated, but
762 %
763 %        "DEF" and 'GHI'
764 %
765 %      are properly terminated.  Note that different quote characters
766 %      can appear on the same line; only for a given token do the quote
767 %      characters have to be the same.
768 %
769 %    o escape: the escape character (NOT a string ... only one
770 %      allowed). Use zero if none is desired.
771 %
772 %    o breaker: the break character used to terminate the current
773 %      token.  If the token was quoted, this will be the quote used.  If
774 %      the token is the last one on the line, this will be zero.
775 %
776 %    o next: this variable points to the first character of the
777 %      next token.  it gets reset by "tokenizer" as it steps through the
778 %      string.  Set it to 0 upon initialization, and leave it alone
779 %      after that.  You can change it if you want to jump around in the
780 %      string or re-parse from the beginning, but be careful.
781 %
782 %    o quoted: set to True if the token was quoted and MagickFalse
783 %      if not.  You may need this information (for example:  in C, a
784 %      string with quotes around it is a character string, while one
785 %      without is an identifier).
786 %
787 %    o result: 0 if we haven't reached EOS (end of string), and 1
788 %      if we have.
789 %
790 */
791 
/*
  Tokenizer() parser states, stored in TokenInfo.state.
*/
#define IN_WHITE 0  /* no token character seen yet */
#define IN_TOKEN 1  /* inside an unquoted token */
#define IN_QUOTE 2  /* inside a quoted token */
#define IN_OZONE 3  /* token complete: closing quote or trailing white space seen */
796 
/*
  Return the zero-based position of the first character in `string' that
  equals `c', or -1 when no character before the terminating NUL matches.
*/
static ssize_t sindex(int c,const char *string)
{
  ssize_t
    position;

  for (position=0; string[position] != '\0'; position++)
    if ((int) string[position] == c)
      return(position);
  return(-1);
}
807 
/*
  Append character `c' to the token being assembled in `string'.

    o token_info:       parse state; `offset' is the write position and is
                        advanced on a successful store.
    o string:           destination token buffer.
    o max_token_length: size of `string'; one byte is always left free for
                        the terminating NUL, which the caller writes.
    o c:                the character to store.

  The character is silently dropped when the buffer has no room (including
  a zero-length buffer) or the offset is negative.  Outside a quoted token
  the stored character is converted to upper case when the low two bits of
  `flag' are 1 and to lower case when they are 2.
*/
static void StoreToken(TokenInfo *token_info,char *string,
  size_t max_token_length,int c)
{
  register ssize_t
    i;

  /*
    Test for a zero length first: max_token_length-1 would otherwise wrap
    around and disable the bounds check.
  */
  if ((max_token_length == 0) || (token_info->offset < 0) ||
      ((size_t) token_info->offset >= (max_token_length-1)))
    return;
  i=token_info->offset++;
  string[i]=(char) c;
  if (token_info->state == IN_QUOTE)
    return;
  switch (token_info->flag & 0x03)
  {
    case 1:
    {
      /*
        c may be negative when it came from a plain char; the <ctype.h>
        functions require a value representable as unsigned char.
      */
      string[i]=(char) toupper((int) ((unsigned char) c));
      break;
    }
    case 2:
    {
      string[i]=(char) tolower((int) ((unsigned char) c));
      break;
    }
    default:
      break;
  }
}
837 
Tokenizer(TokenInfo * token_info,const unsigned flag,char * token,const size_t max_token_length,const char * line,const char * white,const char * break_set,const char * quote,const char escape,char * breaker,int * next,char * quoted)838 MagickExport int Tokenizer(TokenInfo *token_info,const unsigned flag,
839   char *token,const size_t max_token_length,const char *line,const char *white,
840   const char *break_set,const char *quote,const char escape,char *breaker,
841   int *next,char *quoted)
842 {
843   int
844     c;
845 
846   register ssize_t
847     i;
848 
849   *breaker='\0';
850   *quoted='\0';
851   if (line[*next] == '\0')
852     return(1);
853   token_info->state=IN_WHITE;
854   token_info->quote=(char) MagickFalse;
855   token_info->flag=flag;
856   for (token_info->offset=0; (int) line[*next] != 0; (*next)++)
857   {
858     c=(int) line[*next];
859     i=sindex(c,break_set);
860     if (i >= 0)
861       {
862         switch (token_info->state)
863         {
864           case IN_WHITE:
865           case IN_TOKEN:
866           case IN_OZONE:
867           {
868             (*next)++;
869             *breaker=break_set[i];
870             token[token_info->offset]='\0';
871             return(0);
872           }
873           case IN_QUOTE:
874           {
875             StoreToken(token_info,token,max_token_length,c);
876             break;
877           }
878         }
879         continue;
880       }
881     i=sindex(c,quote);
882     if (i >= 0)
883       {
884         switch (token_info->state)
885         {
886           case IN_WHITE:
887           {
888             token_info->state=IN_QUOTE;
889             token_info->quote=quote[i];
890             *quoted=(char) MagickTrue;
891             break;
892           }
893           case IN_QUOTE:
894           {
895             if (quote[i] != token_info->quote)
896               StoreToken(token_info,token,max_token_length,c);
897             else
898               {
899                 token_info->state=IN_OZONE;
900                 token_info->quote='\0';
901               }
902             break;
903           }
904           case IN_TOKEN:
905           case IN_OZONE:
906           {
907             *breaker=(char) c;
908             token[token_info->offset]='\0';
909             return(0);
910           }
911         }
912         continue;
913       }
914     i=sindex(c,white);
915     if (i >= 0)
916       {
917         switch (token_info->state)
918         {
919           case IN_WHITE:
920           case IN_OZONE:
921             break;
922           case IN_TOKEN:
923           {
924             token_info->state=IN_OZONE;
925             break;
926           }
927           case IN_QUOTE:
928           {
929             StoreToken(token_info,token,max_token_length,c);
930             break;
931           }
932         }
933         continue;
934       }
935     if (c == (int) escape)
936       {
937         if (line[(*next)+1] == '\0')
938           {
939             *breaker='\0';
940             StoreToken(token_info,token,max_token_length,c);
941             (*next)++;
942             token[token_info->offset]='\0';
943             return(0);
944           }
945         switch (token_info->state)
946         {
947           case IN_WHITE:
948           {
949             (*next)--;
950             token_info->state=IN_TOKEN;
951             break;
952           }
953           case IN_TOKEN:
954           case IN_QUOTE:
955           {
956             (*next)++;
957             c=(int) line[*next];
958             StoreToken(token_info,token,max_token_length,c);
959             break;
960           }
961           case IN_OZONE:
962           {
963             token[token_info->offset]='\0';
964             return(0);
965           }
966         }
967         continue;
968       }
969     switch (token_info->state)
970     {
971       case IN_WHITE:
972       {
973         token_info->state=IN_TOKEN;
974         StoreToken(token_info,token,max_token_length,c);
975         break;
976       }
977       case IN_TOKEN:
978       case IN_QUOTE:
979       {
980         StoreToken(token_info,token,max_token_length,c);
981         break;
982       }
983       case IN_OZONE:
984       {
985         token[token_info->offset]='\0';
986         return(0);
987       }
988     }
989   }
990   token[token_info->offset]='\0';
991   return(0);
992 }
993