1 /*
2   Copyright 1999-2019 ImageMagick Studio LLC, a non-profit organization
3   dedicated to making software imaging solutions freely available.
4 
5   You may not use this file except in compliance with the License.  You may
6   obtain a copy of the License at
7 
8     https://imagemagick.org/script/license.php
9 
10   Unless required by applicable law or agreed to in writing, software
11   distributed under the License is distributed on an "AS IS" BASIS,
12   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   See the License for the specific language governing permissions and
14   limitations under the License.
15 
16   MagickCore private token methods.
17 */
18 #ifndef MAGICKCORE_TOKEN_PRIVATE_H
19 #define MAGICKCORE_TOKEN_PRIVATE_H
20 
21 #if defined(__cplusplus) || defined(c_plusplus)
22 extern "C" {
23 #endif
24 
/*
  Fallback for build environments that do not define EILSEQ: alias it to
  ENOENT so that code in this header which sets errno to EILSEQ still
  compiles.
*/
#ifndef EILSEQ
  #define EILSEQ  ENOENT
#endif
28 
/*
  Number of rows in utf_info[], i.e. the longest multibyte sequence (in
  octets) described by that table.
*/
#define MaxMultibyteCodes  6
30 
/*
  IsGlob() is defined outside this header; only its prototype is visible here.
  NOTE(review): magick_attribute((__pure__)) presumably maps to the compiler's
  "pure" function attribute -- confirm against the macro's definition.
*/
extern MagickPrivate MagickBooleanType
  IsGlob(const char *) magick_attribute((__pure__));
33 
/*
  UTFInfo describes one length class of multibyte sequence, as consumed by
  GetNextUTFCode().
*/
typedef struct
{
  int code_mask;   /* bits of the lead octet that identify the length class */
  int code_value;  /* value the masked lead octet must equal for this class */
  int utf_mask;    /* mask applied to the accumulated value once matched */
  int utf_value;   /* smallest accepted value; anything lower is rejected */
} UTFInfo;
42 
43 static UTFInfo
44   utf_info[MaxMultibyteCodes] =
45   {
46     { 0x80, 0x00, 0x000007f, 0x0000000 },  /* 1 byte sequence */
47     { 0xE0, 0xC0, 0x00007ff, 0x0000080 },  /* 2 byte sequence */
48     { 0xF0, 0xE0, 0x000ffff, 0x0000800 },  /* 3 byte sequence */
49     { 0xF8, 0xF0, 0x01fffff, 0x0010000 },  /* 4 byte sequence */
50     { 0xFC, 0xF8, 0x03fffff, 0x0200000 },  /* 5 byte sequence */
51     { 0xFE, 0xFC, 0x7ffffff, 0x4000000 },  /* 6 byte sequence */
52   };
53 
/*
  ConvertLatin1ToUTF8() returns a newly allocated, NUL-terminated UTF-8 copy
  of the NUL-terminated string content, treating every input byte as a code
  point in the range 0x00-0xff.  Bytes below 0x80 are copied unchanged; bytes
  with the high bit set expand to two octets.

  The buffer is obtained from AcquireQuantumMemory().  NULL is returned when
  the required size cannot be represented or the allocation fails.  content
  must not be NULL.
*/
static inline unsigned char *ConvertLatin1ToUTF8(const unsigned char *content)
{
  const unsigned char
    *source;

  int
    octet;

  size_t
    extent;

  unsigned char
    *destination,
    *utf8;

  /*
    First pass: count the output octets (excluding the terminator).
  */
  extent=0;
  source=content;
  while (*source != '\0')
  {
    if ((*source & 0x80) != 0)
      extent+=2;
    else
      extent++;
    source++;
  }
  if (~extent < 1)
    return((unsigned char *) NULL);  /* extent+1 would wrap around */
  utf8=(unsigned char *) AcquireQuantumMemory(extent+1UL,sizeof(*utf8));
  if (utf8 == (unsigned char *) NULL)
    return((unsigned char *) NULL);
  /*
    Second pass: emit the encoded octets.
  */
  destination=utf8;
  for (source=content; *source != '\0'; source++)
  {
    octet=(int) *source;
    if ((octet & 0x80) != 0)
      {
        /* 110xxxxx 10xxxxxx: top two bits, then the low six bits. */
        *destination++=(unsigned char) (0xc0 | ((octet >> 6) & 0x3f));
        *destination++=(unsigned char) (0x80 | (octet & 0x3f));
        continue;
      }
    *destination++=(unsigned char) octet;
  }
  *destination='\0';
  return(utf8);
}
94 
GetNextUTFCode(const char * text,unsigned int * octets)95 static inline int GetNextUTFCode(const char *text,unsigned int *octets)
96 {
97   int
98     code;
99 
100   register ssize_t
101     i;
102 
103   register int
104     c,
105     unicode;
106 
107   *octets=1;
108   if (text == (const char *) NULL)
109     {
110       errno=EINVAL;
111       return(-1);
112     }
113   code=(int) (*text++) & 0xff;
114   unicode=code;
115   for (i=0; i < MaxMultibyteCodes; i++)
116   {
117     if ((code & utf_info[i].code_mask) == utf_info[i].code_value)
118       {
119         unicode&=utf_info[i].utf_mask;
120         if (unicode < utf_info[i].utf_value)
121           break;
122         *octets=(unsigned int) (i+1);
123         return(unicode);
124       }
125     c=(int) (*text++ ^ 0x80) & 0xff;
126     if ((c & 0xc0) != 0)
127       break;
128     if (unicode > 0x10FFFF)
129       break;
130     unicode=(unicode << 6) | c;
131   }
132   errno=EILSEQ;
133   return(-1);
134 }
135 
/*
  GetUTFCode() returns the result of GetNextUTFCode() for the sequence at
  text, discarding the sequence length.
*/
static inline int GetUTFCode(const char *text)
{
  unsigned int
    length;  /* required by GetNextUTFCode(); not used afterwards */

  return(GetNextUTFCode(text,&length));
}
143 
/*
  GetUTFOctets() returns the octet count that GetNextUTFCode() reports for
  the sequence at text; the decoded value itself is ignored.
*/
static inline unsigned int GetUTFOctets(const char *text)
{
  unsigned int
    count;

  (void) GetNextUTFCode(text,&count);
  return(count);
}
152 
IsUTFSpace(int code)153 static inline MagickBooleanType IsUTFSpace(int code)
154 {
155   if (((code >= 0x0009) && (code <= 0x000d)) || (code == 0x0020) ||
156       (code == 0x0085) || (code == 0x00a0) || (code == 0x1680) ||
157       (code == 0x180e) || ((code >= 0x2000) && (code <= 0x200a)) ||
158       (code == 0x2028) || (code == 0x2029) || (code == 0x202f) ||
159       (code == 0x205f) || (code == 0x3000))
160     return(MagickTrue);
161   return(MagickFalse);
162 }
163 
IsUTFValid(int code)164 static inline MagickBooleanType IsUTFValid(int code)
165 {
166   int
167     mask;
168 
169   mask=(int) 0x7fffffff;
170   if (((code & ~mask) != 0) && ((code < 0xd800) || (code > 0xdfff)) &&
171       (code != 0xfffe) && (code != 0xffff))
172     return(MagickFalse);
173   return(MagickTrue);
174 }
175 
IsUTFAscii(int code)176 static inline MagickBooleanType IsUTFAscii(int code)
177 {
178   int
179     mask;
180 
181   mask=(int) 0x7f;
182   if ((code & ~mask) != 0)
183     return(MagickFalse);
184   return(MagickTrue);
185 }
186 
187 #if defined(__cplusplus) || defined(c_plusplus)
188 }
189 #endif
190 
191 #endif
192