1 /*
2   Copyright 1999-2021 ImageMagick Studio LLC, a non-profit organization
3   dedicated to making software imaging solutions freely available.
4 
5   You may not use this file except in compliance with the License.  You may
6   obtain a copy of the License at
7 
8     https://imagemagick.org/script/license.php
9 
10   Unless required by applicable law or agreed to in writing, software
11   distributed under the License is distributed on an "AS IS" BASIS,
12   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   See the License for the specific language governing permissions and
14   limitations under the License.
15 
16   MagickCore private token methods.
17 */
18 #ifndef MAGICKCORE_TOKEN_PRIVATE_H
19 #define MAGICKCORE_TOKEN_PRIVATE_H
20 
21 #if defined(__cplusplus) || defined(c_plusplus)
22 extern "C" {
23 #endif
24 
/*
  Provide a substitute errno value when EILSEQ has not already been defined
  at this point; GetNextUTFCode() reports malformed sequences with it.
*/
#if !defined(EILSEQ)
#  define EILSEQ  ENOENT
#endif

/*
  Number of rows in the utf_info table, i.e. the longest byte sequence
  GetNextUTFCode() attempts to match.
*/
#define MaxMultibyteCodes  6
30 
31 extern MagickPrivate MagickBooleanType
32   IsGlob(const char *) magick_attribute((__pure__));
33 
/*
  One row of the multibyte decoding table consulted by GetNextUTFCode().
  Member order matters: the table is filled with positional initializers.
*/
typedef struct
{
  int code_mask;   /* bits of the lead byte that select this row */
  int code_value;  /* value those lead-byte bits must equal to match */
  int utf_mask;    /* bits of the accumulated value kept as the code point */
  int utf_value;   /* smallest code point accepted for this row */
} UTFInfo;
42 
43 static UTFInfo
44   utf_info[MaxMultibyteCodes] =
45   {
46     { 0x80, 0x00, 0x000007f, 0x0000000 },  /* 1 byte sequence */
47     { 0xE0, 0xC0, 0x00007ff, 0x0000080 },  /* 2 byte sequence */
48     { 0xF0, 0xE0, 0x000ffff, 0x0000800 },  /* 3 byte sequence */
49     { 0xF8, 0xF0, 0x01fffff, 0x0010000 },  /* 4 byte sequence */
50     { 0xFC, 0xF8, 0x03fffff, 0x0200000 },  /* 5 byte sequence */
51     { 0xFE, 0xFC, 0x7ffffff, 0x4000000 },  /* 6 byte sequence */
52   };
53 
/*
  ConvertLatin1ToUTF8() returns a newly allocated, NUL-terminated copy of a
  NUL-terminated byte string in which every byte with the high bit set has
  been expanded to a two-byte UTF-8 sequence; bytes below 0x80 are copied
  unchanged.

  The buffer comes from AcquireQuantumMemory().  NULL is returned when the
  required size cannot be represented or the allocation fails.  The input
  pointer is dereferenced unconditionally and must not be NULL.
*/
static inline unsigned char *ConvertLatin1ToUTF8(
  const unsigned char *magick_restrict content)
{
  const unsigned char
    *magick_restrict source;

  int
    byte;

  size_t
    extent;

  unsigned char
    *magick_restrict destination,
    *result;

  /*
    First pass: count the output bytes (terminator excluded).
  */
  extent=0;
  for (source=content; *source != '\0'; source++)
  {
    if ((*source & 0x80) != 0)
      extent+=2;
    else
      extent+=1;
  }
  /*
    ~extent is zero only when extent is the largest size_t value, in which
    case extent+1 would wrap around.
  */
  if (~extent < 1)
    return((unsigned char *) NULL);
  result=(unsigned char *) AcquireQuantumMemory(extent+1UL,sizeof(*result));
  if (result == (unsigned char *) NULL)
    return((unsigned char *) NULL);
  /*
    Second pass: emit the converted bytes.
  */
  destination=result;
  for (source=content; *source != '\0'; source++)
  {
    byte=(*source);
    if ((byte & 0x80) != 0)
      {
        /* lead byte carries the top two bits, trail byte the low six */
        *destination++=(unsigned char) (0xc0 | ((byte >> 6) & 0x3f));
        *destination++=(unsigned char) (0x80 | (byte & 0x3f));
        continue;
      }
    *destination++=(unsigned char) byte;
  }
  *destination='\0';
  return(result);
}
95 
/*
  GetNextUTFCode() decodes the multibyte sequence that starts at text and
  returns its code point, storing the number of bytes consumed in *octets.

  On failure it returns -1 and leaves *octets at 1: errno is set to EINVAL
  when text is NULL, and to EILSEQ when the bytes do not form an acceptable
  sequence (bad continuation byte, overlong encoding, accumulated value
  already above 0x10FFFF, or no utf_info row matching the lead byte).
  octets itself is written unconditionally and must not be NULL.
*/
static inline int GetNextUTFCode(const char *magick_restrict text,
  unsigned int *magick_restrict octets)
{
  const UTFInfo
    *entry;

  int
    lead,
    trail,
    value;

  ssize_t
    row;

  *octets=1;
  if (text == (const char *) NULL)
    {
      errno=EINVAL;
      return(-1);
    }
  lead=(int) (*text) & 0xff;
  text++;
  value=lead;
  row=0;
  while (row < MaxMultibyteCodes)
  {
    entry=(&utf_info[row]);
    if ((lead & entry->code_mask) == entry->code_value)
      {
        /*
          The lead byte announces a sequence of row+1 bytes, all of which
          have been folded into value by now.
        */
        value&=entry->utf_mask;
        if (value >= entry->utf_value)
          {
            *octets=(unsigned int) (row+1);
            return(value);
          }
        break;  /* overlong encoding */
      }
    /*
      Flipping the top bit turns a continuation byte (10xxxxxx) into
      00xxxxxx, so any bit left in 0xc0 marks a non-continuation byte.
    */
    trail=(int) (*text ^ 0x80) & 0xff;
    text++;
    if (((trail & 0xc0) != 0) || (value > 0x10FFFF))
      break;
    value=(value << 6) | trail;
    row++;
  }
  errno=EILSEQ;
  return(-1);
}
137 
/*
  GetUTFCode() returns the code point decoded by GetNextUTFCode() for the
  sequence starting at text, discarding the byte count.  The return value,
  including -1 on failure, is passed through unchanged.
*/
static inline int GetUTFCode(const char *magick_restrict text)
{
  int
    code;

  unsigned int
    unused_octets;

  code=GetNextUTFCode(text,&unused_octets);
  return(code);
}
145 
/*
  GetUTFOctets() returns the byte count that GetNextUTFCode() reports for the
  sequence starting at text; the decoded code point is discarded.  Because
  GetNextUTFCode() stores 1 before doing anything else, the result is 1
  whenever decoding fails.
*/
static inline unsigned int GetUTFOctets(const char *magick_restrict text)
{
  unsigned int
    count;

  (void) GetNextUTFCode(text,&count);
  return(count);
}
154 
IsUTFSpace(int code)155 static inline MagickBooleanType IsUTFSpace(int code)
156 {
157   if (((code >= 0x0009) && (code <= 0x000d)) || (code == 0x0020) ||
158       (code == 0x0085) || (code == 0x00a0) || (code == 0x1680) ||
159       (code == 0x180e) || ((code >= 0x2000) && (code <= 0x200a)) ||
160       (code == 0x2028) || (code == 0x2029) || (code == 0x202f) ||
161       (code == 0x205f) || (code == 0x3000))
162     return(MagickTrue);
163   return(MagickFalse);
164 }
165 
IsUTFValid(int code)166 static inline MagickBooleanType IsUTFValid(int code)
167 {
168   int
169     mask;
170 
171   mask=(int) 0x7fffffff;
172   if (((code & ~mask) != 0) && ((code < 0xd800) || (code > 0xdfff)) &&
173       (code != 0xfffe) && (code != 0xffff))
174     return(MagickFalse);
175   return(MagickTrue);
176 }
177 
IsUTFAscii(int code)178 static inline MagickBooleanType IsUTFAscii(int code)
179 {
180   int
181     mask;
182 
183   mask=(int) 0x7f;
184   if ((code & ~mask) != 0)
185     return(MagickFalse);
186   return(MagickTrue);
187 }
188 
189 #if defined(__cplusplus) || defined(c_plusplus)
190 }
191 #endif
192 
193 #endif
194