1 /**********************************************************************
2   regenc.c -  Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * (C) Copyright 2015 Hewlett Packard Enterprise Development LP<BR>
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include "regint.h"
33 
/* Default encoding shared by the whole library: returned by
   onigenc_get_default_encoding() and replaced by
   onigenc_set_default_encoding(). */
OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
35 
/* Initialization hook for the encoding layer.  There is nothing to set up,
   so the call always reports success (0). */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
41 
42 extern OnigEncoding
onigenc_get_default_encoding(void)43 onigenc_get_default_encoding(void)
44 {
45   return OnigEncDefaultCharEncoding;
46 }
47 
/* Replace the library-wide default encoding with `enc`.
   The value is stored as given (no validation is done here).
   Always returns 0. */
extern int
onigenc_set_default_encoding(OnigEncoding enc)
{
  OnigEncDefaultCharEncoding = enc;
  return 0;
}
54 
55 extern UChar*
onigenc_get_right_adjust_char_head(OnigEncoding enc,const UChar * start,const UChar * s)56 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
57 {
58   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
59   if (p < s) {
60     p += enclen(enc, p);
61   }
62   return p;
63 }
64 
65 extern UChar*
onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,const UChar * start,const UChar * s,const UChar ** prev)66 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
67 				   const UChar* start, const UChar* s, const UChar** prev)
68 {
69   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
70 
71   if (p < s) {
72     if (prev) *prev = (const UChar* )p;
73     p += enclen(enc, p);
74   }
75   else {
76     if (prev) *prev = (const UChar* )NULL; /* Sorry */
77   }
78   return p;
79 }
80 
81 extern UChar*
onigenc_get_prev_char_head(OnigEncoding enc,const UChar * start,const UChar * s)82 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
83 {
84   if (s <= start)
85     return (UChar* )NULL;
86 
87   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
88 }
89 
90 extern UChar*
onigenc_step_back(OnigEncoding enc,const UChar * start,const UChar * s,int n)91 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
92 {
93   while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
94     if (s <= start)
95       return (UChar* )NULL;
96 
97     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1);
98   }
99   return (UChar* )s;
100 }
101 
102 extern UChar*
onigenc_step(OnigEncoding enc,const UChar * p,const UChar * end,int n)103 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
104 {
105   UChar* q = (UChar* )p;
106   while (n-- > 0) {
107     q += ONIGENC_MBC_ENC_LEN(enc, q);
108   }
109   return (q <= end ? q : NULL);
110 }
111 
112 extern int
onigenc_strlen(OnigEncoding enc,const UChar * p,const UChar * end)113 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
114 {
115   int n = 0;
116   UChar* q = (UChar* )p;
117 
118   while (q < end) {
119     q += ONIGENC_MBC_ENC_LEN(enc, q);
120     n++;
121   }
122   return n;
123 }
124 
125 extern int
onigenc_strlen_null(OnigEncoding enc,const UChar * s)126 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
127 {
128   int n = 0;
129   UChar* p = (UChar* )s;
130 
131   while (1) {
132     if (*p == '\0') {
133       UChar* q;
134       int len = ONIGENC_MBC_MINLEN(enc);
135 
136       if (len == 1) return n;
137       q = p + 1;
138       while (len > 1) {
139         if (*q != '\0') break;
140         q++;
141         len--;
142       }
143       if (len == 1) return n;
144     }
145     p += ONIGENC_MBC_ENC_LEN(enc, p);
146     n++;
147   }
148 }
149 
150 extern int
onigenc_str_bytelen_null(OnigEncoding enc,const UChar * s)151 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
152 {
153   UChar* start = (UChar* )s;
154   UChar* p = (UChar* )s;
155 
156   while (1) {
157     if (*p == '\0') {
158       UChar* q;
159       int len = ONIGENC_MBC_MINLEN(enc);
160 
161       if (len == 1) return (int )(p - start);
162       q = p + 1;
163       while (len > 1) {
164         if (*q != '\0') break;
165         q++;
166         len--;
167       }
168       if (len == 1) return (int )(p - start);
169     }
170     p += ONIGENC_MBC_ENC_LEN(enc, p);
171   }
172 }
173 
/* Byte-indexed lower-casing table (values written in octal).
   Entries 0101-0132 ('A'-'Z') hold 0141-0172 ('a'-'z'); every other entry
   maps to itself. */
const UChar OnigEncAsciiToLowerCaseTable[] = {
  0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
  0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
  0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
  0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
  0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
  0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
  0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
  0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
  0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
  0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
  0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
  0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137,
  0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
  0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
  0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
  0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177,
  0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
  0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
  0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
  0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
  0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
  0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
  0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
  0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
  0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
  0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
  0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
  0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
  0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
  0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
  0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
  0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377,
};
208 
#ifdef USE_UPPER_CASE_TABLE
/* Byte-indexed upper-casing table (values written in octal), compiled only
   when USE_UPPER_CASE_TABLE is defined.
   Entries 0141-0172 ('a'-'z') hold 0101-0132 ('A'-'Z'); every other entry
   maps to itself. */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
  0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
  0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
  0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
  0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
  0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
  0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
  0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
  0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
  0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
  0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
  0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
  0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
  0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
  0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
  0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
  0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
  0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
  0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
  0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
  0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
  0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
  0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
  0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
  0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
  0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
  0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
  0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
  0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
  0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
  0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
  0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377,
};
#endif
245 
/* Byte-indexed table of 16-bit character-class flag words.
   Entries 0x00-0x7f carry non-zero masks; entries 0x80-0xff are all zero.
   NOTE(review): the bit layout is not defined in this file - presumably one
   bit per ONIGENC_CTYPE_* class; confirm against the header that declares
   the ctype constants. */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
};
280 
/* Byte-indexed lower-casing table for ISO-8859-1 (values written in octal).
   Entries 0101-0132 hold 0141-0172, and entries 0300-0336 hold 0340-0376,
   except 0327 which maps to itself.  Entry 0337 and all remaining entries
   also map to themselves. */
const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
  0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
  0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
  0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
  0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
  0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
  0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
  0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
  0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
  0100, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
  0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
  0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
  0170, 0171, 0172, 0133, 0134, 0135, 0136, 0137,
  0140, 0141, 0142, 0143, 0144, 0145, 0146, 0147,
  0150, 0151, 0152, 0153, 0154, 0155, 0156, 0157,
  0160, 0161, 0162, 0163, 0164, 0165, 0166, 0167,
  0170, 0171, 0172, 0173, 0174, 0175, 0176, 0177,
  0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
  0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
  0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
  0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
  0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
  0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
  0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
  0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
  0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
  0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
  0360, 0361, 0362, 0363, 0364, 0365, 0366, 0327,
  0370, 0371, 0372, 0373, 0374, 0375, 0376, 0337,
  0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
  0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
  0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
  0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
};
315 
#ifdef USE_UPPER_CASE_TABLE
/* Byte-indexed upper-casing table for ISO-8859-1 (values written in octal),
   compiled only when USE_UPPER_CASE_TABLE is defined.
   Entries 0141-0172 hold 0101-0132, and entries 0340-0376 hold 0300-0336,
   except 0367 which maps to itself.  Entries 0337 and 0377 and all
   remaining entries also map to themselves. */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  0000, 0001, 0002, 0003, 0004, 0005, 0006, 0007,
  0010, 0011, 0012, 0013, 0014, 0015, 0016, 0017,
  0020, 0021, 0022, 0023, 0024, 0025, 0026, 0027,
  0030, 0031, 0032, 0033, 0034, 0035, 0036, 0037,
  0040, 0041, 0042, 0043, 0044, 0045, 0046, 0047,
  0050, 0051, 0052, 0053, 0054, 0055, 0056, 0057,
  0060, 0061, 0062, 0063, 0064, 0065, 0066, 0067,
  0070, 0071, 0072, 0073, 0074, 0075, 0076, 0077,
  0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
  0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
  0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
  0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
  0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
  0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
  0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
  0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
  0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
  0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
  0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
  0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
  0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
  0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
  0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
  0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
  0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
  0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
  0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
  0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
  0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
  0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
  0320, 0321, 0322, 0323, 0324, 0325, 0326, 0367,
  0330, 0331, 0332, 0333, 0334, 0335, 0336, 0377,
};
#endif
352 
/* Obsolete entry point: the `table` argument is ignored and the function
   does nothing. */
extern void
onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
{
  /* nothing */
  /* obsoleted. */
}
359 
360 extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc,const UChar * start,const UChar * s)361 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s)
362 {
363   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
364 }
365 
/* { from, to } case pairs for the 26 ASCII letters: 0x41-0x5a ('A'-'Z')
   paired with 0x61-0x7a ('a'-'z').  Walked in both directions by
   onigenc_ascii_apply_all_case_fold(). */
const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
  { 0x41, 0x61 },
  { 0x42, 0x62 },
  { 0x43, 0x63 },
  { 0x44, 0x64 },
  { 0x45, 0x65 },
  { 0x46, 0x66 },
  { 0x47, 0x67 },
  { 0x48, 0x68 },
  { 0x49, 0x69 },
  { 0x4a, 0x6a },
  { 0x4b, 0x6b },
  { 0x4c, 0x6c },
  { 0x4d, 0x6d },
  { 0x4e, 0x6e },
  { 0x4f, 0x6f },
  { 0x50, 0x70 },
  { 0x51, 0x71 },
  { 0x52, 0x72 },
  { 0x53, 0x73 },
  { 0x54, 0x74 },
  { 0x55, 0x75 },
  { 0x56, 0x76 },
  { 0x57, 0x77 },
  { 0x58, 0x78 },
  { 0x59, 0x79 },
  { 0x5a, 0x7a }
};
394 
395 extern int
onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,OnigApplyAllCaseFoldFunc f,void * arg)396 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
397 				  OnigApplyAllCaseFoldFunc f, void* arg)
398 {
399   OnigCodePoint code;
400   int i, r;
401 
402   for (i = 0;
403        i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
404        i++) {
405     code = OnigAsciiLowerMap[i].to;
406     r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
407     if (r != 0) return r;
408 
409     code = OnigAsciiLowerMap[i].from;
410     r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
411     if (r != 0) return r;
412   }
413 
414   return 0;
415 }
416 
417 extern int
onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,const OnigUChar * p,const OnigUChar * end ARG_UNUSED,OnigCaseFoldCodeItem items[])418 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
419 	 const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
420 	 OnigCaseFoldCodeItem items[])
421 {
422   if (0x41 <= *p && *p <= 0x5a) {
423     items[0].byte_len = 1;
424     items[0].code_len = 1;
425     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
426     return 1;
427   }
428   else if (0x61 <= *p && *p <= 0x7a) {
429     items[0].byte_len = 1;
430     items[0].code_len = 1;
431     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
432     return 1;
433   }
434   else
435     return 0;
436 }
437 
438 static int
ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,OnigApplyAllCaseFoldFunc f,void * arg)439 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
440 		       OnigApplyAllCaseFoldFunc f, void* arg)
441 {
442   static OnigCodePoint ss[] = { 0x73, 0x73 };
443 
444   return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
445 }
446 
447 extern int
onigenc_apply_all_case_fold_with_map(int map_size,const OnigPairCaseFoldCodes map[],int ess_tsett_flag,OnigCaseFoldType flag,OnigApplyAllCaseFoldFunc f,void * arg)448 onigenc_apply_all_case_fold_with_map(int map_size,
449     const OnigPairCaseFoldCodes map[],
450     int ess_tsett_flag, OnigCaseFoldType flag,
451     OnigApplyAllCaseFoldFunc f, void* arg)
452 {
453   OnigCodePoint code;
454   int i, r;
455 
456   r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
457   if (r != 0) return r;
458 
459   for (i = 0; i < map_size; i++) {
460     code = map[i].to;
461     r = (*f)(map[i].from, &code, 1, arg);
462     if (r != 0) return r;
463 
464     code = map[i].from;
465     r = (*f)(map[i].to, &code, 1, arg);
466     if (r != 0) return r;
467   }
468 
469   if (ess_tsett_flag != 0)
470     return ss_apply_all_case_fold(flag, f, arg);
471 
472   return 0;
473 }
474 
475 extern int
onigenc_get_case_fold_codes_by_str_with_map(int map_size,const OnigPairCaseFoldCodes map[],int ess_tsett_flag,OnigCaseFoldType flag ARG_UNUSED,const OnigUChar * p,const OnigUChar * end,OnigCaseFoldCodeItem items[])476 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
477     const OnigPairCaseFoldCodes map[],
478     int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
479     const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
480 {
481   if (0x41 <= *p && *p <= 0x5a) {
482     items[0].byte_len = 1;
483     items[0].code_len = 1;
484     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
485     if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
486 	&& (*(p+1) == 0x53 || *(p+1) == 0x73)) {
487       /* SS */
488       items[1].byte_len = 2;
489       items[1].code_len = 1;
490       items[1].code[0] = (OnigCodePoint )0xdf;
491       return 2;
492     }
493     else
494       return 1;
495   }
496   else if (0x61 <= *p && *p <= 0x7a) {
497     items[0].byte_len = 1;
498     items[0].code_len = 1;
499     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
500     if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
501 	&& (*(p+1) == 0x73 || *(p+1) == 0x53)) {
502       /* ss */
503       items[1].byte_len = 2;
504       items[1].code_len = 1;
505       items[1].code[0] = (OnigCodePoint )0xdf;
506       return 2;
507     }
508     else
509       return 1;
510   }
511   else if (*p == 0xdf && ess_tsett_flag != 0) {
512     items[0].byte_len = 1;
513     items[0].code_len = 2;
514     items[0].code[0] = (OnigCodePoint )'s';
515     items[0].code[1] = (OnigCodePoint )'s';
516 
517     items[1].byte_len = 1;
518     items[1].code_len = 2;
519     items[1].code[0] = (OnigCodePoint )'S';
520     items[1].code[1] = (OnigCodePoint )'S';
521 
522     items[2].byte_len = 1;
523     items[2].code_len = 2;
524     items[2].code[0] = (OnigCodePoint )'s';
525     items[2].code[1] = (OnigCodePoint )'S';
526 
527     items[3].byte_len = 1;
528     items[3].code_len = 2;
529     items[3].code[0] = (OnigCodePoint )'S';
530     items[3].code[1] = (OnigCodePoint )'s';
531 
532     return 4;
533   }
534   else {
535     int i;
536 
537     for (i = 0; i < map_size; i++) {
538       if (*p == map[i].from) {
539 	items[0].byte_len = 1;
540 	items[0].code_len = 1;
541 	items[0].code[0] = map[i].to;
542 	return 1;
543       }
544       else if (*p == map[i].to) {
545 	items[0].byte_len = 1;
546 	items[0].code_len = 1;
547 	items[0].code[0] = map[i].from;
548 	return 1;
549       }
550     }
551   }
552 
553   return 0;
554 }
555 
556 
/* Stub for encodings that provide no ctype code ranges: ignores all of its
   arguments, leaves the output parameters untouched and always returns
   ONIG_NO_SUPPORT_CONFIG. */
extern int
onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
	 OnigCodePoint* sb_out ARG_UNUSED,
	 const OnigCodePoint* ranges[] ARG_UNUSED)
{
  return ONIG_NO_SUPPORT_CONFIG;
}
564 
565 extern int
onigenc_is_mbc_newline_0x0a(const UChar * p,const UChar * end)566 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
567 {
568   if (p < end) {
569     if (*p == 0x0a) return 1;
570   }
571   return 0;
572 }
573 
574 /* for single byte encodings */
575 extern int
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,const UChar ** p,const UChar * end ARG_UNUSED,UChar * lower)576 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
577 	    const UChar*end ARG_UNUSED, UChar* lower)
578 {
579   *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
580 
581   (*p)++;
582   return 1; /* return byte length of converted char to lower */
583 }
584 
/* Disabled code (compiled out by `#if 0`).  As written it would advance *pp
   by one byte and return ONIGENC_IS_ASCII_CODE_CASE_AMBIG() for the byte
   that was at the old position. */
#if 0
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
			       const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  (*pp)++;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
#endif
596 
/* Encoded length of a character in a single-byte encoding: always 1.
   The pointer argument is not read. */
extern int
onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
{
  return 1;
}
602 
603 extern OnigCodePoint
onigenc_single_byte_mbc_to_code(const UChar * p,const UChar * end ARG_UNUSED)604 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
605 {
606   return (OnigCodePoint )(*p);
607 }
608 
609 extern int
onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)610 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
611 {
612   return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
613 }
614 
615 extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code,UChar * buf)616 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
617 {
618   *buf = (UChar )(code & 0xff);
619   return 1;
620 }
621 
/* Left-adjust routine for single-byte encodings: returns `s` itself (with
   const cast away).  `start` is not consulted. */
extern UChar*
onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
					  const UChar* s)
{
  return (UChar* )s;
}
628 
/* "Is reverse match allowed" predicate that ignores both arguments and
   always answers TRUE. */
extern int
onigenc_always_true_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
					     const UChar* end ARG_UNUSED)
{
  return TRUE;
}
635 
/* "Is reverse match allowed" predicate that ignores both arguments and
   always answers FALSE. */
extern int
onigenc_always_false_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
					      const UChar* end ARG_UNUSED)
{
  return FALSE;
}
642 
643 extern OnigCodePoint
onigenc_mbn_mbc_to_code(OnigEncoding enc,const UChar * p,const UChar * end)644 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
645 {
646   int c, i, len;
647   OnigCodePoint n;
648 
649   len = enclen(enc, p);
650   n = (OnigCodePoint )(*p++);
651   if (len == 1) return n;
652 
653   for (i = 1; i < len; i++) {
654     if (p >= end) break;
655     c = *p++;
656     n <<= 8;  n += c;
657   }
658   return n;
659 }
660 
661 extern int
onigenc_mbn_mbc_case_fold(OnigEncoding enc,OnigCaseFoldType flag ARG_UNUSED,const UChar ** pp,const UChar * end ARG_UNUSED,UChar * lower)662 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
663                           const UChar** pp, const UChar* end ARG_UNUSED,
664 			  UChar* lower)
665 {
666   int len;
667   const UChar *p = *pp;
668 
669   if (ONIGENC_IS_MBC_ASCII(p)) {
670     *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
671     (*pp)++;
672     return 1;
673   }
674   else {
675     int i;
676 
677     len = enclen(enc, p);
678     for (i = 0; i < len; i++) {
679       *lower++ = *p++;
680     }
681     (*pp) += len;
682     return len; /* return byte length of converted to lower char */
683   }
684 }
685 
/* Disabled code (compiled out by `#if 0`).  As written it would advance *pp
   past one character and return ONIGENC_IS_ASCII_CODE_CASE_AMBIG() for an
   ASCII character, or FALSE for any other character. */
#if 0
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
                             const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    (*pp)++;
    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
  }

  (*pp) += enclen(enc, p);
  return FALSE;
}
#endif
702 
703 extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code)704 onigenc_mb2_code_to_mbclen(OnigCodePoint code)
705 {
706   if ((code & 0xff00) != 0) return 2;
707   else return 1;
708 }
709 
710 extern int
onigenc_mb4_code_to_mbclen(OnigCodePoint code)711 onigenc_mb4_code_to_mbclen(OnigCodePoint code)
712 {
713        if ((code & 0xff000000) != 0) return 4;
714   else if ((code & 0xff0000) != 0) return 3;
715   else if ((code & 0xff00) != 0) return 2;
716   else return 1;
717 }
718 
719 extern int
onigenc_mb2_code_to_mbc(OnigEncoding enc,OnigCodePoint code,UChar * buf)720 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
721 {
722   UChar *p = buf;
723 
724   if ((code & 0xff00) != 0) {
725     *p++ = (UChar )((code >>  8) & 0xff);
726   }
727   *p++ = (UChar )(code & 0xff);
728 
729 #if 1
730   if (enclen(enc, buf) != (p - buf))
731     return ONIGERR_INVALID_CODE_POINT_VALUE;
732 #endif
733   return (int)(p - buf);
734 }
735 
736 extern int
onigenc_mb4_code_to_mbc(OnigEncoding enc,OnigCodePoint code,UChar * buf)737 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
738 {
739   UChar *p = buf;
740 
741   if ((code & 0xff000000) != 0) {
742     *p++ = (UChar )((code >> 24) & 0xff);
743   }
744   if ((code & 0xff0000) != 0 || p != buf) {
745     *p++ = (UChar )((code >> 16) & 0xff);
746   }
747   if ((code & 0xff00) != 0 || p != buf) {
748     *p++ = (UChar )((code >> 8) & 0xff);
749   }
750   *p++ = (UChar )(code & 0xff);
751 
752 #if 1
753   if (enclen(enc, buf) != (p - buf))
754     return ONIGERR_INVALID_CODE_POINT_VALUE;
755 #endif
756   return (int)(p - buf);
757 }
758 
759 extern int
onigenc_minimum_property_name_to_ctype(OnigEncoding enc,UChar * p,UChar * end)760 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
761 {
762   static PosixBracketEntryType PBS[] = {
763     { (UChar* )"Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
764     { (UChar* )"Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
765     { (UChar* )"Blank",  ONIGENC_CTYPE_BLANK,  5 },
766     { (UChar* )"Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
767     { (UChar* )"Digit",  ONIGENC_CTYPE_DIGIT,  5 },
768     { (UChar* )"Graph",  ONIGENC_CTYPE_GRAPH,  5 },
769     { (UChar* )"Lower",  ONIGENC_CTYPE_LOWER,  5 },
770     { (UChar* )"Print",  ONIGENC_CTYPE_PRINT,  5 },
771     { (UChar* )"Punct",  ONIGENC_CTYPE_PUNCT,  5 },
772     { (UChar* )"Space",  ONIGENC_CTYPE_SPACE,  5 },
773     { (UChar* )"Upper",  ONIGENC_CTYPE_UPPER,  5 },
774     { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
775     { (UChar* )"ASCII",  ONIGENC_CTYPE_ASCII,  5 },
776     { (UChar* )"Word",   ONIGENC_CTYPE_WORD,   4 },
777     { (UChar* )NULL, -1, 0 }
778   };
779 
780   PosixBracketEntryType *pb;
781   int len;
782 
783   len = onigenc_strlen(enc, p, end);
784   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
785     if (len == pb->len &&
786         onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
787       return pb->ctype;
788   }
789 
790   return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
791 }
792 
793 extern int
onigenc_mb2_is_code_ctype(OnigEncoding enc,OnigCodePoint code,unsigned int ctype)794 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
795 			  unsigned int ctype)
796 {
797   if (code < 128)
798     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
799   else {
800     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
801       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
802     }
803   }
804 
805   return FALSE;
806 }
807 
808 extern int
onigenc_mb4_is_code_ctype(OnigEncoding enc,OnigCodePoint code,unsigned int ctype)809 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
810 			  unsigned int ctype)
811 {
812   if (code < 128)
813     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
814   else {
815     if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
816       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
817     }
818   }
819 
820   return FALSE;
821 }
822 
823 extern int
onigenc_with_ascii_strncmp(OnigEncoding enc,const UChar * p,const UChar * end,const UChar * sascii,int n)824 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
825                            const UChar* sascii /* ascii */, int n)
826 {
827   int x, c;
828 
829   while (n-- > 0) {
830     if (p >= end) return (int )(*sascii);
831 
832     c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
833     x = *sascii - c;
834     if (x) return x;
835 
836     sascii++;
837     p += enclen(enc, p);
838   }
839   return 0;
840 }
841 
842 /* Property management */
843 static int
resize_property_list(int new_size,const OnigCodePoint *** plist,int * psize)844 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
845 {
846   int size;
847   const OnigCodePoint **list = *plist;
848 
849   size = sizeof(OnigCodePoint*) * new_size;
850   if (IS_NULL(list)) {
851     list = (const OnigCodePoint** )xmalloc(size);
852   }
853   else {
854     list = (const OnigCodePoint** )xrealloc((void* )list, size, *psize * sizeof(OnigCodePoint*));
855   }
856 
857   if (IS_NULL(list)) return ONIGERR_MEMORY;
858 
859   *plist = list;
860   *psize = new_size;
861 
862   return 0;
863 }
864 
865 extern int
onigenc_property_list_add_property(UChar * name,const OnigCodePoint * prop,hash_table_type ** table,const OnigCodePoint *** plist,int * pnum,int * psize)866 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
867      hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
868      int *psize)
869 {
870 #define PROP_INIT_SIZE     16
871 
872   int r;
873 
874   if (*psize <= *pnum) {
875     int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
876     r = resize_property_list(new_size, plist, psize);
877     if (r != 0) return r;
878   }
879 
880   (*plist)[*pnum] = prop;
881 
882   if (ONIG_IS_NULL(*table)) {
883     *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
884     if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
885   }
886 
887   *pnum = *pnum + 1;
888   onig_st_insert_strend(*table, name, name + strlen_s((char* )name, MAX_STRING_SIZE),
889 			(hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
890   return 0;
891 }
892 
893 extern int
onigenc_property_list_init(int (* f)(void))894 onigenc_property_list_init(int (*f)(void))
895 {
896   int r;
897 
898   THREAD_ATOMIC_START;
899 
900   r = f();
901 
902   THREAD_ATOMIC_END;
903   return r;
904 }
905