1 /*
2  **********************************************************************
3  *   Copyright (C) 2005-2015, International Business Machines
4  *   Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  */
7 
8 #include "unicode/utypes.h"
9 
10 #include "cmemory.h"
11 
12 #if !UCONFIG_NO_CONVERSION
13 #include "csrsbcs.h"
14 #include "csmatch.h"
15 
// Number of bytes that make up one n-gram.
#define N_GRAM_SIZE 3
// Keeps only the low 24 bits (three bytes) of the rolling n-gram value.
#define N_GRAM_MASK 0xFFFFFF
// Element count of a true array (never a pointer). The argument is fully
// parenthesized so that cast or dereference expressions bind as intended.
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
19 
20 U_NAMESPACE_BEGIN
21 
NGramParser(const int32_t * theNgramList,const uint8_t * theCharMap)22 NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
23  : ngram(0), byteIndex(0)
24 {
25     ngramList = theNgramList;
26     charMap   = theCharMap;
27 
28     ngramCount = hitCount = 0;
29 }
30 
~NGramParser()31 NGramParser::~NGramParser()
32 {
33 }
34 
35 /*
36  * Binary search for value in table, which must have exactly 64 entries.
37  */
38 
search(const int32_t * table,int32_t value)39 int32_t NGramParser::search(const int32_t *table, int32_t value)
40 {
41     int32_t index = 0;
42 
43     if (table[index + 32] <= value) {
44         index += 32;
45     }
46 
47     if (table[index + 16] <= value) {
48         index += 16;
49     }
50 
51     if (table[index + 8] <= value) {
52         index += 8;
53     }
54 
55     if (table[index + 4] <= value) {
56         index += 4;
57     }
58 
59     if (table[index + 2] <= value) {
60         index += 2;
61     }
62 
63     if (table[index + 1] <= value) {
64         index += 1;
65     }
66 
67     if (table[index] > value) {
68         index -= 1;
69     }
70 
71     if (index < 0 || table[index] != value) {
72         return -1;
73     }
74 
75     return index;
76 }
77 
lookup(int32_t thisNgram)78 void NGramParser::lookup(int32_t thisNgram)
79 {
80     ngramCount += 1;
81 
82     if (search(ngramList, thisNgram) >= 0) {
83         hitCount += 1;
84     }
85 
86 }
87 
addByte(int32_t b)88 void NGramParser::addByte(int32_t b)
89 {
90     ngram = ((ngram << 8) + b) & N_GRAM_MASK;
91     lookup(ngram);
92 }
93 
nextByte(InputText * det)94 int32_t NGramParser::nextByte(InputText *det)
95 {
96     if (byteIndex >= det->fInputLen) {
97         return -1;
98     }
99 
100     return det->fInputBytes[byteIndex++];
101 }
102 
parseCharacters(InputText * det)103 void NGramParser::parseCharacters(InputText *det)
104 {
105     int32_t b;
106     bool ignoreSpace = FALSE;
107 
108     while ((b = nextByte(det)) >= 0) {
109         uint8_t mb = charMap[b];
110 
111         // TODO: 0x20 might not be a space in all character sets...
112         if (mb != 0) {
113             if (!(mb == 0x20 && ignoreSpace)) {
114                 addByte(mb);
115             }
116 
117             ignoreSpace = (mb == 0x20);
118         }
119     }
120 }
121 
parse(InputText * det)122 int32_t NGramParser::parse(InputText *det)
123 {
124     parseCharacters(det);
125 
126     // TODO: Is this OK? The buffer could have ended in the middle of a word...
127     addByte(0x20);
128 
129     double rawPercent = (double) hitCount / (double) ngramCount;
130 
131     //            if (rawPercent <= 2.0) {
132     //                return 0;
133     //            }
134 
135     // TODO - This is a bit of a hack to take care of a case
136     // were we were getting a confidence of 135...
137     if (rawPercent > 0.33) {
138         return 98;
139     }
140 
141     return (int32_t) (rawPercent * 300.0);
142 }
143 
144 #if !UCONFIG_ONLY_HTML_CONVERSION
/*
 * 256-entry byte-to-byte table, indexed by the raw input byte in
 * NGramParser_IBM420::nextByte(). Every byte below 0x40 maps to 0x40, and a
 * number of bytes are folded onto a neighbouring value (for example
 * 0x43 -> 0x42 and 0x48 -> 0x47); the remaining bytes map to themselves.
 * NOTE(review): going by the name, the folding presumably collapses
 * positional Arabic letter shapes onto one form -- confirm against the
 * IBM420 code chart.
 */
145 static const uint8_t unshapeMap_IBM420[] = {
146 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
147 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
148 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
149 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
150 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
151 /* 4- */    0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
152 /* 5- */    0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
153 /* 6- */    0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
154 /* 7- */    0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
155 /* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F,
156 /* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E,
157 /* A- */    0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF,
158 /* B- */    0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF,
159 /* C- */    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF,
160 /* D- */    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF,
161 /* E- */    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
162 /* F- */    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
163 };
164 
NGramParser_IBM420(const int32_t * theNgramList,const uint8_t * theCharMap)165 NGramParser_IBM420::NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap):NGramParser(theNgramList, theCharMap)
166 {
167 	alef = 0x00;
168 }
169 
170 
isLamAlef(int32_t b)171 int32_t NGramParser_IBM420::isLamAlef(int32_t b)
172 {
173 	if(b == 0xB2 || b == 0xB3){
174          	return 0x47;
175         }else if(b == 0xB4 || b == 0xB5){
176          	return 0x49;
177         }else if(b == 0xB8 || b == 0xB9){
178          	return 0x56;
179         }else
180          	return 0x00;
181 }
182 
183 /*
184 * Arabic shaping needs to be done manually. Cannot call ArabicShaping class
185 * because CharsetDetector is dealing with bytes not Unicode code points. We could
186 * convert the bytes to Unicode code points but that would leave us dependent
187 * on CharsetICU which we try to avoid. IBM420 converter amongst different versions
188 * of JDK can produce different results and therefore is also avoided.
189 */
nextByte(InputText * det)190 int32_t NGramParser_IBM420::nextByte(InputText *det)
191 {
192 
193     if (byteIndex >= det->fInputLen || det->fInputBytes[byteIndex] == 0) {
194         return -1;
195     }
196     int next;
197 
198     alef = isLamAlef(det->fInputBytes[byteIndex]);
199     if(alef != 0x00)
200         next = 0xB1 & 0xFF;
201     else
202         next = unshapeMap_IBM420[det->fInputBytes[byteIndex]& 0xFF] & 0xFF;
203 
204     byteIndex++;
205 
206     return next;
207 }
208 
parseCharacters(InputText * det)209 void NGramParser_IBM420::parseCharacters(InputText *det)
210 {
211 	int32_t b;
212     bool ignoreSpace = FALSE;
213 
214     while ((b = nextByte(det)) >= 0) {
215         uint8_t mb = charMap[b];
216 
217         // TODO: 0x20 might not be a space in all character sets...
218         if (mb != 0) {
219             if (!(mb == 0x20 && ignoreSpace)) {
220                 addByte(mb);
221             }
222             ignoreSpace = (mb == 0x20);
223         }
224 
225 		if(alef != 0x00){
226             mb = charMap[alef & 0xFF];
227 
228             // TODO: 0x20 might not be a space in all character sets...
229             if (mb != 0) {
230                 if (!(mb == 0x20 && ignoreSpace)) {
231                     addByte(mb);
232                 }
233 
234                 ignoreSpace = (mb == 0x20);
235             }
236 
237         }
238     }
239 }
240 #endif
241 
CharsetRecog_sbcs()242 CharsetRecog_sbcs::CharsetRecog_sbcs()
243 {
244     // nothing else to do
245 }
246 
~CharsetRecog_sbcs()247 CharsetRecog_sbcs::~CharsetRecog_sbcs()
248 {
249     // nothing to do
250 }
251 
match_sbcs(InputText * det,const int32_t ngrams[],const uint8_t byteMap[]) const252 int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
253 {
254     NGramParser parser(ngrams, byteMap);
255     int32_t result;
256 
257     result = parser.parse(det);
258 
259     return result;
260 }
261 
/*
 * Byte map with 256 entries (32 rows of 8), indexed by the raw input byte.
 * The name suggests ISO-8859-1. Its use is not visible in this part of the
 * file -- presumably it is passed to an NGramParser as its charMap.
 * In NGramParser::parseCharacters() an entry of 0x00 drops the byte and
 * 0x20 is treated as a space. Here 0x27 maps to 0x00 and ASCII letters of
 * either case map to 0x61-0x7A.
 */
262 static const uint8_t charMap_8859_1[] = {
263     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
264     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
265     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
266     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
267     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
268     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
269     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
270     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
271     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
272     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
273     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
274     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
275     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
276     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
277     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
278     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
279     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
280     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
281     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
282     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
283     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
284     0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
285     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
286     0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
287     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
288     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
289     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
290     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
291     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
292     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
293     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
294     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
295 };
296 
/*
 * Byte map with 256 entries (32 rows of 8), indexed by the raw input byte.
 * The name suggests ISO-8859-2. Its use is not visible in this part of the
 * file -- presumably it is passed to an NGramParser as its charMap.
 * In NGramParser::parseCharacters() an entry of 0x00 drops the byte and
 * 0x20 is treated as a space. Here 0x27 maps to 0x00 and ASCII letters of
 * either case map to 0x61-0x7A.
 */
297 static const uint8_t charMap_8859_2[] = {
298     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
299     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
300     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
301     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
302     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
303     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
304     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
305     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
306     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
307     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
308     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
309     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
310     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
311     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
312     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
313     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
314     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
315     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
316     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
317     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
318     0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
319     0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
320     0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
321     0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
322     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
323     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
324     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
325     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
326     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
327     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
328     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
329     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
330 };
331 
/*
 * Byte map with 256 entries (32 rows of 8), indexed by the raw input byte.
 * The name suggests ISO-8859-5. Its use is not visible in this part of the
 * file -- presumably it is passed to an NGramParser as its charMap.
 * In NGramParser::parseCharacters() an entry of 0x00 drops the byte and
 * 0x20 is treated as a space. Here 0x27 maps to 0x00 and ASCII letters of
 * either case map to 0x61-0x7A.
 */
332 static const uint8_t charMap_8859_5[] = {
333     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
334     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
335     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
336     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
337     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
338     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
339     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
340     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
341     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
342     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
343     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
344     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
345     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
346     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
347     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
348     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
349     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
350     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
351     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
352     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
353     0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
354     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
355     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
356     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
357     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
358     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
359     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
360     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
361     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
362     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
363     0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
364     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
365 };
366 
/*
 * Byte map with 256 entries (32 rows of 8), indexed by the raw input byte.
 * The name suggests ISO-8859-6. Its use is not visible in this part of the
 * file -- presumably it is passed to an NGramParser as its charMap.
 * In NGramParser::parseCharacters() an entry of 0x00 drops the byte and
 * 0x20 is treated as a space. Here 0x27 maps to 0x00 and ASCII letters of
 * either case map to 0x61-0x7A.
 */
367 static const uint8_t charMap_8859_6[] = {
368     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
369     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
370     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
371     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
372     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
373     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
374     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
375     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
376     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
377     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
378     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
379     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
380     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
381     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
382     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
383     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
384     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
385     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
386     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
387     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
388     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
389     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
390     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
391     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
392     0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
393     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
394     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
395     0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
396     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
397     0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
398     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
399     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
400 };
401 
/*
 * Byte map with 256 entries (32 rows of 8), indexed by the raw input byte.
 * The name suggests ISO-8859-7. Its use is not visible in this part of the
 * file -- presumably it is passed to an NGramParser as its charMap.
 * In NGramParser::parseCharacters() an entry of 0x00 drops the byte and
 * 0x20 is treated as a space. Here 0x27 maps to 0x00 and ASCII letters of
 * either case map to 0x61-0x7A.
 */
402 static const uint8_t charMap_8859_7[] = {
403     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
404     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
405     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
406     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
407     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
408     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
409     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
410     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
411     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
412     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
413     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
414     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
415     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
416     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
417     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
418     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
419     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
420     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
421     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
422     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
423     0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
424     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
425     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
426     0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
427     0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
428     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
429     0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
430     0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
431     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
432     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
433     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
434     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
435 };
436 
/*
 * Byte map with 256 entries (32 rows of 8), indexed by the raw input byte.
 * The name suggests ISO-8859-8. Its use is not visible in this part of the
 * file -- presumably it is passed to an NGramParser as its charMap.
 * In NGramParser::parseCharacters() an entry of 0x00 drops the byte and
 * 0x20 is treated as a space. Here 0x27 maps to 0x00 and ASCII letters of
 * either case map to 0x61-0x7A.
 */
437 static const uint8_t charMap_8859_8[] = {
438     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
439     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
440     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
441     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
442     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
443     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
444     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
445     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
446     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
447     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
448     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
449     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
450     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
451     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
452     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
453     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
454     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
455     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
456     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
457     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
458     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
459     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
460     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
461     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
462     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
463     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
464     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
465     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
466     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
467     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
468     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
469     0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
470 };
471 
/*
 * Byte map with 256 entries (32 rows of 8), indexed by the raw input byte.
 * The name suggests ISO-8859-9. Its use is not visible in this part of the
 * file -- presumably it is passed to an NGramParser as its charMap.
 * In NGramParser::parseCharacters() an entry of 0x00 drops the byte and
 * 0x20 is treated as a space. Here 0x27 maps to 0x00, ASCII letters of
 * either case map to 0x61-0x7A, and 0xDD maps to 0x69.
 */
472 static const uint8_t charMap_8859_9[] = {
473     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
474     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
475     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
476     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
477     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
478     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
479     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
480     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
481     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
482     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
483     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
484     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
485     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
486     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
487     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
488     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
489     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
490     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
491     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
492     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
493     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
494     0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
495     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
496     0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
497     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
498     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
499     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
500     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
501     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
502     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
503     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
504     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
505 };
506 
/*
 * 64 three-byte n-gram values in ascending order -- the size and ordering
 * that the binary search in NGramParser::search() depends on.
 * NOTE(review): presumably the reference trigrams for windows-1251 text
 * (going by the name); where the table is used is not visible here.
 */
507 static const int32_t ngrams_windows_1251[] = {
508     0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
509     0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
510     0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
511     0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
512 };
513 
/*
 * Byte map with 256 entries (32 rows of 8), indexed by the raw input byte.
 * The name suggests windows-1251. Its use is not visible in this part of
 * the file -- presumably it is passed to an NGramParser as its charMap.
 * In NGramParser::parseCharacters() an entry of 0x00 drops the byte and
 * 0x20 is treated as a space. Here 0x27 maps to 0x00, ASCII letters of
 * either case map to 0x61-0x7A, and 0xC0-0xDF map onto 0xE0-0xFF.
 */
514 static const uint8_t charMap_windows_1251[] = {
515     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
516     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
517     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
518     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
519     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
520     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
521     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
522     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
523     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
524     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
525     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
526     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
527     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
528     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
529     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
530     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
531     0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
532     0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
533     0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
534     0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
535     0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20,
536     0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
537     0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20,
538     0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
539     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
540     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
541     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
542     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
543     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
544     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
545     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
546     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
547 };
548 
/*
 * 64 three-byte n-gram values in ascending order -- the size and ordering
 * that the binary search in NGramParser::search() depends on.
 * NOTE(review): presumably the reference trigrams for windows-1256 text
 * (going by the name); where the table is used is not visible here.
 */
549 static const int32_t ngrams_windows_1256[] = {
550     0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
551     0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
552     0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
553     0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
554 };
555 
/*
 * Byte map with 256 entries (32 rows of 8), indexed by the raw input byte.
 * The name suggests windows-1256. Its use is not visible in this part of
 * the file -- presumably it is passed to an NGramParser as its charMap.
 * In NGramParser::parseCharacters() an entry of 0x00 drops the byte and
 * 0x20 is treated as a space. Here 0x27 maps to 0x00 and ASCII letters of
 * either case map to 0x61-0x7A.
 */
556 static const uint8_t charMap_windows_1256[] = {
557     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
558     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
559     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
560     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
561     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
562     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
563     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
564     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
565     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
566     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
567     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
568     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
569     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
570     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
571     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
572     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
573     0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
574     0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
575     0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
576     0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
577     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
578     0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
579     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
580     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
581     0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
582     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
583     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20,
584     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
585     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
586     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
587     0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20,
588     0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF,
589 };
590 
/*
 * 64 three-byte n-gram values in ascending order -- the size and ordering
 * that the binary search in NGramParser::search() depends on.
 * NOTE(review): presumably the reference trigrams for KOI8-R text (going
 * by the name); where the table is used is not visible here.
 */
591 static const int32_t ngrams_KOI8_R[] = {
592     0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
593     0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
594     0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
595     0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
596 };
597 
/*
 * Byte map with 256 entries (32 rows of 8), indexed by the raw input byte.
 * The name suggests KOI8-R. Its use is not visible in this part of the
 * file -- presumably it is passed to an NGramParser as its charMap.
 * In NGramParser::parseCharacters() an entry of 0x00 drops the byte and
 * 0x20 is treated as a space. Here 0x27 maps to 0x00, ASCII letters of
 * either case map to 0x61-0x7A, and 0xE0-0xFF map onto 0xC0-0xDF.
 */
598 static const uint8_t charMap_KOI8_R[] = {
599     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
600     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
601     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
602     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
603     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
604     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
605     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
606     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
607     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
608     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
609     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
610     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
611     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
612     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
613     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
614     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
615     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
616     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
617     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
618     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
619     0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
620     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
621     0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
622     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
623     0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
624     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
625     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
626     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
627     0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
628     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
629     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
630     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
631 };
632 
633 #if !UCONFIG_ONLY_HTML_CONVERSION
/*
 * 64 three-byte n-gram values in ascending order -- the size and ordering
 * that the binary search in NGramParser::search() depends on.
 * NOTE(review): presumably the reference trigrams for right-to-left Hebrew
 * text in IBM424 (going by the name); where the table is used is not
 * visible here.
 */
634 static const int32_t ngrams_IBM424_he_rtl[] = {
635     0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
636     0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
637     0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
638     0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
639 };
640 
/*
 * 64 three-byte n-gram values in ascending order -- the size and ordering
 * that the binary search in NGramParser::search() depends on.
 * NOTE(review): presumably the reference trigrams for left-to-right Hebrew
 * text in IBM424 (going by the name); where the table is used is not
 * visible here.
 */
641 static const int32_t ngrams_IBM424_he_ltr[] = {
642     0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
643     0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
644     0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
645     0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651,
646 };
647 
/*
 * Byte map with 256 entries (16 rows of 16), indexed by the raw input byte.
 * The name suggests Hebrew text in IBM424. Its use is not visible in this
 * part of the file -- presumably it is passed to an NGramParser as its
 * charMap. Most bytes map to 0x40, 0x7D maps to 0x00 (dropped by
 * parseCharacters()), and rows C-, D- and E- repeat the letter values of
 * rows 8-, 9- and A-.
 * NOTE(review): parseCharacters() only collapses runs of 0x20, so runs of
 * 0x40 produced by this table would not be collapsed -- confirm whether
 * that is intended.
 */
648 static const uint8_t charMap_IBM424_he[] = {
649 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
650 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
651 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
652 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
653 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
654 /* 4- */    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
655 /* 5- */    0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
656 /* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
657 /* 7- */    0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40,
658 /* 8- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
659 /* 9- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
660 /* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
661 /* B- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
662 /* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
663 /* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
664 /* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
665 /* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
666 };
667 
// N-gram list handed to match_sbcs() by CharsetRecog_IBM420_ar_rtl::match,
// paired with charMap_IBM420_ar. Entries are 24-bit values (compare
// N_GRAM_MASK, 0xFFFFFF). NGramParser::search binary-searches a table of
// exactly 64 entries, so keep 64 values in ascending order
// (NOTE(review): assumes NGramParser_IBM420 uses that same search -- confirm).
static const int32_t ngrams_IBM420_ar_rtl[] = {
    0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
    0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
    0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
    0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
};
674 
// N-gram list handed to match_sbcs() by CharsetRecog_IBM420_ar_ltr::match,
// paired with charMap_IBM420_ar. Entries are 24-bit values (compare
// N_GRAM_MASK, 0xFFFFFF). NGramParser::search binary-searches a table of
// exactly 64 entries, so keep 64 values in ascending order
// (NOTE(review): assumes NGramParser_IBM420 uses that same search -- confirm).
static const int32_t ngrams_IBM420_ar_ltr[] = {
    0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF,
    0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
    0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
    0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
};
681 
// 256-entry byte table (16 rows of 16, indexed by byte value) passed as the
// character map to match_sbcs() by both IBM420 Arabic recognizers.
// NOTE(review): presumably maps each input byte to the value used when
// building n-grams, with 0x40 as the catch-all value -- confirm against
// NGramParser_IBM420.
static const uint8_t charMap_IBM420_ar[]= {
/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 4- */    0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 5- */    0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 7- */    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
/* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
/* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
/* B- */    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
/* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF,
/* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
/* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
/* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
};
701 #endif
702 
703 //ISO-8859-1,2,5,6,7,8,9 Ngrams
704 
// One table entry pairing a fixed 64-element n-gram list with a language
// string. The ngrams_8859_1 and ngrams_8859_2 arrays hold one such entry per
// language; the corresponding match() loops read both fields.
struct NGramsPlusLang {
    const int32_t ngrams[64];   // n-gram values passed to match_sbcs()
    const char *  lang;         // language string reported with a match (e.g. "en")
};
709 
// Per-language n-gram tables tried in order by CharsetRecog_8859_1::match,
// each scored against charMap_8859_1. Every entry carries 64 values and the
// language string reported on a match. NGramParser::search binary-searches a
// 64-entry table, so each list should stay in ascending order
// (NOTE(review): assumes match_sbcs feeds these lists to that search -- confirm).
static const NGramsPlusLang ngrams_8859_1[] =  {
  {
    {
    0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
    0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
    0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
    0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
    },
    "en"
  },
  {
    {
    0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
    0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
    0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
    0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
    },
    "da"
  },
  {
    {
    0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
    0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
    0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
    0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
    },
    "de"
  },
  {
    {
    0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
    0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
    0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
    0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
    },
    "es"
  },
  {
    {
    0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
    0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
    0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
    0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
    },
    "fr"
  },
  {
    {
    0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
    0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
    0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
    0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
    },
    "it"
  },
  {
    {
    0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
    0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
    0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
    0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
    },
    "nl"
  },
  {
    {
    0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
    0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
    0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
    0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
    },
    "no"
  },
  {
    {
    0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
    0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
    0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
    0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
    },
    "pt"
  },
  {
    {
    0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
    0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
    0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
    0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
    },
    "sv"
  }
};
802 
803 
// Per-language n-gram tables tried in order by CharsetRecog_8859_2::match,
// each scored against charMap_8859_2. Every entry carries 64 values and the
// language string reported on a match. NGramParser::search binary-searches a
// 64-entry table, so each list should stay in ascending order
// (NOTE(review): assumes match_sbcs feeds these lists to that search -- confirm).
static const NGramsPlusLang ngrams_8859_2[] =  {
  {
    {
    0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
    0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
    0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
    0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
    },
    "cs"
  },
  {
    {
    0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
    0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
    0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
    0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
    },
    "hu"
  },
  {
    {
    0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
    0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
    0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
    0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
    },
    "pl"
  },
  {
    {
    0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
    0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
    0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
    0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
    },
    "ro"
  }
};
842 
// N-gram list handed to match_sbcs() by CharsetRecog_8859_5_ru::match,
// paired with charMap_8859_5. Entries are 24-bit values (compare N_GRAM_MASK).
// Keep 64 values in ascending order: NGramParser::search binary-searches a
// 64-entry table (NOTE(review): assumes match_sbcs uses that search -- confirm).
static const int32_t ngrams_8859_5_ru[] = {
    0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
    0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
    0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
    0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
};
849 
// N-gram list handed to match_sbcs() by CharsetRecog_8859_6_ar::match,
// paired with charMap_8859_6. Entries are 24-bit values (compare N_GRAM_MASK).
// Keep 64 values in ascending order: NGramParser::search binary-searches a
// 64-entry table (NOTE(review): assumes match_sbcs uses that search -- confirm).
static const int32_t ngrams_8859_6_ar[] = {
    0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
    0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
    0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
    0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
};
856 
// N-gram list handed to match_sbcs() by CharsetRecog_8859_7_el::match,
// paired with charMap_8859_7. Entries are 24-bit values (compare N_GRAM_MASK).
// Keep 64 values in ascending order: NGramParser::search binary-searches a
// 64-entry table (NOTE(review): assumes match_sbcs uses that search -- confirm).
static const int32_t ngrams_8859_7_el[] = {
    0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
    0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
    0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
    0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
};
863 
// N-gram list handed to match_sbcs() by CharsetRecog_8859_8_I_he::match,
// paired with charMap_8859_8. Entries are 24-bit values (compare N_GRAM_MASK).
// Keep 64 values in ascending order: NGramParser::search binary-searches a
// 64-entry table (NOTE(review): assumes match_sbcs uses that search -- confirm).
static const int32_t ngrams_8859_8_I_he[] = {
    0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
    0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
    0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
    0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
};
870 
// N-gram list handed to match_sbcs() by CharsetRecog_8859_8_he::match,
// paired with charMap_8859_8. Entries are 24-bit values (compare N_GRAM_MASK).
// Keep 64 values in ascending order: NGramParser::search binary-searches a
// 64-entry table (NOTE(review): assumes match_sbcs uses that search -- confirm).
static const int32_t ngrams_8859_8_he[] = {
    0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
    0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
    0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
    0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
};
877 
// N-gram list handed to match_sbcs() by CharsetRecog_8859_9_tr::match,
// paired with charMap_8859_9. Entries are 24-bit values (compare N_GRAM_MASK).
// Keep 64 values in ascending order: NGramParser::search binary-searches a
// 64-entry table (NOTE(review): assumes match_sbcs uses that search -- confirm).
static const int32_t ngrams_8859_9_tr[] = {
    0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
    0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
    0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
    0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
};
884 
~CharsetRecog_8859_1()885 CharsetRecog_8859_1::~CharsetRecog_8859_1()
886 {
887     // nothing to do
888 }
889 
match(InputText * textIn,CharsetMatch * results) const890 UBool CharsetRecog_8859_1::match(InputText *textIn, CharsetMatch *results) const {
891     const char *name = textIn->fC1Bytes? "windows-1252" : "ISO-8859-1";
892     uint32_t i;
893     int32_t bestConfidenceSoFar = -1;
894     for (i=0; i < ARRAY_SIZE(ngrams_8859_1) ; i++) {
895         const int32_t *ngrams = ngrams_8859_1[i].ngrams;
896         const char    *lang   = ngrams_8859_1[i].lang;
897         int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_1);
898         if (confidence > bestConfidenceSoFar) {
899             results->set(textIn, this, confidence, name, lang);
900             bestConfidenceSoFar = confidence;
901         }
902     }
903     return (bestConfidenceSoFar > 0);
904 }
905 
getName() const906 const char *CharsetRecog_8859_1::getName() const
907 {
908     return "ISO-8859-1";
909 }
910 
911 
~CharsetRecog_8859_2()912 CharsetRecog_8859_2::~CharsetRecog_8859_2()
913 {
914     // nothing to do
915 }
916 
match(InputText * textIn,CharsetMatch * results) const917 UBool CharsetRecog_8859_2::match(InputText *textIn, CharsetMatch *results) const {
918     const char *name = textIn->fC1Bytes? "windows-1250" : "ISO-8859-2";
919     uint32_t i;
920     int32_t bestConfidenceSoFar = -1;
921     for (i=0; i < ARRAY_SIZE(ngrams_8859_2) ; i++) {
922         const int32_t *ngrams = ngrams_8859_2[i].ngrams;
923         const char    *lang   = ngrams_8859_2[i].lang;
924         int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_2);
925         if (confidence > bestConfidenceSoFar) {
926             results->set(textIn, this, confidence, name, lang);
927             bestConfidenceSoFar = confidence;
928         }
929     }
930     return (bestConfidenceSoFar > 0);
931 }
932 
getName() const933 const char *CharsetRecog_8859_2::getName() const
934 {
935     return "ISO-8859-2";
936 }
937 
938 
~CharsetRecog_8859_5()939 CharsetRecog_8859_5::~CharsetRecog_8859_5()
940 {
941     // nothing to do
942 }
943 
getName() const944 const char *CharsetRecog_8859_5::getName() const
945 {
946     return "ISO-8859-5";
947 }
948 
~CharsetRecog_8859_5_ru()949 CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
950 {
951     // nothing to do
952 }
953 
getLanguage() const954 const char *CharsetRecog_8859_5_ru::getLanguage() const
955 {
956     return "ru";
957 }
958 
match(InputText * textIn,CharsetMatch * results) const959 UBool CharsetRecog_8859_5_ru::match(InputText *textIn, CharsetMatch *results) const
960 {
961     int32_t confidence = match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
962     results->set(textIn, this, confidence);
963     return (confidence > 0);
964 }
965 
~CharsetRecog_8859_6()966 CharsetRecog_8859_6::~CharsetRecog_8859_6()
967 {
968     // nothing to do
969 }
970 
getName() const971 const char *CharsetRecog_8859_6::getName() const
972 {
973     return "ISO-8859-6";
974 }
975 
~CharsetRecog_8859_6_ar()976 CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
977 {
978     // nothing to do
979 }
980 
getLanguage() const981 const char *CharsetRecog_8859_6_ar::getLanguage() const
982 {
983     return "ar";
984 }
985 
match(InputText * textIn,CharsetMatch * results) const986 UBool CharsetRecog_8859_6_ar::match(InputText *textIn, CharsetMatch *results) const
987 {
988     int32_t confidence = match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
989     results->set(textIn, this, confidence);
990     return (confidence > 0);
991 }
992 
~CharsetRecog_8859_7()993 CharsetRecog_8859_7::~CharsetRecog_8859_7()
994 {
995     // nothing to do
996 }
997 
getName() const998 const char *CharsetRecog_8859_7::getName() const
999 {
1000     return "ISO-8859-7";
1001 }
1002 
~CharsetRecog_8859_7_el()1003 CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
1004 {
1005     // nothing to do
1006 }
1007 
getLanguage() const1008 const char *CharsetRecog_8859_7_el::getLanguage() const
1009 {
1010     return "el";
1011 }
1012 
match(InputText * textIn,CharsetMatch * results) const1013 UBool CharsetRecog_8859_7_el::match(InputText *textIn, CharsetMatch *results) const
1014 {
1015     const char *name = textIn->fC1Bytes? "windows-1253" : "ISO-8859-7";
1016     int32_t confidence = match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
1017     results->set(textIn, this, confidence, name, "el");
1018     return (confidence > 0);
1019 }
1020 
~CharsetRecog_8859_8()1021 CharsetRecog_8859_8::~CharsetRecog_8859_8()
1022 {
1023     // nothing to do
1024 }
1025 
getName() const1026 const char *CharsetRecog_8859_8::getName() const
1027 {
1028     return "ISO-8859-8";
1029 }
1030 
~CharsetRecog_8859_8_I_he()1031 CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
1032 {
1033     // nothing to do
1034 }
1035 
getName() const1036 const char *CharsetRecog_8859_8_I_he::getName() const
1037 {
1038     return "ISO-8859-8-I";
1039 }
1040 
getLanguage() const1041 const char *CharsetRecog_8859_8_I_he::getLanguage() const
1042 {
1043     return "he";
1044 }
1045 
match(InputText * textIn,CharsetMatch * results) const1046 UBool CharsetRecog_8859_8_I_he::match(InputText *textIn, CharsetMatch *results) const
1047 {
1048     const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8-I";
1049     int32_t confidence = match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
1050     results->set(textIn, this, confidence, name, "he");
1051     return (confidence > 0);
1052 }
1053 
~CharsetRecog_8859_8_he()1054 CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
1055 {
1056     // od ot gnihton
1057 }
1058 
getLanguage() const1059 const char *CharsetRecog_8859_8_he::getLanguage() const
1060 {
1061     return "he";
1062 }
1063 
match(InputText * textIn,CharsetMatch * results) const1064 UBool CharsetRecog_8859_8_he::match(InputText *textIn, CharsetMatch *results) const
1065 {
1066     const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8";
1067     int32_t confidence = match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
1068     results->set(textIn, this, confidence, name, "he");
1069     return (confidence > 0);
1070 }
1071 
~CharsetRecog_8859_9()1072 CharsetRecog_8859_9::~CharsetRecog_8859_9()
1073 {
1074     // nothing to do
1075 }
1076 
getName() const1077 const char *CharsetRecog_8859_9::getName() const
1078 {
1079     return "ISO-8859-9";
1080 }
1081 
~CharsetRecog_8859_9_tr()1082 CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
1083 {
1084     // nothing to do
1085 }
1086 
getLanguage() const1087 const char *CharsetRecog_8859_9_tr::getLanguage() const
1088 {
1089     return "tr";
1090 }
1091 
match(InputText * textIn,CharsetMatch * results) const1092 UBool CharsetRecog_8859_9_tr::match(InputText *textIn, CharsetMatch *results) const
1093 {
1094     const char *name = textIn->fC1Bytes? "windows-1254" : "ISO-8859-9";
1095     int32_t confidence = match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
1096     results->set(textIn, this, confidence, name, "tr");
1097     return (confidence > 0);
1098 }
1099 
~CharsetRecog_windows_1256()1100 CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
1101 {
1102     // nothing to do
1103 }
1104 
getName() const1105 const char *CharsetRecog_windows_1256::getName() const
1106 {
1107     return  "windows-1256";
1108 }
1109 
getLanguage() const1110 const char *CharsetRecog_windows_1256::getLanguage() const
1111 {
1112     return "ar";
1113 }
1114 
match(InputText * textIn,CharsetMatch * results) const1115 UBool CharsetRecog_windows_1256::match(InputText *textIn, CharsetMatch *results) const
1116 {
1117     int32_t confidence = match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
1118     results->set(textIn, this, confidence);
1119     return (confidence > 0);
1120 }
1121 
~CharsetRecog_windows_1251()1122 CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
1123 {
1124     // nothing to do
1125 }
1126 
getName() const1127 const char *CharsetRecog_windows_1251::getName() const
1128 {
1129     return  "windows-1251";
1130 }
1131 
getLanguage() const1132 const char *CharsetRecog_windows_1251::getLanguage() const
1133 {
1134     return "ru";
1135 }
1136 
match(InputText * textIn,CharsetMatch * results) const1137 UBool CharsetRecog_windows_1251::match(InputText *textIn, CharsetMatch *results) const
1138 {
1139     int32_t confidence = match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
1140     results->set(textIn, this, confidence);
1141     return (confidence > 0);
1142 }
1143 
~CharsetRecog_KOI8_R()1144 CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
1145 {
1146     // nothing to do
1147 }
1148 
getName() const1149 const char *CharsetRecog_KOI8_R::getName() const
1150 {
1151     return  "KOI8-R";
1152 }
1153 
getLanguage() const1154 const char *CharsetRecog_KOI8_R::getLanguage() const
1155 {
1156     return "ru";
1157 }
1158 
match(InputText * textIn,CharsetMatch * results) const1159 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
1160 {
1161     int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
1162     results->set(textIn, this, confidence);
1163     return (confidence > 0);
1164 }
1165 
1166 #if !UCONFIG_ONLY_HTML_CONVERSION
~CharsetRecog_IBM424_he()1167 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
1168 {
1169     // nothing to do
1170 }
1171 
getLanguage() const1172 const char *CharsetRecog_IBM424_he::getLanguage() const
1173 {
1174     return "he";
1175 }
1176 
~CharsetRecog_IBM424_he_rtl()1177 CharsetRecog_IBM424_he_rtl::~CharsetRecog_IBM424_he_rtl()
1178 {
1179     // nothing to do
1180 }
1181 
getName() const1182 const char *CharsetRecog_IBM424_he_rtl::getName() const
1183 {
1184     return  "IBM424_rtl";
1185 }
1186 
match(InputText * textIn,CharsetMatch * results) const1187 UBool CharsetRecog_IBM424_he_rtl::match(InputText *textIn, CharsetMatch *results) const
1188 {
1189     int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_rtl, charMap_IBM424_he);
1190     results->set(textIn, this, confidence);
1191     return (confidence > 0);
1192 }
1193 
~CharsetRecog_IBM424_he_ltr()1194 CharsetRecog_IBM424_he_ltr::~CharsetRecog_IBM424_he_ltr()
1195 {
1196     // nothing to do
1197 }
1198 
getName() const1199 const char *CharsetRecog_IBM424_he_ltr::getName() const
1200 {
1201     return  "IBM424_ltr";
1202 }
1203 
match(InputText * textIn,CharsetMatch * results) const1204 UBool CharsetRecog_IBM424_he_ltr::match(InputText *textIn, CharsetMatch *results) const
1205 {
1206     int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_ltr, charMap_IBM424_he);
1207     results->set(textIn, this, confidence);
1208     return (confidence > 0);
1209 }
1210 
~CharsetRecog_IBM420_ar()1211 CharsetRecog_IBM420_ar::~CharsetRecog_IBM420_ar()
1212 {
1213     // nothing to do
1214 }
1215 
getLanguage() const1216 const char *CharsetRecog_IBM420_ar::getLanguage() const
1217 {
1218     return "ar";
1219 }
1220 
1221 
match_sbcs(InputText * det,const int32_t ngrams[],const uint8_t byteMap[]) const1222 int32_t CharsetRecog_IBM420_ar::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
1223 {
1224     NGramParser_IBM420 parser(ngrams, byteMap);
1225     int32_t result;
1226 
1227     result = parser.parse(det);
1228 
1229     return result;
1230 }
1231 
~CharsetRecog_IBM420_ar_rtl()1232 CharsetRecog_IBM420_ar_rtl::~CharsetRecog_IBM420_ar_rtl()
1233 {
1234     // nothing to do
1235 }
1236 
getName() const1237 const char *CharsetRecog_IBM420_ar_rtl::getName() const
1238 {
1239     return  "IBM420_rtl";
1240 }
1241 
match(InputText * textIn,CharsetMatch * results) const1242 UBool CharsetRecog_IBM420_ar_rtl::match(InputText *textIn, CharsetMatch *results) const
1243 {
1244     int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_rtl, charMap_IBM420_ar);
1245     results->set(textIn, this, confidence);
1246     return (confidence > 0);
1247 }
1248 
~CharsetRecog_IBM420_ar_ltr()1249 CharsetRecog_IBM420_ar_ltr::~CharsetRecog_IBM420_ar_ltr()
1250 {
1251     // nothing to do
1252 }
1253 
getName() const1254 const char *CharsetRecog_IBM420_ar_ltr::getName() const
1255 {
1256     return  "IBM420_ltr";
1257 }
1258 
match(InputText * textIn,CharsetMatch * results) const1259 UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const
1260 {
1261     int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);
1262     results->set(textIn, this, confidence);
1263     return (confidence > 0);
1264 }
1265 #endif
1266 
1267 U_NAMESPACE_END
1268 #endif
1269 
1270