1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2000-2016, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *   file name:  ucnvisci.c
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2001JUN26
14 *   created by: Ram Viswanadha
15 *
16 *   Date        Name        Description
17 *   24/7/2001   Ram         Added support for EXT character handling
18 */
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
23 
24 #include "unicode/ucnv.h"
25 #include "unicode/ucnv_cb.h"
26 #include "unicode/utf16.h"
27 #include "cmemory.h"
28 #include "ucnv_bld.h"
29 #include "ucnv_cnv.h"
30 #include "cstring.h"
31 #include "uassert.h"
32 
/*
 * Constants used by the ISCII converter.
 *
 * Names prefixed with ISCII_ (and ATR / EXT) are ISCII byte values; the
 * remaining code point constants are Unicode values.
 */
#define UCNV_OPTIONS_VERSION_MASK 0xf  /* low 4 bits of the open options select the ISCII version */
#define NUKTA               0x093c
#define HALANT              0x094d
#define ZWNJ                0x200c /* Zero Width Non Joiner */
#define ZWJ                 0x200d /* Zero width Joiner */
#define INVALID_CHAR        0xffff
#define ATR                 0xEF   /* Attribute code */
#define EXT                 0xF0   /* Extension code */
#define DANDA               0x0964
#define DOUBLE_DANDA        0x0965
#define ISCII_NUKTA         0xE9
#define ISCII_HALANT        0xE8
#define ISCII_DANDA         0xEA
#define ISCII_INV           0xD9
#define ISCII_VOWEL_SIGN_E  0xE0
#define INDIC_BLOCK_BEGIN   0x0900
#define INDIC_BLOCK_END     0x0D7F
#define INDIC_RANGE         (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN)
#define VOCALLIC_RR         0x0931
#define LF                  0x0A
#define ASCII_END           0xA0
#define NO_CHAR_MARKER      0xFFFE
/*
 * Parenthesized so that the product stays one operand wherever the macro is
 * used (e.g. with '/', '%' or a cast); DELTA and TELUGU are UniLang
 * enumerators.
 */
#define TELUGU_DELTA        (DELTA * TELUGU)
#define DEV_ABBR_SIGN       0x0970
#define DEV_ANUDATTA        0x0952
#define EXT_RANGE_BEGIN     0xA1
#define EXT_RANGE_END       0xEE

/* Gurmukhi (PNJ) specific code points and delta. */
#define PNJ_DELTA           0x0100
#define PNJ_BINDI           0x0A02
#define PNJ_TIPPI           0x0A70
#define PNJ_SIGN_VIRAMA     0x0A4D
#define PNJ_ADHAK           0x0A71
#define PNJ_HA              0x0A39
#define PNJ_RRA             0x0A5C
68 
/**
 * Script indexes in Unicode block order, starting at Devanagari.
 * _ISCIIOpen() multiplies a script index by DELTA to obtain the converter's
 * delta for that script (e.g. GURMUKHI * DELTA == 0x0100).
 */
typedef enum {
    DEVANAGARI = 0,
    BENGALI    = 1,
    GURMUKHI   = 2,
    GUJARATI   = 3,
    ORIYA      = 4,
    TAMIL      = 5,
    TELUGU     = 6,
    KANNADA    = 7,
    MALAYALAM  = 8,
    /* Not a script: the multiplier applied to the script indexes above. */
    DELTA      = 0x80
} UniLang;
81 
/**
 * Script selector values: when <ATR> followed by one of these values is
 * encountered, the converter switches code pages.
 *
 * The script names given for DEV..PNJ follow the pairing in
 * lookupInitialData; the others are presumed from their abbreviations
 * (TODO confirm against the ISCII standard).
 */
typedef enum {
    DEF = 0x40,     /* presumably "default" */
    RMN = 0x41,     /* presumably Roman */
    DEV = 0x42,     /* Devanagari */
    BNG = 0x43,     /* Bengali */
    TML = 0x44,     /* Tamil */
    TLG = 0x45,     /* Telugu */
    ASM = 0x46,     /* Assamese */
    ORI = 0x47,     /* Oriya */
    KND = 0x48,     /* Kannada */
    MLM = 0x49,     /* Malayalam */
    GJR = 0x4A,     /* Gujarati */
    PNJ = 0x4B,     /* Gurmukhi */
    ARB = 0x71,     /* presumably Arabic */
    PES = 0x72,     /* presumably Persian */
    URD = 0x73,     /* presumably Urdu */
    SND = 0x74,     /* presumably Sindhi */
    KSM = 0x75,     /* presumably Kashmiri */
    PST = 0x76      /* presumably Pashto */
} ISCIILang;
106 
/**
 * One bit per script (or script pair) for the entries of validityTable.
 * Bengali and Assamese share BNG_MASK; Telugu and Kannada share KND_MASK.
 */
typedef enum {
    DEV_MASK = 0x80,    /* Devanagari */
    PNJ_MASK = 0x40,    /* Gurmukhi */
    GJR_MASK = 0x20,    /* Gujarati */
    ORI_MASK = 0x10,    /* Oriya */
    BNG_MASK = 0x08,    /* Bengali, Assamese */
    KND_MASK = 0x04,    /* Kannada, Telugu */
    MLM_MASK = 0x02,    /* Malayalam */
    TML_MASK = 0x01,    /* Tamil */
    ZERO     = 0x00     /* placeholder: not valid in that script */
} MaskEnum;
118 
119 #define ISCII_CNV_PREFIX "ISCII,version="
120 
121 typedef struct {
122     UChar contextCharToUnicode;         /* previous Unicode codepoint for contextual analysis */
123     UChar contextCharFromUnicode;       /* previous Unicode codepoint for contextual analysis */
124     uint16_t defDeltaToUnicode;         /* delta for switching to default state when DEF is encountered  */
125     uint16_t currentDeltaFromUnicode;   /* current delta in Indic block */
126     uint16_t currentDeltaToUnicode;     /* current delta in Indic block */
127     MaskEnum currentMaskFromUnicode;    /* mask for current state in toUnicode */
128     MaskEnum currentMaskToUnicode;      /* mask for current state in toUnicode */
129     MaskEnum defMaskToUnicode;          /* mask for default state in toUnicode */
130     UBool isFirstBuffer;                /* boolean for fromUnicode to see if we need to announce the first script */
131     UBool resetToDefaultToUnicode;      /* boolean for reseting to default delta and mask when a newline is encountered*/
132     char name[sizeof(ISCII_CNV_PREFIX) + 1];
133     UChar32 prevToUnicodeStatus;        /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
134 } UConverterDataISCII;
135 
136 typedef struct LookupDataStruct {
137     UniLang uniLang;
138     MaskEnum maskEnum;
139     ISCIILang isciiLang;
140 } LookupDataStruct;
141 
142 static const LookupDataStruct lookupInitialData[]={
143     { DEVANAGARI, DEV_MASK,  DEV },
144     { BENGALI,    BNG_MASK,  BNG },
145     { GURMUKHI,   PNJ_MASK,  PNJ },
146     { GUJARATI,   GJR_MASK,  GJR },
147     { ORIYA,      ORI_MASK,  ORI },
148     { TAMIL,      TML_MASK,  TML },
149     { TELUGU,     KND_MASK,  TLG },
150     { KANNADA,    KND_MASK,  KND },
151     { MALAYALAM,  MLM_MASK,  MLM }
152 };
153 
/*
 * Flags for the special handling of certain Gurmukhi characters, indexed by
 * (code point - 0x0A00) for 0x0A00..0x0A4F.
 *   Bit 0 (value 1): PNJ consonant
 *   Bit 1 (value 2): PNJ Bindi Tippi
 * Every entry that has bit 0 set in this table also has bit 1 set (value 3).
 */
static const uint8_t pnjMap[80] = {
    /* 0A00..0A07 */ 0, 0, 0, 0, 0, 2, 0, 2,
    /* 0A08..0A0F */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0A10..0A17 */ 0, 0, 0, 0, 0, 3, 3, 3,
    /* 0A18..0A1F */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0A20..0A27 */ 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0A28..0A2F */ 3, 0, 3, 3, 3, 3, 3, 3,
    /* 0A30..0A37 */ 3, 0, 0, 0, 0, 3, 3, 0,
    /* 0A38..0A3F */ 3, 3, 0, 0, 0, 0, 0, 2,
    /* 0A40..0A47 */ 0, 2, 2, 0, 0, 0, 0, 0,
    /* 0A48..0A4F */ 0, 0, 0, 0, 0, 0, 0, 0
};
171 
172 static UBool
isPNJConsonant(UChar32 c)173 isPNJConsonant(UChar32 c) {
174     if (c < 0xa00 || 0xa50 <= c) {
175         return FALSE;
176     } else {
177         return (UBool)(pnjMap[c - 0xa00] & 1);
178     }
179 }
180 
181 static UBool
isPNJBindiTippi(UChar32 c)182 isPNJBindiTippi(UChar32 c) {
183     if (c < 0xa00 || 0xa50 <= c) {
184         return FALSE;
185     } else {
186         return (UBool)(pnjMap[c - 0xa00] >> 1);
187     }
188 }
189 
/**
 * Allocates and initializes the ISCII state of a converter.
 *
 * Does nothing when pArgs->onlyTestIsLoadable is set. Otherwise the state is
 * allocated into cnv->extraInfo and initialized from the lookupInitialData
 * row selected by the version (pArgs->options & UCNV_OPTIONS_VERSION_MASK).
 * The converter name becomes ISCII_CNV_PREFIX followed by the version digit.
 *
 * @param cnv       converter being opened
 * @param pArgs     load arguments; only onlyTestIsLoadable and options are read
 * @param errorCode set to U_MEMORY_ALLOCATION_ERROR when the allocation fails,
 *                  or to U_ILLEGAL_ARGUMENT_ERROR when the version is 9 or
 *                  greater (the state is then freed and extraInfo is NULL)
 */
static void
_ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) {
    UConverterDataISCII *state;
    const LookupDataStruct *initial;
    int32_t version;
    int32_t prefixLength;

    if (pArgs->onlyTestIsLoadable) {
        return;
    }

    cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII));
    if (cnv->extraInfo == NULL) {
        *errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    state = (UConverterDataISCII *)cnv->extraInfo;
    state->contextCharToUnicode = NO_CHAR_MARKER;
    cnv->toUnicodeStatus = missingCharMarker;
    state->contextCharFromUnicode = 0x0000;
    state->resetToDefaultToUnicode = FALSE;

    /* check if the version requested is supported */
    version = (int32_t)(pArgs->options & UCNV_OPTIONS_VERSION_MASK);
    if (version >= 9) {
        uprv_free(cnv->extraInfo);
        cnv->extraInfo = NULL;
        *errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* initialize state variables: current values start out as the defaults */
    initial = &lookupInitialData[version];

    state->defDeltaToUnicode = (uint16_t)(initial->uniLang * DELTA);
    state->currentDeltaToUnicode = state->defDeltaToUnicode;
    state->currentDeltaFromUnicode = state->defDeltaToUnicode;

    state->defMaskToUnicode = initial->maskEnum;
    state->currentMaskToUnicode = state->defMaskToUnicode;
    state->currentMaskFromUnicode = state->defMaskToUnicode;

    state->isFirstBuffer = TRUE;

    /* name = prefix + one version digit + NUL */
    (void)uprv_strcpy(state->name, ISCII_CNV_PREFIX);
    prefixLength = (int32_t)uprv_strlen(state->name);
    state->name[prefixLength] = (char)(version + '0');
    state->name[prefixLength + 1] = 0;

    state->prevToUnicodeStatus = 0x0000;
}
233 
/**
 * Releases the ISCII state of a converter.
 *
 * The state is freed only when cnv->isExtraLocal is not set; in either case
 * cnv->extraInfo is cleared. Does nothing when there is no state.
 *
 * @param cnv converter being closed
 */
static void
_ISCIIClose(UConverter *cnv) {
    if (cnv->extraInfo == NULL) {
        return;
    }
    if (!cnv->isExtraLocal) {
        uprv_free(cnv->extraInfo);
    }
    cnv->extraInfo = NULL;
}
242 
_ISCIIgetName(const UConverter * cnv)243 static const char* _ISCIIgetName(const UConverter* cnv) {
244     if (cnv->extraInfo) {
245         UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo;
246         return myData->name;
247     }
248     return NULL;
249 }
250 
/**
 * Resets the converter state to the defaults recorded at open time.
 *
 * The toUnicode side is reset when choice <= UCNV_RESET_TO_UNICODE, the
 * fromUnicode side when choice != UCNV_RESET_TO_UNICODE.
 *
 * @param cnv    converter to reset; cnv->extraInfo is dereferenced without a
 *               NULL check
 * @param choice which direction(s) to reset
 */
static void
_ISCIIReset(UConverter *cnv, UConverterResetChoice choice) {
    UConverterDataISCII *state = (UConverterDataISCII *)cnv->extraInfo;

    if (choice <= UCNV_RESET_TO_UNICODE) {
        /* toUnicode direction */
        cnv->toUnicodeStatus = missingCharMarker;
        cnv->mode = 0;
        state->contextCharToUnicode = NO_CHAR_MARKER;
        state->prevToUnicodeStatus = 0x0000;
        state->currentDeltaToUnicode = state->defDeltaToUnicode;
        state->currentMaskToUnicode = state->defMaskToUnicode;
    }

    if (choice != UCNV_RESET_TO_UNICODE) {
        /* fromUnicode direction */
        cnv->fromUChar32 = 0x0000;
        state->contextCharFromUnicode = 0x00;
        state->currentDeltaFromUnicode = state->defDeltaToUnicode;
        state->currentMaskFromUnicode = state->defMaskToUnicode;
        state->isFirstBuffer = TRUE;
        /*
         * NOTE(review): this flag is named for the toUnicode direction but is
         * cleared only on this branch - confirm that this is intended.
         */
        state->resetToDefaultToUnicode = FALSE;
    }
}
270 
271 /**
272  * The values in validity table are indexed by the lower bits of Unicode
273  * range 0x0900 - 0x09ff. The values have a structure like:
274  *       ---------------------------------------------------------------
275  *      | DEV   | PNJ   | GJR   | ORI   | BNG   | TLG   | MLM   | TML   |
276  *      |       |       |       |       | ASM   | KND   |       |       |
277  *       ---------------------------------------------------------------
278  * If a code point is valid in a particular script
279  * then that bit is turned on
280  *
281  * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for
282  * to represent these languages
283  *
284  * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case
285  * and combine and use 1 bit to represent these languages.
286  *
287  * TODO: It is probably easier to understand and maintain to change this
288  * to use uint16_t and give each of the 9 Unicode/script blocks its own bit.
289  */
290 
291 static const uint8_t validityTable[128] = {
292 /* This state table is tool generated please do not edit unless you know exactly what you are doing */
293 /* Note: This table was edited to mirror the Windows XP implementation */
294 /*ISCII:Valid:Unicode */
295 /*0xa0 : 0x00: 0x900  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
296 /*0xa1 : 0xb8: 0x901  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
297 /*0xa2 : 0xfe: 0x902  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
298 /*0xa3 : 0xbf: 0x903  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
299 /*0x00 : 0x00: 0x904  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
300 /*0xa4 : 0xff: 0x905  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
301 /*0xa5 : 0xff: 0x906  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
302 /*0xa6 : 0xff: 0x907  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
303 /*0xa7 : 0xff: 0x908  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
304 /*0xa8 : 0xff: 0x909  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
305 /*0xa9 : 0xff: 0x90a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
306 /*0xaa : 0xfe: 0x90b  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
307 /*0x00 : 0x00: 0x90c  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
308 /*0xae : 0x80: 0x90d  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
309 /*0xab : 0x87: 0x90e  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
310 /*0xac : 0xff: 0x90f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
311 /*0xad : 0xff: 0x910  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
312 /*0xb2 : 0x80: 0x911  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
313 /*0xaf : 0x87: 0x912  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
314 /*0xb0 : 0xff: 0x913  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
315 /*0xb1 : 0xff: 0x914  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
316 /*0xb3 : 0xff: 0x915  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
317 /*0xb4 : 0xfe: 0x916  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
318 /*0xb5 : 0xfe: 0x917  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
319 /*0xb6 : 0xfe: 0x918  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
320 /*0xb7 : 0xff: 0x919  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
321 /*0xb8 : 0xff: 0x91a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
322 /*0xb9 : 0xfe: 0x91b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
323 /*0xba : 0xff: 0x91c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
324 /*0xbb : 0xfe: 0x91d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
325 /*0xbc : 0xff: 0x91e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
326 /*0xbd : 0xff: 0x91f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
327 /*0xbe : 0xfe: 0x920  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
328 /*0xbf : 0xfe: 0x921  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
329 /*0xc0 : 0xfe: 0x922  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
330 /*0xc1 : 0xff: 0x923  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
331 /*0xc2 : 0xff: 0x924  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
332 /*0xc3 : 0xfe: 0x925  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
333 /*0xc4 : 0xfe: 0x926  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
334 /*0xc5 : 0xfe: 0x927  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
335 /*0xc6 : 0xff: 0x928  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
336 /*0xc7 : 0x81: 0x929  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + TML_MASK ,
337 /*0xc8 : 0xff: 0x92a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
338 /*0xc9 : 0xfe: 0x92b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
339 /*0xca : 0xfe: 0x92c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
340 /*0xcb : 0xfe: 0x92d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
341 /*0xcc : 0xfe: 0x92e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
342 /*0xcd : 0xff: 0x92f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
343 /*0xcf : 0xff: 0x930  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
344 /*0xd0 : 0x87: 0x931  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
345 /*0xd1 : 0xff: 0x932  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
346 /*0xd2 : 0xb7: 0x933  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
347 /*0xd3 : 0x83: 0x934  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + MLM_MASK + TML_MASK ,
348 /*0xd4 : 0xff: 0x935  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
349 /*0xd5 : 0xfe: 0x936  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
350 /*0xd6 : 0xbf: 0x937  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
351 /*0xd7 : 0xff: 0x938  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
352 /*0xd8 : 0xff: 0x939  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
353 /*0x00 : 0x00: 0x93A  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
354 /*0x00 : 0x00: 0x93B  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
355 /*0xe9 : 0xda: 0x93c  */ DEV_MASK + PNJ_MASK + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
356 /*0x00 : 0x00: 0x93d  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
357 /*0xda : 0xff: 0x93e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
358 /*0xdb : 0xff: 0x93f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
359 /*0xdc : 0xff: 0x940  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
360 /*0xdd : 0xff: 0x941  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
361 /*0xde : 0xff: 0x942  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
362 /*0xdf : 0xbe: 0x943  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
363 /*0x00 : 0x00: 0x944  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + BNG_MASK + KND_MASK + ZERO     + ZERO     ,
364 /*0xe3 : 0x80: 0x945  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
365 /*0xe0 : 0x87: 0x946  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
366 /*0xe1 : 0xff: 0x947  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
367 /*0xe2 : 0xff: 0x948  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
368 /*0xe7 : 0x80: 0x949  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
369 /*0xe4 : 0x87: 0x94a  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + MLM_MASK + TML_MASK ,
370 /*0xe5 : 0xff: 0x94b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
371 /*0xe6 : 0xff: 0x94c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
372 /*0xe8 : 0xff: 0x94d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
373 /*0xec : 0x00: 0x94e  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
374 /*0xed : 0x00: 0x94f  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
375 /*0x00 : 0x00: 0x950  */ DEV_MASK + ZERO     + GJR_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
376 /*0x00 : 0x00: 0x951  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
377 /*0x00 : 0x00: 0x952  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
378 /*0x00 : 0x00: 0x953  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
379 /*0x00 : 0x00: 0x954  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
380 /*0x00 : 0x00: 0x955  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + KND_MASK + ZERO     + ZERO     ,
381 /*0x00 : 0x00: 0x956  */ ZERO     + ZERO     + ZERO     + ORI_MASK + ZERO     + KND_MASK + ZERO     + ZERO     ,
382 /*0x00 : 0x00: 0x957  */ ZERO     + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + MLM_MASK + ZERO     ,
383 /*0x00 : 0x00: 0x958  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
384 /*0x00 : 0x00: 0x959  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
385 /*0x00 : 0x00: 0x95a  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
386 /*0x00 : 0x00: 0x95b  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
387 /*0x00 : 0x00: 0x95c  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
388 /*0x00 : 0x00: 0x95d  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
389 /*0x00 : 0x00: 0x95e  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
390 /*0xce : 0x98: 0x95f  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + ZERO     + ZERO     + ZERO     ,
391 /*0x00 : 0x00: 0x960  */ DEV_MASK + ZERO     + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
392 /*0x00 : 0x00: 0x961  */ DEV_MASK + ZERO     + ZERO     + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO     ,
393 /*0x00 : 0x00: 0x962  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
394 /*0x00 : 0x00: 0x963  */ DEV_MASK + ZERO     + ZERO     + ZERO     + BNG_MASK + ZERO     + ZERO     + ZERO     ,
395 /*0xea : 0xf8: 0x964  */ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
396 /*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
397 /*0xf1 : 0xff: 0x966  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
398 /*0xf2 : 0xff: 0x967  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
399 /*0xf3 : 0xff: 0x968  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
400 /*0xf4 : 0xff: 0x969  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
401 /*0xf5 : 0xff: 0x96a  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
402 /*0xf6 : 0xff: 0x96b  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
403 /*0xf7 : 0xff: 0x96c  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
404 /*0xf8 : 0xff: 0x96d  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
405 /*0xf9 : 0xff: 0x96e  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
406 /*0xfa : 0xff: 0x96f  */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK ,
407 /*0x00 : 0x80: 0x970  */ DEV_MASK + PNJ_MASK + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     ,
408 /*
409  * The length of the array is 128 to provide values for 0x900..0x97f.
410  * The last 15 entries for 0x971..0x97f of the validity table are all zero
411  * because no Indic script uses such Unicode code points.
412  */
413 /*0x00 : 0x00: 0x9yz  */ ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO     + ZERO
414 };
415 
/*
 * Unicode-to-ISCII mapping, indexed by the offset of the code point within
 * 0x0900..0x097f (the code point is shown next to each entry).
 *
 * Values up to 0x00ff are single ISCII bytes. Larger values presumably pack
 * two ISCII bytes, first byte in the high 8 bits (e.g. 0xEAEA for U+0965 is
 * ISCII_DANDA twice, and values ending in 0xE9 end with ISCII_NUKTA) -
 * confirm against the fromUnicode code. 0xFFFF has the same value as
 * INVALID_CHAR and presumably marks code points without a mapping.
 */
static const uint16_t fromUnicodeTable[128] = {
    0x00A0, /* U+0900 */
    0x00A1, /* U+0901 */
    0x00A2, /* U+0902 */
    0x00A3, /* U+0903 */
    0xA4E0, /* U+0904 */
    0x00A4, /* U+0905 */
    0x00A5, /* U+0906 */
    0x00A6, /* U+0907 */
    0x00A7, /* U+0908 */
    0x00A8, /* U+0909 */
    0x00A9, /* U+090A */
    0x00AA, /* U+090B */
    0xA6E9, /* U+090C */
    0x00AE, /* U+090D */
    0x00AB, /* U+090E */
    0x00AC, /* U+090F */
    0x00AD, /* U+0910 */
    0x00B2, /* U+0911 */
    0x00AF, /* U+0912 */
    0x00B0, /* U+0913 */
    0x00B1, /* U+0914 */
    0x00B3, /* U+0915 */
    0x00B4, /* U+0916 */
    0x00B5, /* U+0917 */
    0x00B6, /* U+0918 */
    0x00B7, /* U+0919 */
    0x00B8, /* U+091A */
    0x00B9, /* U+091B */
    0x00BA, /* U+091C */
    0x00BB, /* U+091D */
    0x00BC, /* U+091E */
    0x00BD, /* U+091F */
    0x00BE, /* U+0920 */
    0x00BF, /* U+0921 */
    0x00C0, /* U+0922 */
    0x00C1, /* U+0923 */
    0x00C2, /* U+0924 */
    0x00C3, /* U+0925 */
    0x00C4, /* U+0926 */
    0x00C5, /* U+0927 */
    0x00C6, /* U+0928 */
    0x00C7, /* U+0929 */
    0x00C8, /* U+092A */
    0x00C9, /* U+092B */
    0x00CA, /* U+092C */
    0x00CB, /* U+092D */
    0x00CC, /* U+092E */
    0x00CD, /* U+092F */
    0x00CF, /* U+0930 */
    0x00D0, /* U+0931 */
    0x00D1, /* U+0932 */
    0x00D2, /* U+0933 */
    0x00D3, /* U+0934 */
    0x00D4, /* U+0935 */
    0x00D5, /* U+0936 */
    0x00D6, /* U+0937 */
    0x00D7, /* U+0938 */
    0x00D8, /* U+0939 */
    0xFFFF, /* U+093A */
    0xFFFF, /* U+093B */
    0x00E9, /* U+093C */
    0xEAE9, /* U+093D */
    0x00DA, /* U+093E */
    0x00DB, /* U+093F */
    0x00DC, /* U+0940 */
    0x00DD, /* U+0941 */
    0x00DE, /* U+0942 */
    0x00DF, /* U+0943 */
    0xDFE9, /* U+0944 */
    0x00E3, /* U+0945 */
    0x00E0, /* U+0946 */
    0x00E1, /* U+0947 */
    0x00E2, /* U+0948 */
    0x00E7, /* U+0949 */
    0x00E4, /* U+094A */
    0x00E5, /* U+094B */
    0x00E6, /* U+094C */
    0x00E8, /* U+094D */
    0x00EC, /* U+094E */
    0x00ED, /* U+094F */
    0xA1E9, /* U+0950 */ /* OM Symbol */
    0xFFFF, /* U+0951 */
    0xF0B8, /* U+0952 */
    0xFFFF, /* U+0953 */
    0xFFFF, /* U+0954 */
    0xFFFF, /* U+0955 */
    0xFFFF, /* U+0956 */
    0xFFFF, /* U+0957 */
    0xB3E9, /* U+0958 */
    0xB4E9, /* U+0959 */
    0xB5E9, /* U+095A */
    0xBAE9, /* U+095B */
    0xBFE9, /* U+095C */
    0xC0E9, /* U+095D */
    0xC9E9, /* U+095E */
    0x00CE, /* U+095F */
    0xAAE9, /* U+0960 */
    0xA7E9, /* U+0961 */
    0xDBE9, /* U+0962 */
    0xDCE9, /* U+0963 */
    0x00EA, /* U+0964 */
    0xEAEA, /* U+0965 */
    0x00F1, /* U+0966 */
    0x00F2, /* U+0967 */
    0x00F3, /* U+0968 */
    0x00F4, /* U+0969 */
    0x00F5, /* U+096A */
    0x00F6, /* U+096B */
    0x00F7, /* U+096C */
    0x00F8, /* U+096D */
    0x00F9, /* U+096E */
    0x00FA, /* U+096F */
    0xF0BF, /* U+0970 */
    0xFFFF, /* U+0971 */
    0xFFFF, /* U+0972 */
    0xFFFF, /* U+0973 */
    0xFFFF, /* U+0974 */
    0xFFFF, /* U+0975 */
    0xFFFF, /* U+0976 */
    0xFFFF, /* U+0977 */
    0xFFFF, /* U+0978 */
    0xFFFF, /* U+0979 */
    0xFFFF, /* U+097A */
    0xFFFF, /* U+097B */
    0xFFFF, /* U+097C */
    0xFFFF, /* U+097D */
    0xFFFF, /* U+097E */
    0xFFFF  /* U+097F */
};
546 static const uint16_t toUnicodeTable[256]={
547     0x0000,/* 0x00 */
548     0x0001,/* 0x01 */
549     0x0002,/* 0x02 */
550     0x0003,/* 0x03 */
551     0x0004,/* 0x04 */
552     0x0005,/* 0x05 */
553     0x0006,/* 0x06 */
554     0x0007,/* 0x07 */
555     0x0008,/* 0x08 */
556     0x0009,/* 0x09 */
557     0x000a,/* 0x0a */
558     0x000b,/* 0x0b */
559     0x000c,/* 0x0c */
560     0x000d,/* 0x0d */
561     0x000e,/* 0x0e */
562     0x000f,/* 0x0f */
563     0x0010,/* 0x10 */
564     0x0011,/* 0x11 */
565     0x0012,/* 0x12 */
566     0x0013,/* 0x13 */
567     0x0014,/* 0x14 */
568     0x0015,/* 0x15 */
569     0x0016,/* 0x16 */
570     0x0017,/* 0x17 */
571     0x0018,/* 0x18 */
572     0x0019,/* 0x19 */
573     0x001a,/* 0x1a */
574     0x001b,/* 0x1b */
575     0x001c,/* 0x1c */
576     0x001d,/* 0x1d */
577     0x001e,/* 0x1e */
578     0x001f,/* 0x1f */
579     0x0020,/* 0x20 */
580     0x0021,/* 0x21 */
581     0x0022,/* 0x22 */
582     0x0023,/* 0x23 */
583     0x0024,/* 0x24 */
584     0x0025,/* 0x25 */
585     0x0026,/* 0x26 */
586     0x0027,/* 0x27 */
587     0x0028,/* 0x28 */
588     0x0029,/* 0x29 */
589     0x002a,/* 0x2a */
590     0x002b,/* 0x2b */
591     0x002c,/* 0x2c */
592     0x002d,/* 0x2d */
593     0x002e,/* 0x2e */
594     0x002f,/* 0x2f */
595     0x0030,/* 0x30 */
596     0x0031,/* 0x31 */
597     0x0032,/* 0x32 */
598     0x0033,/* 0x33 */
599     0x0034,/* 0x34 */
600     0x0035,/* 0x35 */
601     0x0036,/* 0x36 */
602     0x0037,/* 0x37 */
603     0x0038,/* 0x38 */
604     0x0039,/* 0x39 */
605     0x003A,/* 0x3A */
606     0x003B,/* 0x3B */
607     0x003c,/* 0x3c */
608     0x003d,/* 0x3d */
609     0x003e,/* 0x3e */
610     0x003f,/* 0x3f */
611     0x0040,/* 0x40 */
612     0x0041,/* 0x41 */
613     0x0042,/* 0x42 */
614     0x0043,/* 0x43 */
615     0x0044,/* 0x44 */
616     0x0045,/* 0x45 */
617     0x0046,/* 0x46 */
618     0x0047,/* 0x47 */
619     0x0048,/* 0x48 */
620     0x0049,/* 0x49 */
621     0x004a,/* 0x4a */
622     0x004b,/* 0x4b */
623     0x004c,/* 0x4c */
624     0x004d,/* 0x4d */
625     0x004e,/* 0x4e */
626     0x004f,/* 0x4f */
627     0x0050,/* 0x50 */
628     0x0051,/* 0x51 */
629     0x0052,/* 0x52 */
630     0x0053,/* 0x53 */
631     0x0054,/* 0x54 */
632     0x0055,/* 0x55 */
633     0x0056,/* 0x56 */
634     0x0057,/* 0x57 */
635     0x0058,/* 0x58 */
636     0x0059,/* 0x59 */
637     0x005a,/* 0x5a */
638     0x005b,/* 0x5b */
639     0x005c,/* 0x5c */
640     0x005d,/* 0x5d */
641     0x005e,/* 0x5e */
642     0x005f,/* 0x5f */
643     0x0060,/* 0x60 */
644     0x0061,/* 0x61 */
645     0x0062,/* 0x62 */
646     0x0063,/* 0x63 */
647     0x0064,/* 0x64 */
648     0x0065,/* 0x65 */
649     0x0066,/* 0x66 */
650     0x0067,/* 0x67 */
651     0x0068,/* 0x68 */
652     0x0069,/* 0x69 */
653     0x006a,/* 0x6a */
654     0x006b,/* 0x6b */
655     0x006c,/* 0x6c */
656     0x006d,/* 0x6d */
657     0x006e,/* 0x6e */
658     0x006f,/* 0x6f */
659     0x0070,/* 0x70 */
660     0x0071,/* 0x71 */
661     0x0072,/* 0x72 */
662     0x0073,/* 0x73 */
663     0x0074,/* 0x74 */
664     0x0075,/* 0x75 */
665     0x0076,/* 0x76 */
666     0x0077,/* 0x77 */
667     0x0078,/* 0x78 */
668     0x0079,/* 0x79 */
669     0x007a,/* 0x7a */
670     0x007b,/* 0x7b */
671     0x007c,/* 0x7c */
672     0x007d,/* 0x7d */
673     0x007e,/* 0x7e */
674     0x007f,/* 0x7f */
675     0x0080,/* 0x80 */
676     0x0081,/* 0x81 */
677     0x0082,/* 0x82 */
678     0x0083,/* 0x83 */
679     0x0084,/* 0x84 */
680     0x0085,/* 0x85 */
681     0x0086,/* 0x86 */
682     0x0087,/* 0x87 */
683     0x0088,/* 0x88 */
684     0x0089,/* 0x89 */
685     0x008a,/* 0x8a */
686     0x008b,/* 0x8b */
687     0x008c,/* 0x8c */
688     0x008d,/* 0x8d */
689     0x008e,/* 0x8e */
690     0x008f,/* 0x8f */
691     0x0090,/* 0x90 */
692     0x0091,/* 0x91 */
693     0x0092,/* 0x92 */
694     0x0093,/* 0x93 */
695     0x0094,/* 0x94 */
696     0x0095,/* 0x95 */
697     0x0096,/* 0x96 */
698     0x0097,/* 0x97 */
699     0x0098,/* 0x98 */
700     0x0099,/* 0x99 */
701     0x009a,/* 0x9a */
702     0x009b,/* 0x9b */
703     0x009c,/* 0x9c */
704     0x009d,/* 0x9d */
705     0x009e,/* 0x9e */
706     0x009f,/* 0x9f */
707     0x00A0,/* 0xa0 */
708     0x0901,/* 0xa1 */
709     0x0902,/* 0xa2 */
710     0x0903,/* 0xa3 */
711     0x0905,/* 0xa4 */
712     0x0906,/* 0xa5 */
713     0x0907,/* 0xa6 */
714     0x0908,/* 0xa7 */
715     0x0909,/* 0xa8 */
716     0x090a,/* 0xa9 */
717     0x090b,/* 0xaa */
718     0x090e,/* 0xab */
719     0x090f,/* 0xac */
720     0x0910,/* 0xad */
721     0x090d,/* 0xae */
722     0x0912,/* 0xaf */
723     0x0913,/* 0xb0 */
724     0x0914,/* 0xb1 */
725     0x0911,/* 0xb2 */
726     0x0915,/* 0xb3 */
727     0x0916,/* 0xb4 */
728     0x0917,/* 0xb5 */
729     0x0918,/* 0xb6 */
730     0x0919,/* 0xb7 */
731     0x091a,/* 0xb8 */
732     0x091b,/* 0xb9 */
733     0x091c,/* 0xba */
734     0x091d,/* 0xbb */
735     0x091e,/* 0xbc */
736     0x091f,/* 0xbd */
737     0x0920,/* 0xbe */
738     0x0921,/* 0xbf */
739     0x0922,/* 0xc0 */
740     0x0923,/* 0xc1 */
741     0x0924,/* 0xc2 */
742     0x0925,/* 0xc3 */
743     0x0926,/* 0xc4 */
744     0x0927,/* 0xc5 */
745     0x0928,/* 0xc6 */
746     0x0929,/* 0xc7 */
747     0x092a,/* 0xc8 */
748     0x092b,/* 0xc9 */
749     0x092c,/* 0xca */
750     0x092d,/* 0xcb */
751     0x092e,/* 0xcc */
752     0x092f,/* 0xcd */
753     0x095f,/* 0xce */
754     0x0930,/* 0xcf */
755     0x0931,/* 0xd0 */
756     0x0932,/* 0xd1 */
757     0x0933,/* 0xd2 */
758     0x0934,/* 0xd3 */
759     0x0935,/* 0xd4 */
760     0x0936,/* 0xd5 */
761     0x0937,/* 0xd6 */
762     0x0938,/* 0xd7 */
763     0x0939,/* 0xd8 */
764     0x200D,/* 0xd9 */
765     0x093e,/* 0xda */
766     0x093f,/* 0xdb */
767     0x0940,/* 0xdc */
768     0x0941,/* 0xdd */
769     0x0942,/* 0xde */
770     0x0943,/* 0xdf */
771     0x0946,/* 0xe0 */
772     0x0947,/* 0xe1 */
773     0x0948,/* 0xe2 */
774     0x0945,/* 0xe3 */
775     0x094a,/* 0xe4 */
776     0x094b,/* 0xe5 */
777     0x094c,/* 0xe6 */
778     0x0949,/* 0xe7 */
779     0x094d,/* 0xe8 */
780     0x093c,/* 0xe9 */
781     0x0964,/* 0xea */
782     0xFFFF,/* 0xeb */
783     0xFFFF,/* 0xec */
784     0xFFFF,/* 0xed */
785     0xFFFF,/* 0xee */
786     0xFFFF,/* 0xef */
787     0xFFFF,/* 0xf0 */
788     0x0966,/* 0xf1 */
789     0x0967,/* 0xf2 */
790     0x0968,/* 0xf3 */
791     0x0969,/* 0xf4 */
792     0x096a,/* 0xf5 */
793     0x096b,/* 0xf6 */
794     0x096c,/* 0xf7 */
795     0x096d,/* 0xf8 */
796     0x096e,/* 0xf9 */
797     0x096f,/* 0xfa */
798     0xFFFF,/* 0xfb */
799     0xFFFF,/* 0xfc */
800     0xFFFF,/* 0xfd */
801     0xFFFF,/* 0xfe */
802     0xFFFF /* 0xff */
803 };
804 
/*
 * Special cases looked up when ISCII_VOWEL_SIGN_E is seen in the to-Unicode
 * converter.
 * Row 0 is a header: its first element is the total number of rows (header
 * included) and bounds the search loop. Every later row is
 * { ISCII byte held as context, value assigned to targetUniChar }.
 */
static const uint16_t vowelSignESpecialCases[][2] = {
    { 2,    0x0000 },   /* header: number of rows in this table */
    { 0xA4, 0x0904 }
};
809 
/*
 * Nukta special cases.
 * Row 0 is a header whose first element is the total number of rows (header
 * included). Every later row pairs an ISCII byte with a Unicode value.
 * NOTE(review): presumably consulted when ISCII_NUKTA follows the listed byte,
 * in the same way vowelSignESpecialCases is searched -- confirm at the use site.
 */
static const uint16_t nuktaSpecialCases[][2] = {
    { 16,   0x0000 },   /* header: number of rows in this table */
    { 0xA6, 0x090C },
    { 0xEA, 0x093D },
    { 0xDF, 0x0944 },
    { 0xA1, 0x0950 },
    { 0xB3, 0x0958 },
    { 0xB4, 0x0959 },
    { 0xB5, 0x095A },
    { 0xBA, 0x095B },
    { 0xBF, 0x095C },
    { 0xC0, 0x095D },
    { 0xC9, 0x095E },
    { 0xAA, 0x0960 },
    { 0xA7, 0x0961 },
    { 0xDB, 0x0962 },
    { 0xDC, 0x0963 }
};
828 
829 
/*
 * Writes the bytes packed in targetByteUnit (most significant byte first; one,
 * two or three bytes) to target. Bytes that do not fit before targetLimit are
 * appended to args->converter->charErrorBuffer and *err is set to
 * U_BUFFER_OVERFLOW_ERROR.
 * When offsets is non-NULL, one entry is stored per byte written to target:
 * (source - args->source - 1), lowered by one more for a three-byte unit.
 * The arguments are expanded textually and more than once, so pass plain
 * lvalues without side effects.
 */
#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err){      \
    int32_t srcIndex = (int32_t)(source - args->source-1);                                      \
    if(!(target < targetLimit)){                                                                \
        /* no room at all: every significant byte goes to the error buffer */                   \
        if (targetByteUnit & 0xFF0000) {                                                        \
            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
                        (uint8_t) (targetByteUnit >>16);                                        \
        }                                                                                       \
        if(targetByteUnit & 0xFF00){                                                            \
            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
                        (uint8_t) (targetByteUnit >>8);                                         \
        }                                                                                       \
        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =            \
                        (uint8_t) (targetByteUnit);                                             \
        *err = U_BUFFER_OVERFLOW_ERROR;                                                         \
    }else if(targetByteUnit <= 0xFF){                                                           \
        /* single byte */                                                                       \
        *(target)++ = (uint8_t)(targetByteUnit);                                                \
        if(offsets){                                                                            \
            *(offsets++) = srcIndex;                                                            \
        }                                                                                       \
    }else{                                                                                      \
        if (targetByteUnit > 0xFFFF) {                                                          \
            /* leading byte of a three-byte unit */                                             \
            *(target)++ = (uint8_t)(targetByteUnit>>16);                                        \
            if (offsets) {                                                                      \
                --srcIndex;                                                                     \
                *(offsets++) = srcIndex;                                                        \
            }                                                                                   \
        }                                                                                       \
        if (target < targetLimit) {                                                             \
            *(target)++ = (uint8_t)(targetByteUnit>>8);                                         \
            if(offsets){                                                                        \
                *(offsets++) = srcIndex;                                                        \
            }                                                                                   \
            if(target < targetLimit){                                                           \
                *(target)++ = (uint8_t)  targetByteUnit;                                        \
                if(offsets){                                                                    \
                    *(offsets++) = srcIndex;                                                    \
                }                                                                               \
            }else{                                                                              \
                /* only the last byte is left over */                                           \
                args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =    \
                                (uint8_t) (targetByteUnit);                                     \
                *err = U_BUFFER_OVERFLOW_ERROR;                                                 \
            }                                                                                   \
        } else {                                                                                \
            /* the last two bytes are left over */                                              \
            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
                            (uint8_t)(targetByteUnit >> 8);                                     \
            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =        \
                            (uint8_t)targetByteUnit;                                            \
            *err = U_BUFFER_OVERFLOW_ERROR;                                                     \
        }                                                                                       \
    }                                                                                           \
}
884 
885 /* Rules:
886  *    Explicit Halant :
887  *                      <HALANT> + <ZWNJ>
888  *    Soft Halant :
889  *                      <HALANT> + <ZWJ>
890  */
891 
/*
 * Converts UTF-16 code units from args->source into ISCII bytes at args->target,
 * recording source offsets through args->offsets when that pointer is non-NULL.
 *
 * Per code unit:
 *  - If the last single-byte character written was LF (remembered in
 *    converter->fromUnicodeStatus), ATR plus the language code of the current
 *    script range is written before the next character.
 *  - Values up to ASCII_END are written unchanged as one byte.
 *  - ZWNJ and ZWJ are mapped according to whether a context character is pending.
 *  - Code points in INDIC_BLOCK_BEGIN..INDIC_BLOCK_END are reduced by the delta of
 *    their script block and looked up in fromUnicodeTable; ATR plus the language
 *    code is written first whenever the script block changes.
 *  - A code unit without a mapping ends the loop: *err is set and the code point
 *    is stored in converter->fromUChar32.
 *
 * @param args  conversion arguments; args->source and args->target are advanced
 *              on return (they are left untouched on the argument-check failure).
 * @param err   receives U_ILLEGAL_ARGUMENT_ERROR for a NULL converter or a limit
 *              that precedes its start pointer, U_BUFFER_OVERFLOW_ERROR from the
 *              write macro, or U_INVALID_CHAR_FOUND / U_ILLEGAL_CHAR_FOUND for
 *              unmappable input.
 */
static void UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
        UConverterFromUnicodeArgs * args, UErrorCode * err) {
    const UChar *source = args->source;
    const UChar *sourceLimit = args->sourceLimit;
    unsigned char *target = (unsigned char *) args->target;
    unsigned char *targetLimit = (unsigned char *) args->targetLimit;
    int32_t* offsets = args->offsets;
    uint32_t targetByteUnit = 0x0000;
    UChar32 sourceChar = 0x0000;
    UChar32 tempContextFromUnicode = 0x0000;    /* For special handling of the Gurmukhi script. */
    UConverterDataISCII *converterData;
    uint16_t newDelta=0;
    uint16_t range = 0;
    UBool deltaChanged = FALSE;

    if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    /* initialize data: start from the script block the converter is currently in */
    converterData=(UConverterDataISCII*)args->converter->extraInfo;
    newDelta=converterData->currentDeltaFromUnicode;
    range = (uint16_t)(newDelta/DELTA);

    /* a code point saved by an earlier call is resumed at the trail-surrogate lookup */
    if ((sourceChar = args->converter->fromUChar32)!=0) {
        goto getTrail;
    }

    /* write the characters to the output stream */
    while (source < sourceLimit) {
        /* Write the language code following LF only if LF is not the last character. */
        if (args->converter->fromUnicodeStatus == LF) {
            targetByteUnit = ATR<<8;
            targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang;
            args->converter->fromUnicodeStatus = 0x0000;
            /* now append ATR and language code */
            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
            if (U_FAILURE(*err)) {
                break;
            }
        }

        sourceChar = *source++;
        /* snapshot the context before this character can change it; used after the switch */
        tempContextFromUnicode = converterData->contextCharFromUnicode;

        /* assume "no mapping" until one of the branches below finds one */
        targetByteUnit = missingCharMarker;

        /* check if input is in ASCII and C0 control codes range */
        if (sourceChar <= ASCII_END) {
            /* remember the character so that an LF is noticed on the next iteration */
            args->converter->fromUnicodeStatus = sourceChar;
            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err);
            if (U_FAILURE(*err)) {
                break;
            }
            continue;
        }
        switch (sourceChar) {
        case ZWNJ:
            /* contextChar has HALANT */
            if (converterData->contextCharFromUnicode) {
                converterData->contextCharFromUnicode = 0x00;
                targetByteUnit = ISCII_HALANT;
            } else {
                /* consume ZWNJ and continue */
                converterData->contextCharFromUnicode = 0x00;
                continue;
            }
            break;
        case ZWJ:
            /* contextChar has HALANT */
            if (converterData->contextCharFromUnicode) {
                targetByteUnit = ISCII_NUKTA;
            } else {
                targetByteUnit =ISCII_INV;
            }
            converterData->contextCharFromUnicode = 0x00;
            break;
        default:
            /* is the sourceChar in the INDIC_RANGE? */
            if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) {
                /* Danda and Double Danda are valid in Northern scripts. Since Unicode
                 * does not include these code points in all Northern scripts we need to
                 * filter them out.
                 */
                if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) {
                    /* find out to which block the sourceChar belongs */
                    range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA);
                    newDelta =(uint16_t)(range*DELTA);

                    /* Now are we in the same block as the previous? */
                    if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) {
                        converterData->currentDeltaFromUnicode = newDelta;
                        converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum;
                        deltaChanged =TRUE;
                        converterData->isFirstBuffer=FALSE;
                    }

                    if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
                        if (sourceChar == PNJ_TIPPI) {
                            /* Make sure Tippi is converted to Bindi. */
                            sourceChar = PNJ_BINDI;
                        } else if (sourceChar == PNJ_ADHAK) {
                            /* This is for consonant cluster handling. */
                            converterData->contextCharFromUnicode = PNJ_ADHAK;
                        }

                    }
                    /* Normalize all Indic codepoints to Devanagari and map them to ISCII */
                    /* now subtract the new delta from sourceChar */
                    sourceChar -= converterData->currentDeltaFromUnicode;
                }

                /* get the target byte unit; the table is indexed by the low 8 bits */
                targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar];

                /* is the code point valid in current script? */
                if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) {
                    /* Vocallic RR is assigned in ISCII Telugu and Unicode */
                    if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) {
                        targetByteUnit=missingCharMarker;
                    }
                }

                if (deltaChanged) {
                    /* we are in a script block which is different from the
                     * previous sourceChar's script block: write ATR and language codes
                     */
                    uint32_t temp=0;
                    temp =(uint16_t)(ATR<<8);
                    temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang);
                    /* reset */
                    deltaChanged=FALSE;
                    /* now append ATR and language code */
                    WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err);
                    if (U_FAILURE(*err)) {
                        /* NOTE(review): this break leaves only the switch, skipping the
                         * context reset below; the write logic after the switch still
                         * runs for the current character.
                         */
                        break;
                    }
                }

                /* Adhak itself is not written; it was recorded as context above */
                if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) {
                    continue;
                }
            }
            /* reset context char */
            converterData->contextCharFromUnicode = 0x00;
            break;
        }
        if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) {
            /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */
            /* reset context char */
            converterData->contextCharFromUnicode = 0x0000;
            /* pack consonant, HALANT, consonant into one three-byte unit */
            targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit;
            /* write targetByteUnit to target */
            WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err);
            if (U_FAILURE(*err)) {
                break;
            }
        } else if (targetByteUnit != missingCharMarker) {
            if (targetByteUnit==ISCII_HALANT) {
                /* remember the halant so that a following ZWNJ/ZWJ sees it as context */
                converterData->contextCharFromUnicode = (UChar)targetByteUnit;
            }
            /* write targetByteUnit to target */
            WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err);
            if (U_FAILURE(*err)) {
                break;
            }
        } else {
            /* the code point is unassigned */
            /* check if the char is a first (lead) surrogate */
            if (U16_IS_SURROGATE(sourceChar)) {
                if (U16_IS_SURROGATE_LEAD(sourceChar)) {
getTrail:
                    /* look ahead to find the trail surrogate */
                    if (source < sourceLimit) {
                        /* test the following code unit */
                        UChar trail= (*source);
                        if (U16_IS_TRAIL(trail)) {
                            source++;
                            sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail);
                            /* a combined supplementary code point is always reported as invalid here */
                            *err =U_INVALID_CHAR_FOUND;
                            /* convert this surrogate code point */
                            /* exit this condition tree */
                        } else {
                            /* this is an unmatched lead code unit (1st surrogate) */
                            /* callback(illegal) */
                            *err=U_ILLEGAL_CHAR_FOUND;
                        }
                    } else {
                        /* no more input: not an error; sourceChar is saved in fromUChar32 below */
                        *err = U_ZERO_ERROR;
                    }
                } else {
                    /* this is an unmatched trail code unit (2nd surrogate) */
                    /* callback(illegal) */
                    *err=U_ILLEGAL_CHAR_FOUND;
                }
            } else {
                /* callback(unassigned) for a BMP code point */
                *err = U_INVALID_CHAR_FOUND;
            }

            /* hand the offending (or pending) code point back through the converter */
            args->converter->fromUChar32=sourceChar;
            break;
        }
    }/* end while (source < sourceLimit) */

    /* save the state and return */
    args->source = source;
    args->target = (char*)target;
}
1102 
1103 static const uint16_t lookupTable[][2]={
1104     { ZERO,       ZERO     },     /*DEFALT*/
1105     { ZERO,       ZERO     },     /*ROMAN*/
1106     { DEVANAGARI, DEV_MASK },
1107     { BENGALI,    BNG_MASK },
1108     { TAMIL,      TML_MASK },
1109     { TELUGU,     KND_MASK },
1110     { BENGALI,    BNG_MASK },
1111     { ORIYA,      ORI_MASK },
1112     { KANNADA,    KND_MASK },
1113     { MALAYALAM,  MLM_MASK },
1114     { GUJARATI,   GJR_MASK },
1115     { GURMUKHI,   PNJ_MASK }
1116 };
1117 
/*
 * Stores one UTF-16 code unit, targetUniChar, at target.
 * targetUniChar must be a modifiable lvalue: delta is added to it in place
 * unless it is <= ASCII_END or equals ZWJ, ZWNJ, DANDA or DOUBLE_DANDA.
 * If target has reached args->targetLimit the unit is appended to
 * args->converter->UCharErrorBuffer instead and *err becomes
 * U_BUFFER_OVERFLOW_ERROR. When offsets is non-NULL, offset is stored for a
 * unit written to target. The source parameter is not used by the expansion.
 */
#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err){\
    /* script-neutral values stay put; everything else moves into the Indic block */     \
    if(!(targetUniChar<=ASCII_END ||                                                     \
           targetUniChar == ZWJ ||                                                       \
           targetUniChar == ZWNJ ||                                                      \
           targetUniChar == DANDA ||                                                     \
           targetUniChar == DOUBLE_DANDA)){                                              \
           targetUniChar+=(uint16_t)(delta);                                             \
    }                                                                                    \
    if(!(target<args->targetLimit)){                                                     \
        /* output is full: stash the unit in the error buffer */                         \
        args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] =   \
            (UChar)targetUniChar;                                                        \
        *err = U_BUFFER_OVERFLOW_ERROR;                                                  \
    }else{                                                                               \
        *(target)++ = (UChar)targetUniChar;                                              \
        if(offsets){                                                                     \
            *(offsets)++ = (int32_t)(offset);                                            \
        }                                                                                \
    }                                                                                    \
}
1140 
/*
 * Looks up the ISCII byte sourceChar in toUnicodeTable and stores the result
 * in targetUniChar (which must be an lvalue).
 * For bytes above ASCII_END the result is replaced by missingCharMarker when
 * validityTable does not allow it under data->currentMaskToUnicode. The one
 * exception is VOCALLIC_RR while data->currentDeltaToUnicode is TELUGU_DELTA.
 */
#define GET_MAPPING(sourceChar,targetUniChar,data){                                      \
    targetUniChar = toUnicodeTable[(sourceChar)] ;                                       \
    /* invalid in the current script, and not the Telugu Vocallic RR exception? */       \
    if(sourceChar> ASCII_END &&                                                          \
            (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0 &&   \
            !(data->currentDeltaToUnicode==(TELUGU_DELTA) &&                             \
                    targetUniChar==VOCALLIC_RR)){                                        \
        targetUniChar=missingCharMarker;                                                 \
    }                                                                                    \
}
1153 
/***********
 *  Rules for the ISCII to Unicode converter
 *  ISCII is a stateful encoding. To convert ISCII bytes to Unicode,
 *  which has both precomposed and decomposed forms of characters,
 *  pre-context and post-context need to be considered.
 *
 *  Post context
 *  i)  ATR : Attribute code is used to declare font and script switching.
 *      Currently we only switch scripts; font codes are consumed without generating an error
 *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
 *      obsolete characters
 *  Pre context
 *  i)  Halant: if preceded by a halant then it is an explicit halant
 *  ii) Nukta :
 *       a) if preceded by a halant then it is a soft halant
 *       b) if preceded by specific consonants and the ligatures have pre-composed
 *          characters in Unicode then convert to pre-composed characters
 *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
 *
 */
1174 
UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs * args,UErrorCode * err)1175 static void UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) {
1176     const char *source = ( char *) args->source;
1177     UChar *target = args->target;
1178     const char *sourceLimit = args->sourceLimit;
1179     const UChar* targetLimit = args->targetLimit;
1180     uint32_t targetUniChar = 0x0000;
1181     uint8_t sourceChar = 0x0000;
1182     UConverterDataISCII* data;
1183     UChar32* toUnicodeStatus=NULL;
1184     UChar32 tempTargetUniChar = 0x0000;
1185     UChar* contextCharToUnicode= NULL;
1186     UBool found;
1187     int i;
1188     int offset = 0;
1189 
1190     if ((args->converter == NULL) || (target < args->target) || (source < args->source)) {
1191         *err = U_ILLEGAL_ARGUMENT_ERROR;
1192         return;
1193     }
1194 
1195     data = (UConverterDataISCII*)(args->converter->extraInfo);
1196     contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */
1197     toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/
1198 
1199     while (U_SUCCESS(*err) && source<sourceLimit) {
1200 
1201         targetUniChar = missingCharMarker;
1202 
1203         if (target < targetLimit) {
1204             sourceChar = (unsigned char)*(source)++;
1205 
1206             /* look at the post-context preform special processing */
1207             if (*contextCharToUnicode==ATR) {
1208 
1209                 /* If we have ATR in *contextCharToUnicode then we need to change our
1210                  * state to the Indic Script specified by sourceChar
1211                  */
1212 
1213                 /* check if the sourceChar is supported script range*/
1214                 if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) {
1215                     data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA);
1216                     data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1];
1217                 } else if (sourceChar==DEF) {
1218                     /* switch back to default */
1219                     data->currentDeltaToUnicode = data->defDeltaToUnicode;
1220                     data->currentMaskToUnicode = data->defMaskToUnicode;
1221                 } else {
1222                     if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) {
1223                         /* these are display codes consume and continue */
1224                     } else {
1225                         *err =U_ILLEGAL_CHAR_FOUND;
1226                         /* reset */
1227                         *contextCharToUnicode=NO_CHAR_MARKER;
1228                         goto CALLBACK;
1229                     }
1230                 }
1231 
1232                 /* reset */
1233                 *contextCharToUnicode=NO_CHAR_MARKER;
1234 
1235                 continue;
1236 
1237             } else if (*contextCharToUnicode==EXT) {
1238                 /* check if sourceChar is in 0xA1-0xEE range */
1239                 if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) {
1240                     /* We currently support only Anudatta and Devanagari abbreviation sign */
1241                     if (sourceChar==0xBF || sourceChar == 0xB8) {
1242                         targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA;
1243 
1244                         /* find out if the mapping is valid in this state */
1245                         if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1246                             *contextCharToUnicode= NO_CHAR_MARKER;
1247 
1248                             /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1249                             if (data->prevToUnicodeStatus) {
1250                                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1251                                 data->prevToUnicodeStatus = 0x0000;
1252                             }
1253                             /* write to target */
1254                             WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1255 
1256                             continue;
1257                         }
1258                     }
1259                     /* byte unit is unassigned */
1260                     targetUniChar = missingCharMarker;
1261                     *err= U_INVALID_CHAR_FOUND;
1262                 } else {
1263                     /* only 0xA1 - 0xEE are legal after EXT char */
1264                     *contextCharToUnicode= NO_CHAR_MARKER;
1265                     *err = U_ILLEGAL_CHAR_FOUND;
1266                 }
1267                 goto CALLBACK;
1268             } else if (*contextCharToUnicode==ISCII_INV) {
1269                 if (sourceChar==ISCII_HALANT) {
1270                     targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
1271                 } else {
1272                     targetUniChar = ZWJ;
1273                 }
1274 
1275                 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1276                 if (data->prevToUnicodeStatus) {
1277                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1278                     data->prevToUnicodeStatus = 0x0000;
1279                 }
1280                 /* write to target */
1281                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1282                 /* reset */
1283                 *contextCharToUnicode=NO_CHAR_MARKER;
1284             }
1285 
1286             /* look at the pre-context and perform special processing */
1287             switch (sourceChar) {
1288             case ISCII_INV:
1289             case EXT:
1290             case ATR:
1291                 *contextCharToUnicode = (UChar)sourceChar;
1292 
1293                 if (*toUnicodeStatus != missingCharMarker) {
1294                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1295                     if (data->prevToUnicodeStatus) {
1296                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1297                         data->prevToUnicodeStatus = 0x0000;
1298                     }
1299                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1300                     *toUnicodeStatus = missingCharMarker;
1301                 }
1302                 continue;
1303             case ISCII_DANDA:
1304                 /* handle double danda*/
1305                 if (*contextCharToUnicode== ISCII_DANDA) {
1306                     targetUniChar = DOUBLE_DANDA;
1307                     /* clear the context */
1308                     *contextCharToUnicode = NO_CHAR_MARKER;
1309                     *toUnicodeStatus = missingCharMarker;
1310                 } else {
1311                     GET_MAPPING(sourceChar,targetUniChar,data);
1312                     *contextCharToUnicode = sourceChar;
1313                 }
1314                 break;
1315             case ISCII_HALANT:
1316                 /* handle explicit halant */
1317                 if (*contextCharToUnicode == ISCII_HALANT) {
1318                     targetUniChar = ZWNJ;
1319                     /* clear the context */
1320                     *contextCharToUnicode = NO_CHAR_MARKER;
1321                 } else {
1322                     GET_MAPPING(sourceChar,targetUniChar,data);
1323                     *contextCharToUnicode = sourceChar;
1324                 }
1325                 break;
1326             case 0x0A:
1327             case 0x0D:
1328                 data->resetToDefaultToUnicode = TRUE;
1329                 GET_MAPPING(sourceChar,targetUniChar,data)
1330                 ;
1331                 *contextCharToUnicode = sourceChar;
1332                 break;
1333 
1334             case ISCII_VOWEL_SIGN_E:
1335                 i=1;
1336                 found=FALSE;
1337                 for (; i<vowelSignESpecialCases[0][0]; i++) {
1338                     U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases));
1339                     if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) {
1340                         targetUniChar=vowelSignESpecialCases[i][1];
1341                         found=TRUE;
1342                         break;
1343                     }
1344                 }
1345                 if (found) {
1346                     /* find out if the mapping is valid in this state */
1347                     if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1348                         /*targetUniChar += data->currentDeltaToUnicode ;*/
1349                         *contextCharToUnicode= NO_CHAR_MARKER;
1350                         *toUnicodeStatus = missingCharMarker;
1351                         break;
1352                     }
1353                 }
1354                 GET_MAPPING(sourceChar,targetUniChar,data);
1355                 *contextCharToUnicode = sourceChar;
1356                 break;
1357 
1358             case ISCII_NUKTA:
1359                 /* handle soft halant */
1360                 if (*contextCharToUnicode == ISCII_HALANT) {
1361                     targetUniChar = ZWJ;
1362                     /* clear the context */
1363                     *contextCharToUnicode = NO_CHAR_MARKER;
1364                     break;
1365                 } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) {
1366                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1367                     if (data->prevToUnicodeStatus) {
1368                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1369                         data->prevToUnicodeStatus = 0x0000;
1370                     }
1371                     /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi.
1372                      * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39).
1373                      */
1374                     targetUniChar = PNJ_RRA;
1375                     WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1376                     if (U_SUCCESS(*err)) {
1377                         targetUniChar = PNJ_SIGN_VIRAMA;
1378                         WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1379                         if (U_SUCCESS(*err)) {
1380                             targetUniChar = PNJ_HA;
1381                             WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err);
1382                         } else {
1383                             args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
1384                         }
1385                     } else {
1386                         args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA;
1387                         args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA;
1388                     }
1389                     *toUnicodeStatus = missingCharMarker;
1390                     data->contextCharToUnicode = NO_CHAR_MARKER;
1391                     continue;
1392                 } else {
1393                     /* try to handle <CHAR> + ISCII_NUKTA special mappings */
1394                     i=1;
1395                     found =FALSE;
1396                     for (; i<nuktaSpecialCases[0][0]; i++) {
1397                         if (nuktaSpecialCases[i][0]==(uint8_t)
1398                                 *contextCharToUnicode) {
1399                             targetUniChar=nuktaSpecialCases[i][1];
1400                             found =TRUE;
1401                             break;
1402                         }
1403                     }
1404                     if (found) {
1405                         /* find out if the mapping is valid in this state */
1406                         if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) {
1407                             /*targetUniChar += data->currentDeltaToUnicode ;*/
1408                             *contextCharToUnicode= NO_CHAR_MARKER;
1409                             *toUnicodeStatus = missingCharMarker;
1410                             if (data->currentDeltaToUnicode == PNJ_DELTA) {
1411                                 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1412                                 if (data->prevToUnicodeStatus) {
1413                                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1414                                     data->prevToUnicodeStatus = 0x0000;
1415                                 }
1416                                 WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err);
1417                                 continue;
1418                             }
1419                             break;
1420                         }
1421                         /* else fall through to default */
1422                     }
1423                     /* else fall through to default */
1424                     U_FALLTHROUGH;
1425                 }
1426             default:GET_MAPPING(sourceChar,targetUniChar,data)
1427                 ;
1428                 *contextCharToUnicode = sourceChar;
1429                 break;
1430             }
1431 
1432             if (*toUnicodeStatus != missingCharMarker) {
1433                 /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */
1434                 if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) &&
1435                         (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus) {
1436                     /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */
1437                     offset = (int)(source-args->source - 3);
1438                     tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */
1439                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err);
1440                     WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err);
1441                     data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */
1442                     *toUnicodeStatus = missingCharMarker;
1443                     continue;
1444                 } else {
1445                     /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */
1446                     if (data->prevToUnicodeStatus) {
1447                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err);
1448                         data->prevToUnicodeStatus = 0x0000;
1449                     }
1450                     /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script.
1451                      * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi.
1452                      */
1453                     if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) {
1454                         targetUniChar = PNJ_TIPPI - PNJ_DELTA;
1455                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err);
1456                     } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) {
1457                         /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */
1458                         data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA;
1459                     } else {
1460                         /* write the previously mapped codepoint */
1461                         WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1462                     }
1463                 }
1464                 *toUnicodeStatus = missingCharMarker;
1465             }
1466 
1467             if (targetUniChar != missingCharMarker) {
1468                 /* now save the targetUniChar for delayed write */
1469                 *toUnicodeStatus = (UChar) targetUniChar;
1470                 if (data->resetToDefaultToUnicode==TRUE) {
1471                     data->currentDeltaToUnicode = data->defDeltaToUnicode;
1472                     data->currentMaskToUnicode = data->defMaskToUnicode;
1473                     data->resetToDefaultToUnicode=FALSE;
1474                 }
1475             } else {
1476 
1477                 /* we reach here only if targetUniChar == missingCharMarker
1478                  * so assign codes to reason and err
1479                  */
1480                 *err = U_INVALID_CHAR_FOUND;
1481 CALLBACK:
1482                 args->converter->toUBytes[0] = (uint8_t) sourceChar;
1483                 args->converter->toULength = 1;
1484                 break;
1485             }
1486 
1487         } else {
1488             *err =U_BUFFER_OVERFLOW_ERROR;
1489             break;
1490         }
1491     }
1492 
1493     if (U_SUCCESS(*err) && args->flush && source == sourceLimit) {
1494         /* end of the input stream */
1495         UConverter *cnv = args->converter;
1496 
1497         if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) {
1498             /* set toUBytes[] */
1499             cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode;
1500             cnv->toULength = 1;
1501 
1502             /* avoid looping on truncated sequences */
1503             *contextCharToUnicode = NO_CHAR_MARKER;
1504         } else {
1505             cnv->toULength = 0;
1506         }
1507 
1508         if (*toUnicodeStatus != missingCharMarker) {
1509             /* output a remaining target character */
1510             WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err);
1511             *toUnicodeStatus = missingCharMarker;
1512         }
1513     }
1514 
1515     args->target = target;
1516     args->source = source;
1517 }
1518 
1519 /* structure for SafeClone calculations */
1520 struct cloneISCIIStruct {
1521     UConverter cnv;
1522     UConverterDataISCII mydata;
1523 };
1524 
1525 static UConverter *
_ISCII_SafeClone(const UConverter * cnv,void * stackBuffer,int32_t * pBufferSize,UErrorCode * status)1526 _ISCII_SafeClone(const UConverter *cnv,
1527               void *stackBuffer,
1528               int32_t *pBufferSize,
1529               UErrorCode *status)
1530 {
1531     struct cloneISCIIStruct * localClone;
1532     int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
1533 
1534     if (U_FAILURE(*status)) {
1535         return 0;
1536     }
1537 
1538     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
1539         *pBufferSize = bufferSizeNeeded;
1540         return 0;
1541     }
1542 
1543     localClone = (struct cloneISCIIStruct *)stackBuffer;
1544     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
1545 
1546     uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII));
1547     localClone->cnv.extraInfo = &localClone->mydata;
1548     localClone->cnv.isExtraLocal = TRUE;
1549 
1550     return &localClone->cnv;
1551 }
1552 
1553 static void
_ISCIIGetUnicodeSet(const UConverter * cnv,const USetAdder * sa,UConverterUnicodeSet which,UErrorCode * pErrorCode)1554 _ISCIIGetUnicodeSet(const UConverter *cnv,
1555                     const USetAdder *sa,
1556                     UConverterUnicodeSet which,
1557                     UErrorCode *pErrorCode)
1558 {
1559     int32_t idx, script;
1560     uint8_t mask;
1561 
1562     /* Since all ISCII versions allow switching to other ISCII
1563     scripts, we add all roundtrippable characters to this set. */
1564     sa->addRange(sa->set, 0, ASCII_END);
1565     for (script = DEVANAGARI; script <= MALAYALAM; script++) {
1566         mask = (uint8_t)(lookupInitialData[script].maskEnum);
1567         for (idx = 0; idx < DELTA; idx++) {
1568             /* added check for TELUGU character */
1569             if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) {
1570                 sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN);
1571             }
1572         }
1573     }
1574     sa->add(sa->set, DANDA);
1575     sa->add(sa->set, DOUBLE_DANDA);
1576     sa->add(sa->set, ZWNJ);
1577     sa->add(sa->set, ZWJ);
1578 }
1579 
1580 static const UConverterImpl _ISCIIImpl={
1581 
1582     UCNV_ISCII,
1583 
1584     NULL,
1585     NULL,
1586 
1587     _ISCIIOpen,
1588     _ISCIIClose,
1589     _ISCIIReset,
1590 
1591     UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
1592     UConverter_toUnicode_ISCII_OFFSETS_LOGIC,
1593     UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
1594     UConverter_fromUnicode_ISCII_OFFSETS_LOGIC,
1595     NULL,
1596 
1597     NULL,
1598     _ISCIIgetName,
1599     NULL,
1600     _ISCII_SafeClone,
1601     _ISCIIGetUnicodeSet
1602 };
1603 
1604 static const UConverterStaticData _ISCIIStaticData={
1605     sizeof(UConverterStaticData),
1606         "ISCII",
1607          0,
1608          UCNV_IBM,
1609          UCNV_ISCII,
1610          1,
1611          4,
1612         { 0x1a, 0, 0, 0 },
1613         0x1,
1614         FALSE,
1615         FALSE,
1616         0x0,
1617         0x0,
1618         { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
1619 
1620 };
1621 
1622 const UConverterSharedData _ISCIIData=
1623         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl);
1624 
1625 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
1626