1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2008-2012, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.charset; 10 11 import java.nio.ByteBuffer; 12 import java.nio.CharBuffer; 13 import java.nio.IntBuffer; 14 import java.nio.charset.CharsetDecoder; 15 import java.nio.charset.CharsetEncoder; 16 import java.nio.charset.CoderResult; 17 18 import com.ibm.icu.text.UTF16; 19 import com.ibm.icu.text.UnicodeSet; 20 21 /** 22 * @author Michael Ow 23 * 24 */ 25 class CharsetISCII extends CharsetICU { 26 private static final short UCNV_OPTIONS_VERSION_MASK = 0X0f; 27 //private static final short NUKTA = 0x093c; 28 //private static final short HALANT = 0x094d; 29 private static final short ZWNJ = 0x200c; /* Zero Width Non Joiner */ 30 private static final short ZWJ = 0x200d; /* Zero Width Joiner */ 31 //private static final int INVALID_CHAR = 0xffff; 32 private static final short ATR = 0xef; /* Attribute code */ 33 private static final short EXT = 0xf0; /* Extension code */ 34 private static final short DANDA = 0x0964; 35 private static final short DOUBLE_DANDA = 0x0965; 36 private static final short ISCII_NUKTA = 0xe9; 37 private static final short ISCII_HALANT = 0xe8; 38 private static final short ISCII_DANDA = 0xea; 39 private static final short ISCII_VOWEL_SIGN_E = 0xe0; 40 private static final short ISCII_INV = 0xd9; 41 private static final short INDIC_BLOCK_BEGIN = 0x0900; 42 private static final short INDIC_BLOCK_END = 0x0d7f; 43 private static final short INDIC_RANGE = (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN); 44 private static final short VOCALLIC_RR = 0x0931; 45 private static final short LF = 0x0a; 46 private static final short ASCII_END = 
0xa0; 47 private static final short TELUGU_DELTA = (UniLang.DELTA * UniLang.TELUGU); 48 private static final short DEV_ABBR_SIGN = 0x0970; 49 private static final short DEV_ANUDATTA = 0x0952; 50 private static final short EXT_RANGE_BEGIN = 0xa1; 51 private static final short EXT_RANGE_END = 0xee; 52 private static final short PNJ_DELTA = 0x100; 53 private static final int NO_CHAR_MARKER = 0xfffe; 54 55 /* Used for proper conversion to and from Gurmukhi */ 56 private static UnicodeSet PNJ_BINDI_TIPPI_SET; 57 private static UnicodeSet PNJ_CONSONANT_SET; 58 private static final short PNJ_BINDI = 0x0a02; 59 private static final short PNJ_TIPPI = 0x0a70; 60 private static final short PNJ_SIGN_VIRAMA = 0x0a4d; 61 private static final short PNJ_ADHAK = 0x0a71; 62 private static final short PNJ_HA = 0x0a39; 63 private static final short PNJ_RRA = 0x0a5c; 64 65 private static final class UniLang { 66 static final short DEVALANGARI = 0; 67 static final short BENGALI = DEVALANGARI + 1; 68 static final short GURMUKHI = BENGALI + 1; 69 static final short GUJARATI = GURMUKHI + 1; 70 static final short ORIYA = GUJARATI + 1; 71 static final short TAMIL = ORIYA + 1; 72 static final short TELUGU = TAMIL + 1; 73 static final short KANNADA = TELUGU + 1; 74 static final short MALAYALAM = KANNADA + 1; 75 static final short DELTA = 0x80; 76 } 77 @SuppressWarnings("unused") 78 private static final class ISCIILang { 79 static final short DEF = 0x40; 80 static final short RMN = 0x41; 81 static final short DEV = 0x42; 82 static final short BNG = 0x43; 83 static final short TML = 0x44; 84 static final short TLG = 0x45; 85 static final short ASM = 0x46; 86 static final short ORI = 0x47; 87 static final short KND = 0x48; 88 static final short MLM = 0x49; 89 static final short GJR = 0x4a; 90 static final short PNJ = 0x4b; 91 static final short ARB = 0x71; 92 static final short PES = 0x72; 93 static final short URD = 0x73; 94 static final short SND = 0x74; 95 static final short KSM = 0x75; 96 
static final short PST = 0x76; 97 } 98 99 private static final class MaskEnum { 100 static final short DEV_MASK = 0x80; 101 static final short PNJ_MASK = 0x40; 102 static final short GJR_MASK = 0x20; 103 static final short ORI_MASK = 0x10; 104 static final short BNG_MASK = 0x08; 105 static final short KND_MASK = 0x04; 106 static final short MLM_MASK = 0x02; 107 static final short TML_MASK = 0x01; 108 static final short ZERO = 0x00; 109 } 110 111 //ivate final static String ISCII_CNV_PREFIX = "ISCII,version="; 112 113 private static final class UConverterDataISCII { 114 int option; 115 int contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */ 116 int contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */ 117 short defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */ 118 short currentDeltaFromUnicode; /* current delta in Indic block */ 119 short currentDeltaToUnicode; /* current delta in Indic block */ 120 short currentMaskFromUnicode; /* mask for current state in fromUnicode */ 121 short currentMaskToUnicode; /* mask for current state in toUnicode */ 122 short defMaskToUnicode; /* mask for default state in toUnicode */ 123 boolean isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */ 124 boolean resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered */ 125 int prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. 
*/ 126 UConverterDataISCII(int option)127 UConverterDataISCII(int option) { 128 this.option = option; 129 130 initialize(); 131 } 132 initialize()133 void initialize() { 134 this.contextCharToUnicode = NO_CHAR_MARKER; /* contextCharToUnicode */ 135 this.currentDeltaFromUnicode = 0x0000; /* contextCharFromUnicode */ 136 this.defDeltaToUnicode = (short)(lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].uniLang * UniLang.DELTA); /* defDeltaToUnicode */ 137 this.currentDeltaFromUnicode = (short)(lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].uniLang * UniLang.DELTA); /* currentDeltaFromUnicode */ 138 this.currentDeltaToUnicode = (short)(lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].uniLang * UniLang.DELTA); /* currentDeltaToUnicode */ 139 this.currentMaskToUnicode = lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].maskEnum; /* currentMaskToUnicode */ 140 this.currentMaskFromUnicode = lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].maskEnum; /* currentMaskFromUnicode */ 141 this.defMaskToUnicode = lookupInitialData[option & UCNV_OPTIONS_VERSION_MASK].maskEnum; /* defMaskToUnicode */ 142 this.isFirstBuffer = true; /* isFirstBuffer */ 143 this.resetToDefaultToUnicode = false; /* resetToDefaultToUnicode */ 144 this.prevToUnicodeStatus = 0x0000; 145 } 146 } 147 148 private static final class LookupDataStruct { 149 short uniLang; 150 short maskEnum; 151 short isciiLang; 152 LookupDataStruct(short uniLang, short maskEnum, short isciiLang)153 LookupDataStruct(short uniLang, short maskEnum, short isciiLang) { 154 this.uniLang = uniLang; 155 this.maskEnum = maskEnum; 156 this.isciiLang = isciiLang; 157 } 158 } 159 160 private static final LookupDataStruct [] lookupInitialData = { 161 new LookupDataStruct(UniLang.DEVALANGARI, MaskEnum.DEV_MASK, ISCIILang.DEV), 162 new LookupDataStruct(UniLang.BENGALI, MaskEnum.BNG_MASK, ISCIILang.BNG), 163 new LookupDataStruct(UniLang.GURMUKHI, MaskEnum.PNJ_MASK, ISCIILang.PNJ), 164 new 
LookupDataStruct(UniLang.GUJARATI, MaskEnum.GJR_MASK, ISCIILang.GJR), 165 new LookupDataStruct(UniLang.ORIYA, MaskEnum.ORI_MASK, ISCIILang.ORI), 166 new LookupDataStruct(UniLang.TAMIL, MaskEnum.TML_MASK, ISCIILang.TML), 167 new LookupDataStruct(UniLang.TELUGU, MaskEnum.KND_MASK, ISCIILang.TLG), 168 new LookupDataStruct(UniLang.KANNADA, MaskEnum.KND_MASK, ISCIILang.KND), 169 new LookupDataStruct(UniLang.MALAYALAM, MaskEnum.MLM_MASK, ISCIILang.MLM) 170 }; 171 172 /* 173 * The values in the validity table are indexed by the lower bits of Unicode 174 * range 0x0900 - 0x097f (the table has 128 entries). The values have a structure like: 175 * ----------------------------------------------------------------- 176 * |DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML | 177 * | | | | | ASM | KND | | | 178 * ----------------------------------------------------------------- 179 * If a code point is valid in a particular script 180 * then that bit is turned on 181 * 182 * Unicode does not distinguish between Bengali and Assamese, so we use 1 bit 183 * to represent these languages 184 * 185 * Telugu and Kannada have the same code points except for Vocallic_RR, which we special case, 186 * so we combine them and use 1 bit to represent these languages 187 */ 188 private static final short validityTable[] = { 189 /* This state table is tool generated so please do not edit unless you know exactly what you are doing */ 190 /* Note: This table was edited to mirror the Windows XP implementation */ 191 /* ISCII: Valid: Unicode */ 192 /* 0xa0: 0x00: 0x900 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 193 /* 0xa1: 0xb8: 0x901 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 194 /* 0xa2: 0xfe: 0x902 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + 
MaskEnum.TML_MASK, 195 /* 0xa3: 0xbf: 0x903 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 196 /* 0x00: 0x00: 0x904 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 197 /* 0xa4: 0xff: 0x905 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 198 /* 0xa5: 0xff: 0x906 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 199 /* 0xa6: 0xff: 0x907 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 200 /* 0xa7: 0xff: 0x908 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 201 /* 0xa8: 0xff: 0x909 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 202 /* 0xa9: 0xff: 0x90a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 203 /* 0xaa: 0xfe: 0x90b */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 204 /* 0x00: 0x00: 0x90c */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 205 /* 0xae: 0x80: 0x90d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + 
MaskEnum.ZERO, 206 /* 0xab: 0x87: 0x90e */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 207 /* 0xac: 0xff: 0x90f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 208 /* 0xad: 0xff: 0x910 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 209 /* 0xb2: 0x80: 0x911 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 210 /* 0xaf: 0x87: 0x912 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 211 /* 0xb0: 0xff: 0x913 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 212 /* 0xb1: 0xff: 0x914 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 213 /* 0xb3: 0xff: 0x915 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 214 /* 0xb4: 0xfe: 0x916 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 215 /* 0xb5: 0xfe: 0x917 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 216 /* 0xb6: 0xfe: 0x918 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + 
MaskEnum.ZERO, 217 /* 0xb7: 0xff: 0x919 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 218 /* 0xb8: 0xff: 0x91a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 219 /* 0xb9: 0xfe: 0x91b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 220 /* 0xba: 0xff: 0x91c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 221 /* 0xbb: 0xfe: 0x91d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 222 /* 0xbc: 0xff: 0x91e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 223 /* 0xbd: 0xff: 0x91f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 224 /* 0xbe: 0xfe: 0x920 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 225 /* 0xbf: 0xfe: 0x921 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 226 /* 0xc0: 0xfe: 0x922 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 227 /* 0xc1: 0xff: 0x923 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + 
MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 228 /* 0xc2: 0xff: 0x924 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 229 /* 0xc3: 0xfe: 0x925 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 230 /* 0xc4: 0xfe: 0x926 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 231 /* 0xc5: 0xfe: 0x927 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 232 /* 0xc6: 0xff: 0x928 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 233 /* 0xc7: 0x81: 0x929 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.TML_MASK, 234 /* 0xc8: 0xff: 0x92a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 235 /* 0xc9: 0xfe: 0x92b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 236 /* 0xca: 0xfe: 0x92c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 237 /* 0xcb: 0xfe: 0x92d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 238 /* 0xcc: 0xfe: 0x92e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + 
MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 239 /* 0xcd: 0xff: 0x92f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 240 /* 0xcf: 0xff: 0x930 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 241 /* 0xd0: 0x87: 0x931 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 242 /* 0xd1: 0xff: 0x932 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 243 /* 0xd2: 0xb7: 0x933 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 244 /* 0xd3: 0x83: 0x934 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 245 /* 0xd4: 0xff: 0x935 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 246 /* 0xd5: 0xfe: 0x936 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 247 /* 0xd6: 0xbf: 0x937 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 248 /* 0xd7: 0xff: 0x938 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 249 /* 0xd8: 0xff: 0x939 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK 
+ MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 250 /* 0x00: 0x00: 0x93a */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 251 /* 0x00: 0x00: 0x93b */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 252 /* 0xe9: 0xda: 0x93c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 253 /* 0x00: 0x00: 0x93d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 254 /* 0xda: 0xff: 0x93e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 255 /* 0xdb: 0xff: 0x93f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 256 /* 0xdc: 0xff: 0x940 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 257 /* 0xdd: 0xff: 0x941 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 258 /* 0xde: 0xff: 0x942 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 259 /* 0xdf: 0xbe: 0x943 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 260 /* 0x00: 0x00: 0x944 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.ZERO + 
MaskEnum.ZERO, 261 /* 0xe3: 0x80: 0x945 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 262 /* 0xe0: 0x87: 0x946 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 263 /* 0xe1: 0xff: 0x947 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 264 /* 0xe2: 0xff: 0x948 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 265 /* 0xe7: 0x80: 0x949 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 266 /* 0xe4: 0x87: 0x94a */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 267 /* 0xe5: 0xff: 0x94b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 268 /* 0xe6: 0xff: 0x94c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 269 /* 0xe8: 0xff: 0x94d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 270 /* 0xec: 0x00: 0x94e */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 271 /* 0xed: 0x00: 0x94f */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 272 /* 0x00: 0x00: 0x950 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + 
MaskEnum.GJR_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 273 /* 0x00: 0x00: 0x951 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 274 /* 0x00: 0x00: 0x952 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 275 /* 0x00: 0x00: 0x953 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 276 /* 0x00: 0x00: 0x954 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 277 /* 0x00: 0x00: 0x955 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO, 278 /* 0x00: 0x00: 0x956 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.ZERO + MaskEnum.KND_MASK + MaskEnum.ZERO + MaskEnum.ZERO, 279 /* 0x00: 0x00: 0x957 */ MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.MLM_MASK + MaskEnum.ZERO, 280 /* 0x00: 0x00: 0x958 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 281 /* 0x00: 0x00: 0x959 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 282 /* 0x00: 0x00: 0x95a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 283 /* 0x00: 0x00: 0x95b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 284 /* 0x00: 0x00: 0x95c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + 
MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 285 /* 0x00: 0x00: 0x95d */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 286 /* 0x00: 0x00: 0x95e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 287 /* 0xce: 0x98: 0x95f */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 288 /* 0x00: 0x00: 0x960 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 289 /* 0x00: 0x00: 0x961 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.ZERO, 290 /* 0x00: 0x00: 0x962 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 291 /* 0x00: 0x00: 0x963 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.BNG_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 292 /* 0xea: 0xf8: 0x964 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 293 /* 0xeaea: 0x00: 0x965 */ MaskEnum.DEV_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 294 /* 0xf1: 0xff: 0x966 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 295 /* 0xf2: 0xff: 0x967 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 296 /* 0xf3: 0xff: 0x968 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + 
MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 297 /* 0xf4: 0xff: 0x969 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 298 /* 0xf5: 0xff: 0x96a */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 299 /* 0xf6: 0xff: 0x96b */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 300 /* 0xf7: 0xff: 0x96c */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 301 /* 0xf8: 0xff: 0x96d */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 302 /* 0xf9: 0xff: 0x96e */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 303 /* 0xfa: 0xff: 0x96f */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.GJR_MASK + MaskEnum.ORI_MASK + MaskEnum.BNG_MASK + MaskEnum.KND_MASK + MaskEnum.MLM_MASK + MaskEnum.TML_MASK, 304 /* 0x00: 0x80: 0x970 */ MaskEnum.DEV_MASK + MaskEnum.PNJ_MASK + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO + MaskEnum.ZERO, 305 306 /* 307 * The length of the array is 128 to provide values for 0x900..0x97f. 308 * The last 15 entries for 0x971..0x97f of the table are all zero 309 * because no Indic script uses such Unicode code points. 
310 */ 311 312 /* 0x00: 0x00: 0x971 */ MaskEnum.ZERO, 313 /* 0x00: 0x00: 0x972 */ MaskEnum.ZERO, 314 /* 0x00: 0x00: 0x973 */ MaskEnum.ZERO, 315 /* 0x00: 0x00: 0x974 */ MaskEnum.ZERO, 316 /* 0x00: 0x00: 0x975 */ MaskEnum.ZERO, 317 /* 0x00: 0x00: 0x976 */ MaskEnum.ZERO, 318 /* 0x00: 0x00: 0x977 */ MaskEnum.ZERO, 319 /* 0x00: 0x00: 0x978 */ MaskEnum.ZERO, 320 /* 0x00: 0x00: 0x979 */ MaskEnum.ZERO, 321 /* 0x00: 0x00: 0x97A */ MaskEnum.ZERO, 322 /* 0x00: 0x00: 0x97B */ MaskEnum.ZERO, 323 /* 0x00: 0x00: 0x97C */ MaskEnum.ZERO, 324 /* 0x00: 0x00: 0x97D */ MaskEnum.ZERO, 325 /* 0x00: 0x00: 0x97E */ MaskEnum.ZERO, 326 /* 0x00: 0x00: 0x97F */ MaskEnum.ZERO, 327 }; 328 329 private static final char fromUnicodeTable[] = { 330 0x00a0, /* 0x0900 */ 331 0x00a1, /* 0x0901 */ 332 0x00a2, /* 0x0902 */ 333 0x00a3, /* 0x0903 */ 334 0xa4e0, /* 0x0904 */ 335 0x00a4, /* 0x0905 */ 336 0x00a5, /* 0x0906 */ 337 0x00a6, /* 0x0907 */ 338 0x00a7, /* 0x0908 */ 339 0x00a8, /* 0x0909 */ 340 0x00a9, /* 0x090a */ 341 0x00aa, /* 0x090b */ 342 0xA6E9, /* 0x090c */ 343 0x00ae, /* 0x090d */ 344 0x00ab, /* 0x090e */ 345 0x00ac, /* 0x090f */ 346 0x00ad, /* 0x0910 */ 347 0x00b2, /* 0x0911 */ 348 0x00af, /* 0x0912 */ 349 0x00b0, /* 0x0913 */ 350 0x00b1, /* 0x0914 */ 351 0x00b3, /* 0x0915 */ 352 0x00b4, /* 0x0916 */ 353 0x00b5, /* 0x0917 */ 354 0x00b6, /* 0x0918 */ 355 0x00b7, /* 0x0919 */ 356 0x00b8, /* 0x091a */ 357 0x00b9, /* 0x091b */ 358 0x00ba, /* 0x091c */ 359 0x00bb, /* 0x091d */ 360 0x00bc, /* 0x091e */ 361 0x00bd, /* 0x091f */ 362 0x00be, /* 0x0920 */ 363 0x00bf, /* 0x0921 */ 364 0x00c0, /* 0x0922 */ 365 0x00c1, /* 0x0923 */ 366 0x00c2, /* 0x0924 */ 367 0x00c3, /* 0x0925 */ 368 0x00c4, /* 0x0926 */ 369 0x00c5, /* 0x0927 */ 370 0x00c6, /* 0x0928 */ 371 0x00c7, /* 0x0929 */ 372 0x00c8, /* 0x092a */ 373 0x00c9, /* 0x092b */ 374 0x00ca, /* 0x092c */ 375 0x00cb, /* 0x092d */ 376 0x00cc, /* 0x092e */ 377 0x00cd, /* 0x092f */ 378 0x00cf, /* 0x0930 */ 379 0x00d0, /* 0x0931 */ 380 0x00d1, /* 0x0932 */ 381 
0x00d2, /* 0x0933 */ 382 0x00d3, /* 0x0934 */ 383 0x00d4, /* 0x0935 */ 384 0x00d5, /* 0x0936 */ 385 0x00d6, /* 0x0937 */ 386 0x00d7, /* 0x0938 */ 387 0x00d8, /* 0x0939 */ 388 0xFFFF, /* 0x093a */ 389 0xFFFF, /* 0x093b */ 390 0x00e9, /* 0x093c */ 391 0xEAE9, /* 0x093d */ 392 0x00da, /* 0x093e */ 393 0x00db, /* 0x093f */ 394 0x00dc, /* 0x0940 */ 395 0x00dd, /* 0x0941 */ 396 0x00de, /* 0x0942 */ 397 0x00df, /* 0x0943 */ 398 0xDFE9, /* 0x0944 */ 399 0x00e3, /* 0x0945 */ 400 0x00e0, /* 0x0946 */ 401 0x00e1, /* 0x0947 */ 402 0x00e2, /* 0x0948 */ 403 0x00e7, /* 0x0949 */ 404 0x00e4, /* 0x094a */ 405 0x00e5, /* 0x094b */ 406 0x00e6, /* 0x094c */ 407 0x00e8, /* 0x094d */ 408 0x00ec, /* 0x094e */ 409 0x00ed, /* 0x094f */ 410 0xA1E9, /* 0x0950 */ /* OM Symbol */ 411 0xFFFF, /* 0x0951 */ 412 0xF0B8, /* 0x0952 */ 413 0xFFFF, /* 0x0953 */ 414 0xFFFF, /* 0x0954 */ 415 0xFFFF, /* 0x0955 */ 416 0xFFFF, /* 0x0956 */ 417 0xFFFF, /* 0x0957 */ 418 0xb3e9, /* 0x0958 */ 419 0xb4e9, /* 0x0959 */ 420 0xb5e9, /* 0x095a */ 421 0xbae9, /* 0x095b */ 422 0xbfe9, /* 0x095c */ 423 0xC0E9, /* 0x095d */ 424 0xc9e9, /* 0x095e */ 425 0x00ce, /* 0x095f */ 426 0xAAe9, /* 0x0960 */ 427 0xA7E9, /* 0x0961 */ 428 0xDBE9, /* 0x0962 */ 429 0xDCE9, /* 0x0963 */ 430 0x00ea, /* 0x0964 */ 431 0xeaea, /* 0x0965 */ 432 0x00f1, /* 0x0966 */ 433 0x00f2, /* 0x0967 */ 434 0x00f3, /* 0x0968 */ 435 0x00f4, /* 0x0969 */ 436 0x00f5, /* 0x096a */ 437 0x00f6, /* 0x096b */ 438 0x00f7, /* 0x096c */ 439 0x00f8, /* 0x096d */ 440 0x00f9, /* 0x096e */ 441 0x00fa, /* 0x096f */ 442 0xF0BF, /* 0x0970 */ 443 0xFFFF, /* 0x0971 */ 444 0xFFFF, /* 0x0972 */ 445 0xFFFF, /* 0x0973 */ 446 0xFFFF, /* 0x0974 */ 447 0xFFFF, /* 0x0975 */ 448 0xFFFF, /* 0x0976 */ 449 0xFFFF, /* 0x0977 */ 450 0xFFFF, /* 0x0978 */ 451 0xFFFF, /* 0x0979 */ 452 0xFFFF, /* 0x097a */ 453 0xFFFF, /* 0x097b */ 454 0xFFFF, /* 0x097c */ 455 0xFFFF, /* 0x097d */ 456 0xFFFF, /* 0x097e */ 457 0xFFFF, /* 0x097f */ 458 }; 459 private static final char toUnicodeTable[] = { 460 
0x0000, /* 0x00 */ 461 0x0001, /* 0x01 */ 462 0x0002, /* 0x02 */ 463 0x0003, /* 0x03 */ 464 0x0004, /* 0x04 */ 465 0x0005, /* 0x05 */ 466 0x0006, /* 0x06 */ 467 0x0007, /* 0x07 */ 468 0x0008, /* 0x08 */ 469 0x0009, /* 0x09 */ 470 0x000a, /* 0x0a */ 471 0x000b, /* 0x0b */ 472 0x000c, /* 0x0c */ 473 0x000d, /* 0x0d */ 474 0x000e, /* 0x0e */ 475 0x000f, /* 0x0f */ 476 0x0010, /* 0x10 */ 477 0x0011, /* 0x11 */ 478 0x0012, /* 0x12 */ 479 0x0013, /* 0x13 */ 480 0x0014, /* 0x14 */ 481 0x0015, /* 0x15 */ 482 0x0016, /* 0x16 */ 483 0x0017, /* 0x17 */ 484 0x0018, /* 0x18 */ 485 0x0019, /* 0x19 */ 486 0x001a, /* 0x1a */ 487 0x001b, /* 0x1b */ 488 0x001c, /* 0x1c */ 489 0x001d, /* 0x1d */ 490 0x001e, /* 0x1e */ 491 0x001f, /* 0x1f */ 492 0x0020, /* 0x20 */ 493 0x0021, /* 0x21 */ 494 0x0022, /* 0x22 */ 495 0x0023, /* 0x23 */ 496 0x0024, /* 0x24 */ 497 0x0025, /* 0x25 */ 498 0x0026, /* 0x26 */ 499 0x0027, /* 0x27 */ 500 0x0028, /* 0x28 */ 501 0x0029, /* 0x29 */ 502 0x002a, /* 0x2a */ 503 0x002b, /* 0x2b */ 504 0x002c, /* 0x2c */ 505 0x002d, /* 0x2d */ 506 0x002e, /* 0x2e */ 507 0x002f, /* 0x2f */ 508 0x0030, /* 0x30 */ 509 0x0031, /* 0x31 */ 510 0x0032, /* 0x32 */ 511 0x0033, /* 0x33 */ 512 0x0034, /* 0x34 */ 513 0x0035, /* 0x35 */ 514 0x0036, /* 0x36 */ 515 0x0037, /* 0x37 */ 516 0x0038, /* 0x38 */ 517 0x0039, /* 0x39 */ 518 0x003A, /* 0x3A */ 519 0x003B, /* 0x3B */ 520 0x003c, /* 0x3c */ 521 0x003d, /* 0x3d */ 522 0x003e, /* 0x3e */ 523 0x003f, /* 0x3f */ 524 0x0040, /* 0x40 */ 525 0x0041, /* 0x41 */ 526 0x0042, /* 0x42 */ 527 0x0043, /* 0x43 */ 528 0x0044, /* 0x44 */ 529 0x0045, /* 0x45 */ 530 0x0046, /* 0x46 */ 531 0x0047, /* 0x47 */ 532 0x0048, /* 0x48 */ 533 0x0049, /* 0x49 */ 534 0x004a, /* 0x4a */ 535 0x004b, /* 0x4b */ 536 0x004c, /* 0x4c */ 537 0x004d, /* 0x4d */ 538 0x004e, /* 0x4e */ 539 0x004f, /* 0x4f */ 540 0x0050, /* 0x50 */ 541 0x0051, /* 0x51 */ 542 0x0052, /* 0x52 */ 543 0x0053, /* 0x53 */ 544 0x0054, /* 0x54 */ 545 0x0055, /* 0x55 */ 546 0x0056, /* 0x56 */ 
547 0x0057, /* 0x57 */ 548 0x0058, /* 0x58 */ 549 0x0059, /* 0x59 */ 550 0x005a, /* 0x5a */ 551 0x005b, /* 0x5b */ 552 0x005c, /* 0x5c */ 553 0x005d, /* 0x5d */ 554 0x005e, /* 0x5e */ 555 0x005f, /* 0x5f */ 556 0x0060, /* 0x60 */ 557 0x0061, /* 0x61 */ 558 0x0062, /* 0x62 */ 559 0x0063, /* 0x63 */ 560 0x0064, /* 0x64 */ 561 0x0065, /* 0x65 */ 562 0x0066, /* 0x66 */ 563 0x0067, /* 0x67 */ 564 0x0068, /* 0x68 */ 565 0x0069, /* 0x69 */ 566 0x006a, /* 0x6a */ 567 0x006b, /* 0x6b */ 568 0x006c, /* 0x6c */ 569 0x006d, /* 0x6d */ 570 0x006e, /* 0x6e */ 571 0x006f, /* 0x6f */ 572 0x0070, /* 0x70 */ 573 0x0071, /* 0x71 */ 574 0x0072, /* 0x72 */ 575 0x0073, /* 0x73 */ 576 0x0074, /* 0x74 */ 577 0x0075, /* 0x75 */ 578 0x0076, /* 0x76 */ 579 0x0077, /* 0x77 */ 580 0x0078, /* 0x78 */ 581 0x0079, /* 0x79 */ 582 0x007a, /* 0x7a */ 583 0x007b, /* 0x7b */ 584 0x007c, /* 0x7c */ 585 0x007d, /* 0x7d */ 586 0x007e, /* 0x7e */ 587 0x007f, /* 0x7f */ 588 0x0080, /* 0x80 */ 589 0x0081, /* 0x81 */ 590 0x0082, /* 0x82 */ 591 0x0083, /* 0x83 */ 592 0x0084, /* 0x84 */ 593 0x0085, /* 0x85 */ 594 0x0086, /* 0x86 */ 595 0x0087, /* 0x87 */ 596 0x0088, /* 0x88 */ 597 0x0089, /* 0x89 */ 598 0x008a, /* 0x8a */ 599 0x008b, /* 0x8b */ 600 0x008c, /* 0x8c */ 601 0x008d, /* 0x8d */ 602 0x008e, /* 0x8e */ 603 0x008f, /* 0x8f */ 604 0x0090, /* 0x90 */ 605 0x0091, /* 0x91 */ 606 0x0092, /* 0x92 */ 607 0x0093, /* 0x93 */ 608 0x0094, /* 0x94 */ 609 0x0095, /* 0x95 */ 610 0x0096, /* 0x96 */ 611 0x0097, /* 0x97 */ 612 0x0098, /* 0x98 */ 613 0x0099, /* 0x99 */ 614 0x009a, /* 0x9a */ 615 0x009b, /* 0x9b */ 616 0x009c, /* 0x9c */ 617 0x009d, /* 0x9d */ 618 0x009e, /* 0x9e */ 619 0x009f, /* 0x9f */ 620 0x00A0, /* 0xa0 */ 621 0x0901, /* 0xa1 */ 622 0x0902, /* 0xa2 */ 623 0x0903, /* 0xa3 */ 624 0x0905, /* 0xa4 */ 625 0x0906, /* 0xa5 */ 626 0x0907, /* 0xa6 */ 627 0x0908, /* 0xa7 */ 628 0x0909, /* 0xa8 */ 629 0x090a, /* 0xa9 */ 630 0x090b, /* 0xaa */ 631 0x090e, /* 0xab */ 632 0x090f, /* 0xac */ 633 0x0910, /* 0xad 
*/ 634 0x090d, /* 0xae */ 635 0x0912, /* 0xaf */ 636 0x0913, /* 0xb0 */ 637 0x0914, /* 0xb1 */ 638 0x0911, /* 0xb2 */ 639 0x0915, /* 0xb3 */ 640 0x0916, /* 0xb4 */ 641 0x0917, /* 0xb5 */ 642 0x0918, /* 0xb6 */ 643 0x0919, /* 0xb7 */ 644 0x091a, /* 0xb8 */ 645 0x091b, /* 0xb9 */ 646 0x091c, /* 0xba */ 647 0x091d, /* 0xbb */ 648 0x091e, /* 0xbc */ 649 0x091f, /* 0xbd */ 650 0x0920, /* 0xbe */ 651 0x0921, /* 0xbf */ 652 0x0922, /* 0xc0 */ 653 0x0923, /* 0xc1 */ 654 0x0924, /* 0xc2 */ 655 0x0925, /* 0xc3 */ 656 0x0926, /* 0xc4 */ 657 0x0927, /* 0xc5 */ 658 0x0928, /* 0xc6 */ 659 0x0929, /* 0xc7 */ 660 0x092a, /* 0xc8 */ 661 0x092b, /* 0xc9 */ 662 0x092c, /* 0xca */ 663 0x092d, /* 0xcb */ 664 0x092e, /* 0xcc */ 665 0x092f, /* 0xcd */ 666 0x095f, /* 0xce */ 667 0x0930, /* 0xcf */ 668 0x0931, /* 0xd0 */ 669 0x0932, /* 0xd1 */ 670 0x0933, /* 0xd2 */ 671 0x0934, /* 0xd3 */ 672 0x0935, /* 0xd4 */ 673 0x0936, /* 0xd5 */ 674 0x0937, /* 0xd6 */ 675 0x0938, /* 0xd7 */ 676 0x0939, /* 0xd8 */ 677 0x200D, /* 0xd9 */ 678 0x093e, /* 0xda */ 679 0x093f, /* 0xdb */ 680 0x0940, /* 0xdc */ 681 0x0941, /* 0xdd */ 682 0x0942, /* 0xde */ 683 0x0943, /* 0xdf */ 684 0x0946, /* 0xe0 */ 685 0x0947, /* 0xe1 */ 686 0x0948, /* 0xe2 */ 687 0x0945, /* 0xe3 */ 688 0x094a, /* 0xe4 */ 689 0x094b, /* 0xe5 */ 690 0x094c, /* 0xe6 */ 691 0x0949, /* 0xe7 */ 692 0x094d, /* 0xe8 */ 693 0x093c, /* 0xe9 */ 694 0x0964, /* 0xea */ 695 0xFFFF, /* 0xeb */ 696 0xFFFF, /* 0xec */ 697 0xFFFF, /* 0xed */ 698 0xFFFF, /* 0xee */ 699 0xFFFF, /* 0xef */ 700 0xFFFF, /* 0xf0 */ 701 0x0966, /* 0xf1 */ 702 0x0967, /* 0xf2 */ 703 0x0968, /* 0xf3 */ 704 0x0969, /* 0xf4 */ 705 0x096a, /* 0xf5 */ 706 0x096b, /* 0xf6 */ 707 0x096c, /* 0xf7 */ 708 0x096d, /* 0xf8 */ 709 0x096e, /* 0xf9 */ 710 0x096f, /* 0xfa */ 711 0xFFFF, /* 0xfb */ 712 0xFFFF, /* 0xfc */ 713 0xFFFF, /* 0xfd */ 714 0xFFFF, /* 0xfe */ 715 0xFFFF, /* 0xff */ 716 }; 717 private static final char nuktaSpecialCases[][] = { 718 { 16 /* length of array */ , 0 }, 719 { 
0xa6, 0x090c }, 720 { 0xea, 0x093d }, 721 { 0xdf, 0x0944 }, 722 { 0xa1, 0x0950 }, 723 { 0xb3, 0x0958 }, 724 { 0xb4, 0x0959 }, 725 { 0xb5, 0x095a }, 726 { 0xba, 0x095b }, 727 { 0xbf, 0x095c }, 728 { 0xc0, 0x095d }, 729 { 0xc9, 0x095e }, 730 { 0xaa, 0x0960 }, 731 { 0xa7, 0x0961 }, 732 { 0xdb, 0x0962 }, 733 { 0xdc, 0x0963 } 734 }; 735 private static final char vowelSignESpecialCases[][] = { 736 { 2 /* length of array */ , 0 }, 737 { 0xA4, 0x0904 } 738 }; 739 740 private static final short lookupTable[][] = { 741 { MaskEnum.ZERO, MaskEnum.ZERO }, /* DEFAULT */ 742 { MaskEnum.ZERO, MaskEnum.ZERO }, /* ROMAN */ 743 { UniLang.DEVALANGARI, MaskEnum.DEV_MASK }, 744 { UniLang.BENGALI, MaskEnum.BNG_MASK }, 745 { UniLang.TAMIL, MaskEnum.TML_MASK }, 746 { UniLang.TELUGU, MaskEnum.KND_MASK }, 747 { UniLang.BENGALI, MaskEnum.BNG_MASK }, 748 { UniLang.ORIYA, MaskEnum.ORI_MASK }, 749 { UniLang.KANNADA, MaskEnum.KND_MASK }, 750 { UniLang.MALAYALAM, MaskEnum.MLM_MASK }, 751 { UniLang.GUJARATI, MaskEnum.GJR_MASK }, 752 { UniLang.GURMUKHI, MaskEnum.PNJ_MASK } 753 }; 754 755 private UConverterDataISCII extraInfo = null; 756 protected byte[] fromUSubstitution = new byte[]{(byte)0x1A}; 757 CharsetISCII(String icuCanonicalName, String javaCanonicalName, String[] aliases)758 public CharsetISCII(String icuCanonicalName, String javaCanonicalName, String[] aliases) { 759 super(icuCanonicalName, javaCanonicalName, aliases); 760 maxBytesPerChar = 4; 761 minBytesPerChar = 1; 762 maxCharsPerByte = 1; 763 //get the version number of the ISCII converter 764 int option = Integer.parseInt(icuCanonicalName.substring(14)); 765 766 extraInfo = new UConverterDataISCII(option); 767 768 initializePNJSets(); 769 } 770 771 /* Initialize the two UnicodeSets use for proper Gurmukhi conversion if they have not already been created. 
*/ initializePNJSets()772 private void initializePNJSets() { 773 if (PNJ_BINDI_TIPPI_SET != null && PNJ_CONSONANT_SET != null) { 774 return; 775 } 776 PNJ_BINDI_TIPPI_SET = new UnicodeSet(); 777 PNJ_CONSONANT_SET = new UnicodeSet(); 778 779 PNJ_CONSONANT_SET.add(0x0a15, 0x0a28); 780 PNJ_CONSONANT_SET.add(0x0a2a, 0x0a30); 781 PNJ_CONSONANT_SET.add(0x0a35, 0x0a36); 782 PNJ_CONSONANT_SET.add(0x0a38, 0x0a39); 783 784 PNJ_BINDI_TIPPI_SET.addAll(PNJ_CONSONANT_SET); 785 PNJ_BINDI_TIPPI_SET.add(0x0a05); 786 PNJ_BINDI_TIPPI_SET.add(0x0a07); 787 788 PNJ_BINDI_TIPPI_SET.add(0x0a41, 0x0a42); 789 PNJ_BINDI_TIPPI_SET.add(0x0a3f); 790 791 PNJ_CONSONANT_SET.compact(); 792 PNJ_BINDI_TIPPI_SET.compact(); 793 } 794 795 /* 796 * Rules for ISCII to Unicode converter 797 * ISCII is a stateful encoding. To convert ISCII bytes to Unicode, 798 * which is both precomposed and decomposed from characters 799 * pre-context and post-context need to be considered. 800 * 801 * Post context 802 * i) ATR : Attribute code is used to declare the font and script switching. 
803 * Currently we only switch scripts and font codes consumed without generating an error 804 * ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure, 805 * obsolete characters 806 * Pre context 807 * i) Halant: if preceeded by a halant then it is a explicit halant 808 * ii) Nukta: 809 * a) if preceeded by a halant then it is a soft halant 810 * b) if preceeded by specific consonants and the ligatures have pre-composed 811 * characters in Unicode then convert to pre-composed characters 812 * iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda 813 */ 814 class CharsetDecoderISCII extends CharsetDecoderICU { CharsetDecoderISCII(CharsetICU cs)815 public CharsetDecoderISCII(CharsetICU cs) { 816 super(cs); 817 implReset(); 818 } 819 820 @Override implReset()821 protected void implReset() { 822 super.implReset(); 823 this.toUnicodeStatus = 0xFFFF; 824 extraInfo.initialize(); 825 } 826 827 @Override 828 @SuppressWarnings("fallthrough") decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush)829 protected CoderResult decodeLoop(ByteBuffer source, CharBuffer target, IntBuffer offsets, boolean flush) { 830 CoderResult cr = CoderResult.UNDERFLOW; 831 int targetUniChar = 0x0000; 832 short sourceChar = 0x0000; 833 UConverterDataISCII data; 834 boolean gotoCallBack = false; 835 int offset = 0; 836 837 data = extraInfo; 838 //data.contextCharToUnicode; /* contains previous ISCII codepoint visited */ 839 //this.toUnicodeStatus; /* contains the mapping to Unicode of the above codepoint */ 840 841 while (source.hasRemaining()) { 842 targetUniChar = UConverterConstants.missingCharMarker; 843 844 if (target.hasRemaining()) { 845 sourceChar = (short)(source.get() & UConverterConstants.UNSIGNED_BYTE_MASK); 846 847 /* look at the post-context perform special processing */ 848 if (data.contextCharToUnicode == ATR) { 849 /* If we have ATR in data.contextCharToUnicode then we need to change our 850 * state to Indic 
Script specified by sourceChar 851 */ 852 /* check if the sourceChar is supported script range */ 853 if (((short)(ISCIILang.PNJ - sourceChar) & UConverterConstants.UNSIGNED_BYTE_MASK) <= (ISCIILang.PNJ - ISCIILang.DEV)) { 854 data.currentDeltaToUnicode = (short)(lookupTable[sourceChar & 0x0F][0] * UniLang.DELTA); 855 data.currentMaskToUnicode = lookupTable[sourceChar & 0x0F][1]; 856 } else if (sourceChar == ISCIILang.DEF) { 857 /* switch back to default */ 858 data.currentDeltaToUnicode = data.defDeltaToUnicode; 859 data.currentMaskToUnicode = data.defMaskToUnicode; 860 } else { 861 if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) { 862 /* these are display codes consume and continue */ 863 } else { 864 cr = CoderResult.malformedForLength(1); 865 /* reset */ 866 data.contextCharToUnicode = NO_CHAR_MARKER; 867 gotoCallBack = true; 868 } 869 } 870 /* reset */ 871 if (!gotoCallBack) { 872 data.contextCharToUnicode = NO_CHAR_MARKER; 873 continue; 874 } 875 } else if (data.contextCharToUnicode == EXT) { 876 /* check if sourceChar is in 0xA1 - 0xEE range */ 877 if (((short)(EXT_RANGE_END - sourceChar) & UConverterConstants.UNSIGNED_BYTE_MASK) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) { 878 /* We currently support only Anudatta and Devanagari abbreviation sign */ 879 if (sourceChar == 0xBF || sourceChar == 0xB8) { 880 targetUniChar = (sourceChar == 0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA; 881 882 /* find out if the mappling is valid in this state */ 883 if ((validityTable[((short)targetUniChar) & UConverterConstants.UNSIGNED_BYTE_MASK] & data.currentMaskToUnicode) > 0) { 884 data.contextCharToUnicode = NO_CHAR_MARKER; 885 886 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. 
*/ 887 if (data.prevToUnicodeStatus != 0) { 888 cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0); 889 data.prevToUnicodeStatus = 0x0000; 890 } 891 /* write to target */ 892 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode); 893 894 continue; 895 } 896 } 897 /* byte unit is unassigned */ 898 targetUniChar = UConverterConstants.missingCharMarker; 899 cr = CoderResult.unmappableForLength(1); 900 } else { 901 /* only 0xA1 - 0xEE are legal after EXT char */ 902 data.contextCharToUnicode = NO_CHAR_MARKER; 903 cr = CoderResult.malformedForLength(1); 904 } 905 gotoCallBack = true; 906 } else if (data.contextCharToUnicode == ISCII_INV) { 907 if (sourceChar == ISCII_HALANT) { 908 targetUniChar = 0x0020; /* replace with space according to Indic FAQ */ 909 } else { 910 targetUniChar = ZWJ; 911 } 912 913 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ 914 if (data.prevToUnicodeStatus != 0) { 915 cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0); 916 data.prevToUnicodeStatus = 0x0000; 917 } 918 919 /* write to target */ 920 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode); 921 /* reset */ 922 data.contextCharToUnicode = NO_CHAR_MARKER; 923 } 924 925 /* look at the pre-context and perform special processing */ 926 if (!gotoCallBack) { 927 switch (sourceChar) { 928 case ISCII_INV: 929 case EXT: /* falls through */ 930 case ATR: 931 data.contextCharToUnicode = (char)sourceChar; 932 933 if (this.toUnicodeStatus != UConverterConstants.missingCharMarker) { 934 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. 
*/ 935 if (data.prevToUnicodeStatus != 0) { 936 cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0); 937 data.prevToUnicodeStatus = 0x0000; 938 } 939 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode); 940 this.toUnicodeStatus = UConverterConstants.missingCharMarker; 941 } 942 continue; 943 case ISCII_DANDA: 944 /* handle double danda */ 945 if (data.contextCharToUnicode == ISCII_DANDA) { 946 targetUniChar = DOUBLE_DANDA; 947 /* clear the context */ 948 data.contextCharToUnicode = NO_CHAR_MARKER; 949 this.toUnicodeStatus = UConverterConstants.missingCharMarker; 950 } else { 951 targetUniChar = GetMapping(sourceChar, targetUniChar, data); 952 data.contextCharToUnicode = (char)sourceChar; 953 } 954 break; 955 case ISCII_HALANT: 956 /* handle explicit halant */ 957 if (data.contextCharToUnicode == ISCII_HALANT) { 958 targetUniChar = ZWNJ; 959 /* clear context */ 960 data.contextCharToUnicode = NO_CHAR_MARKER; 961 } else { 962 targetUniChar = GetMapping(sourceChar, targetUniChar, data); 963 data.contextCharToUnicode = (char)sourceChar; 964 } 965 break; 966 case 0x0A: 967 /* fall through */ 968 case 0x0D: 969 data.resetToDefaultToUnicode = true; 970 targetUniChar = GetMapping(sourceChar, targetUniChar, data); 971 data.contextCharToUnicode = (char)sourceChar; 972 break; 973 case ISCII_VOWEL_SIGN_E: 974 /* find <CHAR> + SIGN_VOWEL_E special mapping */ 975 int n = 1; 976 boolean find = false; 977 for (; n < vowelSignESpecialCases[0][0]; n++) { 978 if (vowelSignESpecialCases[n][0] == ((short)data.contextCharToUnicode & UConverterConstants.UNSIGNED_BYTE_MASK)) { 979 targetUniChar = vowelSignESpecialCases[n][1]; 980 find = true; 981 break; 982 } 983 } 984 if (find) { 985 /* find out if the mapping is valid in this state */ 986 if ((validityTable[(byte)targetUniChar] & data.currentMaskFromUnicode) > 0) { 987 data.contextCharToUnicode = NO_CHAR_MARKER; 
988 this.toUnicodeStatus = UConverterConstants.missingCharMarker; 989 break; 990 } 991 } 992 targetUniChar = GetMapping(sourceChar, targetUniChar, data); 993 data.contextCharToUnicode = (char)sourceChar; 994 break; 995 case ISCII_NUKTA: 996 /* handle soft halant */ 997 if (data.contextCharToUnicode == ISCII_HALANT) { 998 targetUniChar = ZWJ; 999 /* clear the context */ 1000 data.contextCharToUnicode = NO_CHAR_MARKER; 1001 break; 1002 } else if (data.currentDeltaToUnicode == PNJ_DELTA && data.contextCharToUnicode == 0xc0) { 1003 /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi. 1004 * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39). 1005 * WriteToTargetToU is given 0x095c instead of 0xa5c because that method will automatically 1006 * convert the code point given based on the delta provided. 1007 */ 1008 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, PNJ_RRA, (short)0); 1009 if (!cr.isOverflow()) { 1010 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, PNJ_SIGN_VIRAMA, (short)0); 1011 if (!cr.isOverflow()) { 1012 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, PNJ_HA, (short)0); 1013 } else { 1014 this.charErrorBufferArray[this.charErrorBufferLength++] = PNJ_HA; 1015 } 1016 } else { 1017 this.charErrorBufferArray[this.charErrorBufferLength++] = PNJ_SIGN_VIRAMA; 1018 this.charErrorBufferArray[this.charErrorBufferLength++] = PNJ_HA; 1019 } 1020 this.toUnicodeStatus = UConverterConstants.missingCharMarker; 1021 data.contextCharToUnicode = NO_CHAR_MARKER; 1022 if (!cr.isError()) { 1023 continue; 1024 } 1025 break; 1026 } else { 1027 /* try to handle <CHAR> + ISCII_NUKTA special mappings */ 1028 int i = 1; 1029 boolean found = false; 1030 for (; i < nuktaSpecialCases[0][0]; i++) { 1031 if (nuktaSpecialCases[i][0] == ((short)data.contextCharToUnicode & UConverterConstants.UNSIGNED_BYTE_MASK)) { 1032 targetUniChar = nuktaSpecialCases[i][1]; 1033 
found = true; 1034 break; 1035 } 1036 } 1037 if (found) { 1038 /* find out if the mapping is valid in this state */ 1039 if ((validityTable[(byte)targetUniChar] & data.currentMaskToUnicode) > 0) { 1040 data.contextCharToUnicode = NO_CHAR_MARKER; 1041 this.toUnicodeStatus = UConverterConstants.missingCharMarker; 1042 if (data.currentDeltaToUnicode == PNJ_DELTA) { 1043 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ 1044 if (data.prevToUnicodeStatus != 0) { 1045 cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0); 1046 data.prevToUnicodeStatus = 0x0000; 1047 } 1048 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, targetUniChar, data.currentDeltaToUnicode); 1049 continue; 1050 } 1051 break; 1052 } 1053 /* else fall through to default */ 1054 } 1055 /* else fall through to default */ 1056 } 1057 1058 default: 1059 targetUniChar = GetMapping(sourceChar, targetUniChar, data); 1060 data.contextCharToUnicode = (char)sourceChar; 1061 break; 1062 } //end of switch 1063 }//end of CallBack if statement 1064 1065 if (!gotoCallBack && this.toUnicodeStatus != UConverterConstants.missingCharMarker) { 1066 /* Check to make sure that consonant clusters are handled correctly for Gurmukhi script. 
*/ 1067 if (data.currentDeltaToUnicode == PNJ_DELTA && data.prevToUnicodeStatus != 0 && PNJ_CONSONANT_SET.contains(data.prevToUnicodeStatus) && 1068 (this.toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && (targetUniChar + PNJ_DELTA) == data.prevToUnicodeStatus) { 1069 if (offsets != null) { 1070 offset = source.position() - 3; 1071 } 1072 cr = WriteToTargetToU(offsets, offset, source, target, PNJ_ADHAK, (short)0); 1073 cr = WriteToTargetToU(offsets, offset, source, target, data.prevToUnicodeStatus, (short)0); 1074 data.prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */ 1075 toUnicodeStatus = UConverterConstants.missingCharMarker; 1076 continue; 1077 } else { 1078 /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ 1079 if (data.prevToUnicodeStatus != 0) { 1080 cr = WriteToTargetToU(offsets, (source.position() - 1), source, target, data.prevToUnicodeStatus, (short)0); 1081 data.prevToUnicodeStatus = 0x0000; 1082 } 1083 /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. 1084 * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi. 1085 */ 1086 if (data.currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && PNJ_BINDI_TIPPI_SET.contains(this.toUnicodeStatus + PNJ_DELTA)) { 1087 targetUniChar = PNJ_TIPPI - PNJ_DELTA; 1088 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, PNJ_DELTA); 1089 } else if (data.currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && PNJ_CONSONANT_SET.contains(this.toUnicodeStatus + PNJ_DELTA)) { 1090 /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. 
*/ 1091 data.prevToUnicodeStatus = this.toUnicodeStatus + PNJ_DELTA; 1092 } else { 1093 /* write the previously mapped codepoint */ 1094 cr = WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode); 1095 } 1096 } 1097 this.toUnicodeStatus = UConverterConstants.missingCharMarker; 1098 } 1099 1100 if (!gotoCallBack && targetUniChar != UConverterConstants.missingCharMarker) { 1101 /* now save the targetUniChar for delayed write */ 1102 this.toUnicodeStatus = (char)targetUniChar; 1103 if (data.resetToDefaultToUnicode) { 1104 data.currentDeltaToUnicode = data.defDeltaToUnicode; 1105 data.currentMaskToUnicode = data.defMaskToUnicode; 1106 data.resetToDefaultToUnicode = false; 1107 } 1108 } else { 1109 /* we reach here only if targetUniChar == missingCharMarker 1110 * so assign codes to reason and err 1111 */ 1112 if (!gotoCallBack) { 1113 cr = CoderResult.unmappableForLength(1); 1114 } 1115 //CallBack : 1116 toUBytesArray[0] = (byte)sourceChar; 1117 toULength = 1; 1118 gotoCallBack = false; 1119 break; 1120 } 1121 } else { 1122 cr = CoderResult.OVERFLOW; 1123 break; 1124 } 1125 1126 } //end of while 1127 1128 if (cr.isUnderflow() && flush && !source.hasRemaining()) { 1129 /*end of the input stream */ 1130 if (data.contextCharToUnicode == ATR || data.contextCharToUnicode == EXT || data.contextCharToUnicode == ISCII_INV) { 1131 /* set toUBytes[] */ 1132 toUBytesArray[0] = (byte)data.contextCharToUnicode; 1133 toULength = 1; 1134 1135 /* avoid looping on truncated sequences */ 1136 data.contextCharToUnicode = NO_CHAR_MARKER; 1137 } else { 1138 toULength = 0; 1139 } 1140 1141 if (this.toUnicodeStatus != UConverterConstants.missingCharMarker) { 1142 /* output a remaining target character */ 1143 WriteToTargetToU(offsets, (source.position() - 2), source, target, this.toUnicodeStatus, data.currentDeltaToUnicode); 1144 this.toUnicodeStatus = UConverterConstants.missingCharMarker; 1145 } 1146 } 1147 return cr; 1148 } 
1149 WriteToTargetToU(IntBuffer offsets, int offset, ByteBuffer source, CharBuffer target, int targetUniChar, short delta)1150 private CoderResult WriteToTargetToU(IntBuffer offsets, int offset, ByteBuffer source, CharBuffer target, int targetUniChar, short delta) { 1151 CoderResult cr = CoderResult.UNDERFLOW; 1152 /* add offset to current Indic Block */ 1153 if (targetUniChar > ASCII_END && 1154 targetUniChar != ZWJ && 1155 targetUniChar != ZWNJ && 1156 targetUniChar != DANDA && 1157 targetUniChar != DOUBLE_DANDA) { 1158 targetUniChar += delta; 1159 } 1160 1161 /* now write the targetUniChar */ 1162 if (target.hasRemaining()) { 1163 target.put((char)targetUniChar); 1164 if (offsets != null) { 1165 offsets.put(offset); 1166 } 1167 } else { 1168 charErrorBufferArray[charErrorBufferLength++] = (char)targetUniChar; 1169 cr = CoderResult.OVERFLOW; 1170 } 1171 return cr; 1172 } 1173 GetMapping(short sourceChar, int targetUniChar, UConverterDataISCII data)1174 private int GetMapping(short sourceChar, int targetUniChar, UConverterDataISCII data) { 1175 targetUniChar = toUnicodeTable[sourceChar]; 1176 /* is the code point valid in current script? 
*/ 1177 if (sourceChar > ASCII_END && 1178 (validityTable[targetUniChar & 0x7F] & data.currentMaskToUnicode) == 0) { 1179 /* Vocallic RR is assigne in ISCII Telugu and Unicode */ 1180 if (data.currentDeltaToUnicode != (TELUGU_DELTA) || targetUniChar != VOCALLIC_RR) { 1181 targetUniChar = UConverterConstants.missingCharMarker; 1182 } 1183 } 1184 return targetUniChar; 1185 } 1186 } 1187 1188 /* 1189 * Rules: 1190 * Explicit Halant : 1191 * <HALANT> + <ZWNJ> 1192 * Soft Halant : 1193 * <HALANT> + <ZWJ> 1194 */ 1195 class CharsetEncoderISCII extends CharsetEncoderICU { CharsetEncoderISCII(CharsetICU cs)1196 public CharsetEncoderISCII(CharsetICU cs) { 1197 super(cs, fromUSubstitution); 1198 implReset(); 1199 } 1200 1201 @Override implReset()1202 protected void implReset() { 1203 super.implReset(); 1204 extraInfo.initialize(); 1205 } 1206 1207 @Override encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush)1208 protected CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush) { 1209 int targetByteUnit = 0x0000; 1210 int sourceChar = 0x0000; 1211 UConverterDataISCII converterData; 1212 short newDelta = 0; 1213 short range = 0; 1214 boolean deltaChanged = false; 1215 int tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */ 1216 CoderResult cr = CoderResult.UNDERFLOW; 1217 1218 /* initialize data */ 1219 converterData = extraInfo; 1220 newDelta = converterData.currentDeltaFromUnicode; 1221 range = (short)(newDelta / UniLang.DELTA); 1222 1223 if ((sourceChar = fromUChar32) != 0) { 1224 cr = handleSurrogates(source, (char) sourceChar); 1225 return (cr != null) ? cr : CoderResult.unmappableForLength(2); 1226 } 1227 1228 /* writing the char to the output stream */ 1229 while (source.hasRemaining()) { 1230 if (!target.hasRemaining()) { 1231 return CoderResult.OVERFLOW; 1232 } 1233 1234 /* Write the language code following LF only if LF is not the last character. 
*/ 1235 if (fromUnicodeStatus == LF) { 1236 targetByteUnit = ATR << 8; 1237 targetByteUnit += 0xff & (byte)lookupInitialData[range].isciiLang; 1238 fromUnicodeStatus = 0x0000; 1239 /* now append ATR and language code */ 1240 cr = WriteToTargetFromU(offsets, source, target, targetByteUnit); 1241 if (cr.isOverflow()) { 1242 break; 1243 } 1244 } 1245 1246 sourceChar = source.get(); 1247 tempContextFromUnicode = converterData.contextCharFromUnicode; 1248 1249 targetByteUnit = UConverterConstants.missingCharMarker; 1250 1251 /* check if input is in ASCII and C0 control codes range */ 1252 if (sourceChar <= ASCII_END) { 1253 fromUnicodeStatus = sourceChar; 1254 cr = WriteToTargetFromU(offsets, source, target, sourceChar); 1255 if (cr.isOverflow()) { 1256 break; 1257 } 1258 continue; 1259 } 1260 1261 switch (sourceChar) { 1262 case ZWNJ: 1263 /* contextChar has HALANT */ 1264 if (converterData.contextCharFromUnicode != 0) { 1265 converterData.contextCharFromUnicode = 0x00; 1266 targetByteUnit = ISCII_HALANT; 1267 } else { 1268 /* consume ZWNJ and continue */ 1269 converterData.contextCharFromUnicode = 0x00; 1270 continue; 1271 } 1272 break; 1273 case ZWJ: 1274 /* contextChar has HALANT */ 1275 if (converterData.contextCharFromUnicode != 0) { 1276 targetByteUnit = ISCII_NUKTA; 1277 } else { 1278 targetByteUnit = ISCII_INV; 1279 } 1280 converterData.contextCharFromUnicode = 0x00; 1281 break; 1282 default: 1283 /* is the sourceChar in the INDIC_RANGE? */ 1284 if((char)(INDIC_BLOCK_END - sourceChar) <= INDIC_RANGE) { 1285 /* Danda and Doube Danda are valid in Northern scripts.. 
since Unicode 1286 * does not include these codepoints in all Northern scripts we need to 1287 * filter them out 1288 */ 1289 if (sourceChar != DANDA && sourceChar != DOUBLE_DANDA) { 1290 /* find out to which block the sourceChar belongs */ 1291 range = (short)((sourceChar - INDIC_BLOCK_BEGIN) / UniLang.DELTA); 1292 newDelta = (short)(range * UniLang.DELTA); 1293 1294 /* Now are we in the same block as previous? */ 1295 if (newDelta != converterData.currentDeltaFromUnicode || converterData.isFirstBuffer) { 1296 converterData.currentDeltaFromUnicode = newDelta; 1297 converterData.currentMaskFromUnicode = lookupInitialData[range].maskEnum; 1298 deltaChanged = true; 1299 converterData.isFirstBuffer = false; 1300 } 1301 if (converterData.currentDeltaFromUnicode == PNJ_DELTA) { 1302 if (sourceChar == PNJ_TIPPI) { 1303 /* Make sure Tippi is converterd to Bindi. */ 1304 sourceChar = PNJ_BINDI; 1305 } else if (sourceChar == PNJ_ADHAK) { 1306 /* This is for consonant cluster handling. */ 1307 converterData.contextCharFromUnicode = PNJ_ADHAK; 1308 } 1309 } 1310 /* Normalize all Indic codepoints to Devanagari and map them to ISCII */ 1311 /* now subtract the new delta from sourceChar */ 1312 sourceChar -= converterData.currentDeltaFromUnicode; 1313 } 1314 /* get the target byte unit */ 1315 targetByteUnit = fromUnicodeTable[(short)sourceChar & UConverterConstants.UNSIGNED_BYTE_MASK]; 1316 1317 /* is the code point valid in current script? 
*/ 1318 if ((validityTable[(short)sourceChar & UConverterConstants.UNSIGNED_BYTE_MASK] & converterData.currentMaskFromUnicode) == 0) { 1319 /* Vocallic RR is assigned in ISCII Telugu and Unicode */ 1320 if (converterData.currentDeltaFromUnicode != (TELUGU_DELTA) || sourceChar != VOCALLIC_RR) { 1321 targetByteUnit = UConverterConstants.missingCharMarker; 1322 } 1323 } 1324 1325 if (deltaChanged) { 1326 /* we are in a script block which is different than 1327 * previous sourceChar's script block write ATR and language codes 1328 */ 1329 char temp = 0; 1330 temp = (char)(ATR << 8); 1331 temp += (char)(lookupInitialData[range].isciiLang & UConverterConstants.UNSIGNED_BYTE_MASK); 1332 /* reset */ 1333 deltaChanged = false; 1334 /* now append ATR and language code */ 1335 cr = WriteToTargetFromU(offsets, source, target, temp); 1336 if (cr.isOverflow()) { 1337 break; 1338 } 1339 } 1340 if (converterData.currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) { 1341 continue; 1342 } 1343 } 1344 /* reset context char */ 1345 converterData.contextCharFromUnicode = 0x00; 1346 break; 1347 } //end of switch 1348 if (converterData.currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && PNJ_CONSONANT_SET.contains(sourceChar + PNJ_DELTA)) { 1349 /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. 
*/ 1350 /* reset context char */ 1351 converterData.contextCharFromUnicode = 0x0000; 1352 targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit; 1353 /*write targetByteUnit to target */ 1354 cr = WriteToTargetFromU(offsets, source, target, targetByteUnit); 1355 if (cr.isOverflow()) { 1356 break; 1357 } 1358 } else if (targetByteUnit != UConverterConstants.missingCharMarker) { 1359 if (targetByteUnit == ISCII_HALANT) { 1360 converterData.contextCharFromUnicode = (char)targetByteUnit; 1361 } 1362 /*write targetByteUnit to target */ 1363 cr = WriteToTargetFromU(offsets, source, target, targetByteUnit); 1364 if (cr.isOverflow()) { 1365 break; 1366 } 1367 } else if (UTF16.isSurrogate((char)sourceChar)) { 1368 cr = handleSurrogates(source, (char) sourceChar); 1369 return (cr != null) ? cr : CoderResult.unmappableForLength(2); 1370 } else { 1371 return CoderResult.unmappableForLength(1); 1372 } 1373 } /* end of while */ 1374 1375 /* save the state and return */ 1376 return cr; 1377 } 1378 WriteToTargetFromU(IntBuffer offsets, CharBuffer source, ByteBuffer target, int targetByteUnit)1379 private CoderResult WriteToTargetFromU(IntBuffer offsets, CharBuffer source, ByteBuffer target, int targetByteUnit) { 1380 CoderResult cr = CoderResult.UNDERFLOW; 1381 int offset = source.position() - 1; 1382 /* write the targetUniChar to target */ 1383 if (target.hasRemaining()) { 1384 if (targetByteUnit <= 0xFF) { 1385 target.put((byte)targetByteUnit); 1386 if (offsets != null) { 1387 offsets.put(offset); 1388 } 1389 } else { 1390 if (targetByteUnit > 0xFFFF) { 1391 target.put((byte)(targetByteUnit >> 16)); 1392 if (offsets != null) { 1393 --offset; 1394 offsets.put(offset); 1395 } 1396 } 1397 if (!target.hasRemaining()) { 1398 errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 8); 1399 errorBuffer[errorBufferLength++] = (byte)targetByteUnit; 1400 cr = CoderResult.OVERFLOW; 1401 return cr; 1402 } 1403 target.put((byte)(targetByteUnit >> 8)); 1404 if (offsets 
!= null) { 1405 offsets.put(offset); 1406 } 1407 if (target.hasRemaining()) { 1408 target.put((byte)targetByteUnit); 1409 if (offsets != null) { 1410 offsets.put(offset); 1411 } 1412 } else { 1413 errorBuffer[errorBufferLength++] = (byte)targetByteUnit; 1414 cr = CoderResult.OVERFLOW; 1415 } 1416 } 1417 } else { 1418 if ((targetByteUnit > 0xFFFF)) { 1419 errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 16); 1420 } else if ((targetByteUnit & 0xFF00) > 0) { 1421 errorBuffer[errorBufferLength++] = (byte)(targetByteUnit >> 8); 1422 } 1423 errorBuffer[errorBufferLength++] = (byte)(targetByteUnit); 1424 cr = CoderResult.OVERFLOW; 1425 } 1426 return cr; 1427 } 1428 } 1429 1430 @Override newDecoder()1431 public CharsetDecoder newDecoder() { 1432 return new CharsetDecoderISCII(this); 1433 } 1434 1435 @Override newEncoder()1436 public CharsetEncoder newEncoder() { 1437 return new CharsetEncoderISCII(this); 1438 } 1439 1440 @Override getUnicodeSetImpl( UnicodeSet setFillIn, int which)1441 void getUnicodeSetImpl( UnicodeSet setFillIn, int which){ 1442 int idx,script; 1443 char mask; 1444 1445 setFillIn.add(0,ASCII_END ); 1446 for(script = UniLang.DEVALANGARI ; script<= UniLang.MALAYALAM ;script++){ 1447 mask = (char)lookupInitialData[script].maskEnum; 1448 for(idx=0; idx < UniLang.DELTA ; idx++){ 1449 // Special check for telugu character 1450 if((validityTable[idx] & mask)!=0 || (script == UniLang.TELUGU && idx==0x31)){ 1451 setFillIn.add(idx+(script*UniLang.DELTA)+INDIC_BLOCK_BEGIN ); 1452 } 1453 } 1454 } 1455 setFillIn.add(DANDA); 1456 setFillIn.add(DOUBLE_DANDA); 1457 setFillIn.add(ZWNJ); 1458 setFillIn.add(ZWJ); 1459 1460 } 1461 } 1462