1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1996-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.impl; 11 12 import java.io.IOException; 13 import java.nio.ByteBuffer; 14 import java.util.Iterator; 15 import java.util.MissingResourceException; 16 17 import com.ibm.icu.lang.UCharacter; 18 import com.ibm.icu.lang.UCharacter.HangulSyllableType; 19 import com.ibm.icu.lang.UCharacter.NumericType; 20 import com.ibm.icu.lang.UCharacterCategory; 21 import com.ibm.icu.lang.UProperty; 22 import com.ibm.icu.lang.UScript; 23 import com.ibm.icu.text.Normalizer2; 24 import com.ibm.icu.text.UTF16; 25 import com.ibm.icu.text.UnicodeSet; 26 import com.ibm.icu.util.CodePointMap; 27 import com.ibm.icu.util.CodePointTrie; 28 import com.ibm.icu.util.ICUException; 29 import com.ibm.icu.util.VersionInfo; 30 31 /** 32 * <p>Internal class used for Unicode character property database.</p> 33 * <p>This classes store binary data read from uprops.icu. 34 * It does not have the capability to parse the data into more high-level 35 * information. 
It only returns bytes of information when required.</p> 36 * <p>Due to the form most commonly used for retrieval, array of char is used 37 * to store the binary data.</p> 38 * <p>UCharacterPropertyDB also contains information on accessing indexes to 39 * significant points in the binary data.</p> 40 * <p>Responsibility for molding the binary data into more meaningful form lies on 41 * <a href=UCharacter.html>UCharacter</a>.</p> 42 * @author Syn Wee Quek 43 * @since release 2.1, february 1st 2002 44 */ 45 46 public final class UCharacterProperty 47 { 48 // public data members ----------------------------------------------- 49 50 /* 51 * public singleton instance 52 */ 53 public static final UCharacterProperty INSTANCE; 54 55 /** 56 * Trie data 57 */ 58 public Trie2_16 m_trie_; 59 /** 60 * Unicode version 61 */ 62 public VersionInfo m_unicodeVersion_; 63 /** 64 * Latin capital letter i with dot above 65 */ 66 public static final char LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_ = 0x130; 67 /** 68 * Latin small letter dotless i 69 */ 70 public static final char LATIN_SMALL_LETTER_DOTLESS_I_ = 0x131; 71 /** 72 * Latin lowercase i 73 */ 74 public static final char LATIN_SMALL_LETTER_I_ = 0x69; 75 /** 76 * Character type mask 77 */ 78 public static final int TYPE_MASK = 0x1F; 79 80 // uprops.h enum UPropertySource --------------------------------------- *** 81 82 /** No source, not a supported property. 
*/ 83 public static final int SRC_NONE=0; 84 /** From uchar.c/uprops.icu main trie */ 85 public static final int SRC_CHAR=1; 86 /** From uchar.c/uprops.icu properties vectors trie */ 87 public static final int SRC_PROPSVEC=2; 88 /** From unames.c/unames.icu */ 89 public static final int SRC_NAMES=3; 90 /** From ucase.c/ucase.icu */ 91 public static final int SRC_CASE=4; 92 /** From ubidi_props.c/ubidi.icu */ 93 public static final int SRC_BIDI=5; 94 /** From uchar.c/uprops.icu main trie as well as properties vectors trie */ 95 public static final int SRC_CHAR_AND_PROPSVEC=6; 96 /** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */ 97 public static final int SRC_CASE_AND_NORM=7; 98 /** From normalizer2impl.cpp/nfc.nrm */ 99 public static final int SRC_NFC=8; 100 /** From normalizer2impl.cpp/nfkc.nrm */ 101 public static final int SRC_NFKC=9; 102 /** From normalizer2impl.cpp/nfkc_cf.nrm */ 103 public static final int SRC_NFKC_CF=10; 104 /** From normalizer2impl.cpp/nfc.nrm canonical iterator data */ 105 public static final int SRC_NFC_CANON_ITER=11; 106 // Text layout properties. 107 public static final int SRC_INPC=12; 108 public static final int SRC_INSC=13; 109 public static final int SRC_VO=14; 110 /** One more than the highest UPropertySource (SRC_) constant. */ 111 public static final int SRC_COUNT=15; 112 113 // hardcoded text layout properties ---------------------------------- 114 // TODO(ICU-20111): move to a data file and load on demand 115 makeTrie(String data)116 private static final CodePointTrie makeTrie(String data) { 117 // One char == one byte. 118 // U+0000 and U+007A='z' are swapped because 119 // Java class String literals encode U+0000 and U+0080..U+07FF in two bytes. 
120 byte[] bytes = new byte[data.length()]; 121 for (int i = 0; i < bytes.length; ++i) { 122 char c = data.charAt(i); 123 if (c == 0) { 124 c = 'z'; 125 } else if (c == 'z') { 126 c = 0; 127 } 128 assert 0 <= c && c <= 0xff; 129 bytes[i] = (byte)c; 130 } 131 return CodePointTrie.fromBinary(null, null, ByteBuffer.wrap(bytes)); 132 } 133 134 // Do not store the data in static String variables because 135 // those would not be garbage-collected. 136 137 private static final class InPCTrie { 138 static final CodePointTrie INSTANCE = makeTrie( 139 "\63\151\162\124\102z\375\2\162\13\2zzz\220z" + 140 "zz\100zzzzzzzzzzzzz" + 141 "zzzzzzzzzzzzzzzz" + 142 "zzzzzzzzzzzzzzzz" + 143 "zzzzzzzzzzzzzzzz" + 144 "zzzzzzzz\200z\300z\377z\77\1" + 145 "\176\1\276\1\176\1\376\1\76\2\176\2\274\2\374\2" + 146 "\74\3\173\3\76\2\273\3\373\3\71\4\167\4\255\4" + 147 "\341\4\41\5\61\5\161\5\231\5\331\5\31\6\126\6" + 148 "\267\2\306\2\322\2\306\2\355\2zz\20z\40z" + 149 "\60z\100z\120z\140z\160zzz\20z\40z" + 150 "\60zzz\20z\40z\60zzz\20z\40z" + 151 "\60zzz\20z\40z\60zzz\20z\40z" + 152 "\60zzz\20z\40z\60zzz\20z\40z" + 153 "\60zzz\20z\40z\60z\200z\220z\240z" + 154 "\260z\300z\320z\340z\360z\377z\17\1\37\1" + 155 "\57\1\77\1\117\1\137\1\157\1\176\1\216\1\236\1" + 156 "\256\1\276\1\316\1\336\1\356\1\176\1\216\1\236\1" + 157 "\256\1\376\1\16\2\36\2\56\2\76\2\116\2\136\2" + 158 "\156\2\176\2\216\2\236\2\256\2\274\2\314\2\334\2" + 159 "\354\2\374\2\14\3\34\3\54\3\74\3\114\3\134\3" + 160 "\154\3\173\3\213\3\233\3\253\3\76\2\116\2\136\2" + 161 "\156\2\273\3\313\3\333\3\353\3\373\3\13\4\33\4" + 162 "\53\4\71\4\111\4\131\4\151\4\167\4\207\4\227\4" + 163 "\247\4\255\4\275\4\315\4\335\4\341\4\361\4\1\5" + 164 "\21\5\41\5\61\5\101\5\121\5\61\5\101\5\121\5" + 165 "\141\5\161\5\201\5\221\5\241\5\231\5\251\5\271\5" + 166 "\311\5\331\5\351\5\371\5\11\6\31\6\51\6\71\6" + 167 "\111\6\126\6\146\6\166\6\206\6zzzz\213\6" + 168 "\232\6zz\251\6\270\6\307\6\325\6\345\6zz" + 169 "zzzzzzzzzzzzzzzz" + 170 "zzzzzzzzzzzzzzzz" 
+ 171 "zzzzzzzzzz\363\6zz\363\6" + 172 "zz\1\7zz\1\7zzzzzz\13\7" + 173 "\33\7\51\7zzzzzzzzzzzz" + 174 "zzzzzzzzzzzzzzzz" + 175 "zzzzzzzz\71\7\111\7zzzz" + 176 "zzzzzzzzzz\131\7\150\7zz" + 177 "zzzz\162\7zzzzzz\176\7\215\7" + 178 "\233\7zzzzzzzzzzzzzz" + 179 "zz\253\7zzzz\267\7\307\7zz\314\7" + 180 "\54\5\201zzz\334\7zzzzzz\352\7" + 181 "\373\3zzzz\372\7\7\10zzzzzz" + 182 "zzzzzzzzzzzz\27\10\47\10" + 183 "\65\10zzzzzzzzzzzzzz" + 184 "zzzzzzzzzzzzzzzz" + 185 "\263\2\77\10zz\114\10zzzzzzzz" + 186 "zz\1\1zzzz\130\10\144\10zz\164\10" + 187 "\202\10zzzz\222\10zz\240\10\373\3zz" + 188 "zz\200zzzzz\260\10\300\10zz\271\2" + 189 "zzzz\307\10\326\10\343\10zzzz\361\10" + 190 "zzzzzz\1\11\275\2zz\21\11\121\1" + 191 "zzzzzzzzzzzzzzzz" + 192 "zzzzzzzzzzzz\41\11zz" + 193 "\60\11zzzz\100\11zzzzzzzz" + 194 "zzzzzzzzzzzzzzzz" + 195 "zzzzzzzzzzzzzzzz" + 196 "zzzzzzzzzzzzzzzz" + 197 "\120\11zzzz\130\11\146\11zzzzzz" + 198 "\201zzzzz\166\11zzzzzzzz" + 199 "\55\5zz\201\11\221\11\313\3zzzz\131\6" + 200 "\201zzzzz\236\11\256\11zzzzzz" + 201 "\273\11\313\11zzzzzzzzzzzz" + 202 "zzzzzz\161z\333\11zz\377zzz" + 203 "zz\346\11\366\11\117\1\4\12\53\5zzzz" + 204 "zzzzzzzzzzzz\234\11\24\12" + 205 "\157\1zzzzzzzzzz\44\12\63\12" + 206 "zzzzzzzzzzzzzzzz" + 207 "zzzzzzzzzz\353\2\103\12\343z" + 208 "\24\2zzzzzz\123\12\276\2zzzz" + 209 "zzzzzz\143\12\163\12zzzzzz" + 210 "zzzz\173\12\213\12zzzzzzzz" + 211 "zzzzzzzzzzzzzzzz" + 212 "zz\227\12\246\12zzzzzzzzzz" + 213 "zzzzzzzzzzzzzzzz" + 214 "zzzzzzzzzzzzzzzz" + 215 "zzzzzzzzzzzzzz\265\12" + 216 "zzzz\302\12zz\321\12zzzz\335\12" + 217 "\347\12zzzzzzzzzzzzzz" + 218 "zzzzzzzzzzzzzzzz" + 219 "zzzzzzzzzzzzzz\353\2" + 220 "\367\12zzzzzzzzzz\7\13\17\13" + 221 "\36\13zzzzzzzzzzzzzz" + 222 "\55\13\74\13zzzzzz\104\13\124\13zz" + 223 "zzzzzzzzzzzzzzzz" + 224 "zzzzzzzzzzzz\141\13zz" + 225 "zzzzzzzzzzzzzzzz" + 226 "zzzzzzzzzzzzzz\105z" + 227 "\115z\115z\115z\135z\175z\235z\275z\335z" + 228 "\2z\2z\354z\12\1\51\1\111\1\2z\2z" + 229 "\2z\2z\2z\2z\2z\2z\2z\2z" + 230 
"\2z\2z\2z\2z\2z\2z\2z\2z" + 231 "\2z\2z\2z\2z\2z\2z\2z\2z" + 232 "\2z\2z\2z\2z\2z\2z\151\1\210\1" + 233 "\2z\2z\2z\2z\2z\2z\2z\2z" + 234 "\2z\2z\250\1\2z\2z\310\1\346\1\3\2" + 235 "\41\2\77\2\137\2\175\2\227\2zzzzzz" + 236 "zzzzzzzzzzzzzzzz" + 237 "zzzzzzzzzzzzzzzz" + 238 "zzzzzzzzzzzzzzzz" + 239 "zzzzzzzzzzzzzzzz" + 240 "zzzzzzzzzzzzzzzz" + 241 "zzzzzzzzzzzzzzzz" + 242 "zzzzzzzzzzzzzzzz" + 243 "zzzzzzzzzz\10\10\10\7zz" + 244 "zzzzzzzzzzzzzzzz" + 245 "zzzzzzzzzzzzzzzz" + 246 "zzzzzzzzzzzzzzzz" + 247 "zzzz\10\7\1z\7\4\7\1\1\1\1\10" + 248 "\10\10\10\7\7\7\7\1\4\7z\10\1\10\10\10" + 249 "\1\1zzzzzzzzzz\1\1zz" + 250 "zzzzzzzzzzzzzzzz" + 251 "zzzzzzzzzz\10\7\7zzz" + 252 "zzzzzzzzzzzzzzzz" + 253 "zzzzzzzzzzzzzzzz" + 254 "zzzzzzzzzzzzzzzz" + 255 "zzzzz\1z\7\4\7\1\1\1\1zz" + 256 "\4\4zz\5\5\1zzzzzzzzz" + 257 "\7zzzzzzzzzz\1\1zzz" + 258 "zzzzzzzzzzzzzzzz" + 259 "zzzzzzz\10z\10\10\7zzzz" + 260 "zzzzzzzzzzzzzzzz" + 261 "zzzzzzzzzzzzzzzz" + 262 "zzzzzzzzzzzzzzzz" + 263 "zzzz\1z\7\4\7\1\1zzzz\10" + 264 "\10zz\10\10\1zzzzzzzzzz" + 265 "zzzzzzzzzzzzzzzz" + 266 "zzzzzzzz\10\10zzz\1zz" + 267 "zzzzzzzz\7\1\1\1\1\10z\10" + 268 "\10\15z\7\7\1zzzzzzzzzz" + 269 "zzzzzzzzzz\1\1zzzz" + 270 "zzzzzzzzzzzzzzzz" + 271 "zz\10\10\10\10\10\10z\10\7\7zzzz" + 272 "zzzzzzzzzzzzzzzz" + 273 "zzzzzzzzzzzzzzzz" + 274 "zzzzzzzzzzzzzzzz" + 275 "zzzz\1z\7\10\7\1\1\1\1zz\4" + 276 "\13zz\5\14\1zzzzzzzz\10\15" + 277 "zzzzzzzzzz\1\1zzzz" + 278 "zzzzzzzzzzzzzzzz" + 279 "zzzzzzzz\10zzzzzzz" + 280 "zzzzzzzzzzzzzzzz" + 281 "zzzzzzzzzzzzzzzz" + 282 "zzzzzzzzzzzzzzzz" + 283 "zzzz\7\7\10\7\7zzz\4\4\4z" + 284 "\5\5\5\10zzzzzzzzz\7zz" + 285 "zzzzzzzzzzzzzzzz" + 286 "zzzzzzzzzzzzzzzz" + 287 "zzzzzz\10\7\7\7\10zzzzz" + 288 "zzzzzzzzzzzzzzzz" + 289 "zzzzzzzzzzzzzzzz" + 290 "zzzzzzzzzzzzzzzz" + 291 "zzzz\10\10\7\7\7\7z\10\10\11z\10" + 292 "\10\10\10zzzzzzz\10\1zzzz" + 293 "zzzzzzz\1\1zzzzzzz" + 294 "zzzzzzzzzzzzzzzz" + 295 "zzzzz\15\7\7\7\7z\10\15\15z\15" + 296 "\15\10\10zzzzzzz\7\7zzzz" + 297 "zzzzzzz\1\1zzzzzzz" + 298 
"zzzzzzzzzzzzzzzz" + 299 "zzzzz\10\10\7\7zzzzzzz" + 300 "zzzzzzzzzzzzzzzz" + 301 "zzzzzzzzzzzzzzzz" + 302 "zzzzzzzzzzzzzzzz" + 303 "\10\10z\7\7\7\1\1z\4\4\4z\5\5\5" + 304 "\10zzzzzzzzz\7zzzzz" + 305 "zzzzz\1\1zzzzzzzzz" + 306 "zzzzzzzzzzzzzzzz" + 307 "zzz\7\7zzzzzzzzzzz" + 308 "zzzzzzzzzzzzzzzz" + 309 "zzzzzzzzzzzzzzzz" + 310 "zzzzzzzzzzzzzzzz" + 311 "z\10zzzz\7\7\7\10\10\1z\1z\7" + 312 "\4\13\4\5\14\5\7zzzzzzzzz" + 313 "zzzzzzzzz\7\7zzzzz" + 314 "zzzzzzzzzzzzzzzz" + 315 "zzzzzzzzzzzzzzzz" + 316 "zzzzzzzzzzz\7\10\7\7\10" + 317 "\10\10\10\1\1\1zzzzz\16\16\16\16\16" + 318 "\7z\10\10\10\10\10\10\10\10zzzzzz" + 319 "zzzzzzzzzzzzzzzz" + 320 "zzzzzzzzzzzzzzzz" + 321 "zzzzzzzzzzz\7\10\7\7\10" + 322 "\10\10\10\1\1z\10\1zzz\16\16\16\16\16" + 323 "zzz\10\10\10\10\10\10zzzzzzz" + 324 "zzzzzzzzzzzzzzzz" + 325 "zzzzzzzzzzzzzzzz" + 326 "zzzzzzzzzzz\1\1zzz" + 327 "zzzzzzzzzzzzzzzz" + 328 "zzzzzzzz\1z\1z\10zzz" + 329 "z\7\4zzzzzzzzzzzzz" + 330 "zzzzzzzzzzzzzzzz" + 331 "zzzzzzzzzzzzzzzz" + 332 "zzzz\1\10\11\1\1\11\11\11\11\10\10\10" + 333 "\10\10\7\10\11\10\10\1z\10\10zzzzz" + 334 "\1\1\1\1\1\1\1\1\1\1\1z\1\1\1\1" + 335 "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1" + 336 "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1" + 337 "zzzzzz\1zzzzzzzzz" + 338 "zzzzzzzzzzzzzzzz" + 339 "zzzzzzzzzzzzzzzz" + 340 "zzzzzzzzzzzzzzzz" + 341 "\7\7\10\10\1\4\10\10\10\10\10\1\7z\10\7" + 342 "z\1\1zzzzzz\7\7\1\1zzz" + 343 "z\1\1z\7\7\7zz\7\7\7\7\7\7\7" + 344 "zz\10\10\10\10zzzzzzzzzz" + 345 "z\1\7\4\10\10\7\7\7\7\7\7\1z\7z" + 346 "zzzzzzzzz\7\7\7\10zz\10" + 347 "\1\1zzzzzzzzzzz\10\1z" + 348 "zzzzzzzzzzz\7\10\10\10\10" + 349 "\1\1\1\13\14\5\4\4\4\5\5\10\7\7\10\10" + 350 "\10\10\10\10\10z\10zzzzzzzzz" + 351 "\10zz\10\10\1\7\7\15\15\10\10\7\7\7z" + 352 "zzz\7\7\1\7\7\7\7\7\7\1\10\1z" + 353 "zzz\7\7\7\7\7\16\16\16\7\7\16\7\7" + 354 "\7\7\7zzzzzzz\7\7zzzz" + 355 "zzz\10\1\4\7\10zzzzz\4\1\7" + 356 "\10\10\10\1\1\1\1z\7\10\7\7\10\10\10\10" + 357 "\1\1\10\1\7\4\4\4\10\10\10\10\10\10\10\10" + 358 "\10\10zz\1\10\10\10\10\7zzzzzz" + 359 
"zzzzz\10\7\10\10\1\1\1\3\11\12\4" + 360 "\4\5\5\10\15\7zzzzzzzzzz" + 361 "z\10\1\10\10\10z\7\1\1\10\1\4\7\10\10" + 362 "\7z\1\1zzzzzz\10\7\10\10\7\7" + 363 "\7\10\7\10zzzz\7\7\7\4\4\13\7\7" + 364 "\1\10\10\10\10\4\4\10\1zzzzzzz" + 365 "z\10\10\10z\6\1\1\1\1\1\10\10\1\1\1" + 366 "\1\10\7\6\6\6\6\6\6\6zzzz\1z" + 367 "zzz\10zz\7zzzzzzzz\10" + 368 "zzzz\10zzzz\7\7\1\10\7zz" + 369 "zzzzzz\7\7\7\7\7\7\7\7\7\7" + 370 "\7\7\1\10zzzzzzzzzz\10\10" + 371 "\10\10\10\10\10\10\10\10\10\10\10\10\10\10zz" + 372 "zzzzzzzzzzz\10zzzz" + 373 "zzzzzzz\1\1\1zzzzzz" + 374 "z\1\1\1\10\1\1\1\1\10zzz\10\7\7" + 375 "\10\10\1\1\4\4\10\7\7\2\3zzzzz" + 376 "zzzzzzzzzz\10\10\10\10\1\10" + 377 "\4\10\1\7\4\1\1zzzzzzzzz" + 378 "\10zzzzzzzz\10\7zzzzz" + 379 "zzzzzz\7\10\7zz\10\7\10\10\1" + 380 "\16\16\10\10\16\7\16\16\7\10\10zzzzz" + 381 "zzzzzz\4\1\10\4\7zzz\7\7" + 382 "\10\7\7\1\7\7z\7\1zz\6\1\1z\10" + 383 "\6zzzzz\1\1\1\10zzzzzz" + 384 "zz\10\1\1zzzzz\7\10\7zzz" + 385 "zzzzzzzzzz\10\10\10\10\1\1" + 386 "\1\1\10\10\10\10\10zzzzzzzzz" + 387 "\7\4\7\1\1\10\10\7\7\1\1zzzzz" + 388 "zz\10\10\10\1\1\4\10\11\11\10\1\1z\10" + 389 "zzzzzzzzzzz\7\4\7\1\1" + 390 "\1\1\1\1\10\10\10\15\7zzzzzzz" + 391 "z\1z\10\1zzzzzzzzzzz" + 392 "z\7\7\7\1\10\10\15\15\10\7\10\10zzz" + 393 "zzz\10z\7\4\7\1\1\10\10\10\10\1\1" + 394 "zzzzzzzzzzz\1\1z\7\7" + 395 "\10\7\7\7\7zz\4\4zz\5\5\7zz" + 396 "\7\7zz\10\10\10\10\10\10\10zzz\7\7" + 397 "\1\10\10\7\1zzzzzzzzz\7\4" + 398 "\7\1\1\1\1\1\1\4\10\13\5\7\5\10\7\1" + 399 "\1zzzzzzzzzzzz\4\7\1" + 400 "\1\1\1zz\4\13\5\14\10\10\7\1\7\7\7" + 401 "\1\1\1\1\1\1\10\10\7\7\10\7\1zzz" + 402 "zzzzzzzz\10\7\10\4\7\1\1\10" + 403 "\10\10\10\7\1zzzzzzzzzzz" + 404 "zz\1z\10\7\7\10\10\1\1\4\10\1\10\10" + 405 "\10zzzzzzzzzzzz\7\4\7" + 406 "\1\1\1\10\10\10\10\10\7\1\1zzzzz" + 407 "\10\1\1\10\10\10\10\10\10\1zzzzz\1" + 408 "\1\10\10\10\10\7z\1\1\1\1z\10\1\1\10" + 409 "\10\10\7\7\1\1\1zzzzzzzzz" + 410 "z\1\1\1\1\1\1\10\7\10zzzzzz" + 411 "z\10\10\1\1\1\1\1z\10\10\10\10\10\10\7" + 412 
"\1zz\1\1\1\1\1\1\1\1\1\1\1\1\1" + 413 "\1z\7\1\1\1\1\1\1\4\1\10\7\10\10z" + 414 "zzzzzzzz\10\10\10\10\10\1zz" + 415 "z\10z\10\10z\10\10\1\10\1zz\1zz" + 416 "zzzzzzzz\7\7\7\7\7z\10\10" + 417 "z\7\7\10\7zzzzzzzzz\10\1" + 418 "\4\7zzzzzzzzzz"); 419 } 420 421 private static final class InSCTrie { 422 static final CodePointTrie INSTANCE = makeTrie( 423 "\63\151\162\124\102z\102\3\170\17\4z\100z\220z" + 424 "zz\100z\140z\224z\100z\100z\100z\100z" + 425 "\100z\100z\100z\100z\100z\100z\100z\100z" + 426 "\100z\100z\100z\100z\100z\100z\100z\100z" + 427 "\100z\100z\100z\100z\100z\100z\100z\100z" + 428 "\100z\100z\100z\100z\324z\22\1\122\1\220\1" + 429 "\317\1\15\2\114\2\212\2\312\2\10\3\106\3\204\3" + 430 "\304\3\2\4\101\4\177\4\277\4\375\4\75\5\175\5" + 431 "\274\5\374\5\73\6\173\6\233\6\333\6\33\7\130\7" + 432 "\370\2\13\3\27\3\13\3\62\3zz\20z\40z" + 433 "\60z\100z\120z\140z\160z\140z\160z\200z" + 434 "\220z\224z\244z\264z\304z\100z\120z\140z" + 435 "\160z\100z\120z\140z\160z\100z\120z\140z" + 436 "\160z\100z\120z\140z\160z\100z\120z\140z" + 437 "\160z\100z\120z\140z\160z\100z\120z\140z" + 438 "\160z\100z\120z\140z\160z\324z\344z\364z" + 439 "\4\1\22\1\42\1\62\1\102\1\122\1\142\1\162\1" + 440 "\202\1\220\1\240\1\260\1\300\1\317\1\337\1\357\1" + 441 "\377\1\15\2\35\2\55\2\75\2\114\2\134\2\154\2" + 442 "\174\2\212\2\232\2\252\2\272\2\312\2\332\2\352\2" + 443 "\372\2\10\3\30\3\50\3\70\3\106\3\126\3\146\3" + 444 "\166\3\204\3\224\3\244\3\264\3\304\3\324\3\344\3" + 445 "\364\3\2\4\22\4\42\4\62\4\101\4\121\4\141\4" + 446 "\161\4\177\4\217\4\237\4\257\4\277\4\317\4\337\4" + 447 "\357\4\375\4\15\5\35\5\55\5\75\5\115\5\135\5" + 448 "\155\5\175\5\215\5\235\5\255\5\274\5\314\5\334\5" + 449 "\354\5\374\5\14\6\34\6\54\6\73\6\113\6\133\6" + 450 "\153\6\173\6\213\6\233\6\253\6\233\6\253\6\273\6" + 451 "\313\6\333\6\353\6\373\6\13\7\33\7\53\7\73\7" + 452 "\113\7\130\7\150\7\170\7\210\7\351z\351z\230\7" + 453 "\243\7\263\7\303\7\322\7\341\7\357\7\377\7\100z" + 454 
"\100z\100z\100z\100z\100z\100z\100z\100z" + 455 "\100z\100z\100z\100z\100z\100z\100z\100z" + 456 "\100z\100z\100z\100z\100z\17\10\35\10\346z" + 457 "\35\10\346z\55\10\17\10\75\10\351z\351z\115\10" + 458 "\131\10\143\10\162\10\60z\100z\100z\100z\100z" + 459 "\100z\100z\100z\100z\100z\100z\100z\100z" + 460 "\100z\100z\100z\100z\202\10\154\1\222\10\242\10" + 461 "\55\2\351z\262\10\302\10\351z\351z\164\3\322\10" + 462 "\341\10\60z\100z\100z\351z\361\10\351z\351z" + 463 "\1\11\16\11\36\11\52\11\60z\60z\100z\100z" + 464 "\100z\100z\100z\100z\72\11\346z\351z\112\11" + 465 "\126\11\60z\100z\100z\146\11\351z\165\11\205\11" + 466 "\351z\351z\225\11\245\11\351z\351z\265\11\302\11" + 467 "\322\11\100z\100z\100z\100z\100z\100z\100z" + 468 "\100z\342\11\360\11\376\11\100z\100z\100z\100z" + 469 "\100z\100z\100z\100z\100z\100z\100z\100z" + 470 "\100z\100z\100z\10\12\24\12\44\12\100z\100z" + 471 "\100z\100z\100z\132\7\62\12\100z\100z\100z" + 472 "\100z\100z\100z\100z\100z\100z\100z\100z" + 473 "\100z\100z\100z\100z\100z\100z\100z\100z" + 474 "\100z\100z\100z\100z\100z\100z\100z\100z" + 475 "\100z\164z\100z\100z\100z\102\12\351z\117\12" + 476 "\100z\351z\137\12\155\12\174\12\326z\347z\351z" + 477 "\214\12\230\12\60z\250\12\266\12\306\12\351z\324\12" + 478 "\351z\344\12\363\12\100z\100z\3\13\351z\351z" + 479 "\22\13\227\2\60z\42\13\62\13\343z\351z\211\10" + 480 "\102\13\122\13\60z\351z\141\13\351z\351z\351z" + 481 "\161\13\201\13\100z\221\13\241\13\100z\100z\100z" + 482 "\100z\100z\100z\100z\100z\100z\100z\100z" + 483 "\100z\261\13\301\13\316\13\60z\336\13\356\13\351z" + 484 "\370\13\61z\100z\100z\100z\100z\100z\100z" + 485 "\100z\100z\100z\100z\100z\100z\100z\100z" + 486 "\100z\100z\100z\100z\100z\100z\100z\100z" + 487 "\100z\100z\100z\100z\100z\10\14\346z\351z" + 488 "\212\10\30\14\46\14\60\14\100\14\120\14\351z\351z" + 489 "\140\14\100z\100z\100z\100z\160\14\351z\213\10" + 490 "\200\14\220\14\240\14\351z\255\14\325z\350z\351z" + 491 
"\275\14\315\14\60z\272\6\65z\341z\353\3\206\10" + 492 "\335\14\100z\100z\100z\100z\355\14\155\1\374\14" + 493 "\337z\351z\14\15\34\15\60z\54\15\142\1\162\1" + 494 "\74\15\10\3\114\15\134\15\355\11\100z\100z\100z" + 495 "\100z\100z\100z\100z\100z\333z\351z\351z" + 496 "\154\15\0\15\212\15\100z\100z\231\15\351z\351z" + 497 "\37\11\251\15\60z\100z\100z\100z\100z\100z" + 498 "\100z\100z\100z\100z\100z\333z\351z\377z" + 499 "\271\15\311\15\321\15\100z\100z\333z\351z\351z" + 500 "\341\15\361\15\60z\100z\100z\337z\351z\1\16" + 501 "\16\16\60z\100z\100z\100z\351z\36\16\56\16" + 502 "\76\16\100z\100z\100z\100z\100z\100z\100z" + 503 "\100z\100z\100z\100z\100z\337z\351z\206\10" + 504 "\116\16\100z\100z\100z\100z\100z\100z\100z" + 505 "\100z\100z\100z\100z\100z\100z\100z\100z" + 506 "\100z\100z\100z\100z\100z\100z\100z\100z" + 507 "\100z\100z\100z\100z\100z\136\16\351z\351z" + 508 "\153\16\173\16\213\16\351z\351z\227\16\241\16\100z" + 509 "\100z\100z\100z\100z\100z\100z\100z\100z" + 510 "\100z\100z\100z\100z\100z\100z\100z\100z" + 511 "\100z\100z\100z\100z\100z\261\16\351z\377z" + 512 "\301\16\321\16\273\6\341\16\125\5\351z\357\16\53\7" + 513 "\377\16\100z\100z\100z\100z\17\17\351z\351z" + 514 "\36\17\56\17\60z\76\17\351z\112\17\127\17\60z" + 515 "\100z\100z\100z\100z\100z\100z\100z\100z" + 516 "\100z\100z\100z\100z\100z\100z\351z\147\17" + 517 "\100z\100z\100z\100z\100z\100z\100z\100z" + 518 "\100z\100z\100z\100z\100z\100z\100z\100z" + 519 "\105z\125z\125z\125z\145z\205z\245z\305z" + 520 "\345z\4z\4z\365z\24\1\64\1\124\1\4z" + 521 "\164\1\4z\175\1\4z\4z\4z\4z\4z" + 522 "\4z\4z\4z\4z\4z\4z\4z\4z" + 523 "\4z\4z\4z\4z\4z\4z\4z\4z" + 524 "\4z\4z\4z\4z\4z\4z\4z\4z" + 525 "\4z\4z\4z\235\1\275\1\4z\4z\4z" + 526 "\4z\4z\4z\4z\4z\4z\4z\335\1" + 527 "\4z\4z\375\1\35\2\75\2\135\2\175\2\235\2" + 528 "\275\2\330\2zzzzzzzzzzzz" + 529 "zzzzzzzzzzzzzzzz" + 530 "zzzzzzzzzzzzzzzz" + 531 "z\14zz\30\30\30\30\30\30\30\30\30\30zz" + 532 "zzzzzzzzzzzzzzzz" + 533 "zzzzzzzzzzzzzzzz" + 534 
"zzzzzzzzzzzzzzzz" + 535 "zzzzzzzzzzzzzzzz" + 536 "zzzz\14zzzzzzzzzzz" + 537 "zzzzzz\34\34zzzzzzzz" + 538 "zzzzzzzzzzzzzzz\14" + 539 "zzzzzzzzzzzzzzzz" + 540 "zzzzzzzzzzzzzzzz" + 541 "zzzzzzzz\2\2\2\40\43\43\43\43" + 542 "\43\43\43\43\43\43\43\43\43\43\43\43\43\5\5\5" + 543 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" + 544 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" + 545 "\5\5\42\42\27\1\42\42\42\42\42\42\42\42\42\42" + 546 "\42\42\42\37\42\42z\4\4zz\42\42\42\5\5" + 547 "\5\5\5\5\5\5\43\43\42\42zz\30\30\30\30" + 548 "\30\30\30\30\30\30zz\43\43\43\43\43\43\5\5" + 549 "\5\5\5\5\5\5\14\2\2\40z\43\43\43\43\43" + 550 "\43\43\43zz\43\43zz\43\43\5\5\5\5\5" + 551 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5z" + 552 "\5\5\5\5\5\5\5z\5zzz\5\5\5\5" + 553 "zz\27\1\42\42\42\42\42zz\42\42zz\42" + 554 "\42\37\6zzzzzzzz\42zzzz" + 555 "\5\5z\5\43\43\42\42zz\30\30\30\30\30\30" + 556 "\30\30\30\30\5\5zzzzzzzzzz" + 557 "\2z\34z\2\2\40z\43\43\43\43\43\43zz" + 558 "zz\43\43zz\43\43\5\5\5\5\5\5\5\5" + 559 "\5\5\5\5\5\5\5\5\5\5\5\5z\5\5\5" + 560 "\5\5\5\5z\5\5z\5\5z\5\5zz\27" + 561 "z\42\42\42zzzz\42\42zz\42\42\37z" + 562 "zz\4zzzzzzz\5\5\5\5z\5" + 563 "zzzzzzz\30\30\30\30\30\30\30\30\30" + 564 "\30\2\22\14\14z\13zzzzzzzzz" + 565 "z\2\2\40z\43\43\43\43\43\43\43\43\43z\43" + 566 "\43\43z\43\43\5\5\5\5\5\5\5\5\5\5\5" + 567 "\5\5\5\5\5\5\5\5\5z\5\5\5\5\5\5" + 568 "\5z\5\5z\5\5\5\5\5zz\27\1\42\42" + 569 "\42\42\42\42z\42\42\42z\42\42\37zzzz" + 570 "zzzzzzzzzzzzzz\43\43" + 571 "\42\42zz\30\30\30\30\30\30\30\30\30\30zz" + 572 "zzzzzzz\5\4\4\4\27\27\27z\2" + 573 "\2\40z\43\43\43\43\43\43\43\43zz\43\43z" + 574 "z\43\43\5\5\5\5\5\5\5\5\5\5\5\5\5" + 575 "\5\5\5\5\5\5\5z\5\5\5\5\5\5\5z" + 576 "\5\5z\5\5\5\5\5zz\27\1\42\42\42\42" + 577 "\42zz\42\42zz\42\42\37zzzzzz" + 578 "zz\42\42zzzz\5\5z\5\43\43\42\42" + 579 "zz\30\30\30\30\30\30\30\30\30\30z\5zz" + 580 "zzzzzzzzzzzz\2\25z\43" + 581 "\43\43\43\43\43zzz\43\43\43z\43\43\43\5" + 582 "zzz\5\5z\5z\5\5zzz\5\5z" + 583 "zz\5\5\5zzz\5\5\5\5\5\5\5\5" + 584 
"\5\5\5\5zzzz\42\42\42zzz\42\42" + 585 "\42z\42\42\42\37zzzzzzzzz\42" + 586 "zzzzzzzzzzzzzz\30\30" + 587 "\30\30\30\30\30\30\30\30zzzzzzzz" + 588 "zzzzzzzz\2\2\2\40\2\43\43\43" + 589 "\43\43\43\43\43z\43\43\43z\43\43\43\5\5\5" + 590 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" + 591 "\5z\5\5\5\5\5\5\5\5\5\5\5\5\5\5" + 592 "\5\5zzz\1\42\42\42\42\42z\42\42\42z" + 593 "\42\42\42\37zzzzzzz\42\42z\5\5" + 594 "\5zzzzz\43\43\42\42zz\30\30\30\30" + 595 "\30\30\30\30\30\30zzzzzzzzzz" + 596 "zzzzzz\2\2\40z\43\43\43\43\43\43" + 597 "\43\43z\43\43\43z\43\43\43\5\5\5\5\5\5" + 598 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5z\5" + 599 "\5\5\5\5\5\5\5\5\5z\5\5\5\5\5z" + 600 "z\27\1\42\42\42\42\42z\42\42\42z\42\42\42" + 601 "\37zzzzzzz\42\42zzzzzz" + 602 "z\5z\43\43\42\42zz\30\30\30\30\30\30\30" + 603 "\30\30\30z\21\21zzzzzzzzzz" + 604 "zzz\2\2\2\40z\43\43\43\43\43\43\43\43" + 605 "z\43\43\43z\43\43\43\5\5\5\5\5\5\5\5" + 606 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" + 607 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\32\32" + 608 "\1\42\42\42\42\42z\42\42\42z\42\42\42\37\15" + 609 "zzzzz\6\6\6\42zzzzzzz" + 610 "\43\43\43\42\42zz\30\30\30\30\30\30\30\30\30" + 611 "\30zzzzzzzzzz\6\6\6\6\6" + 612 "\6zz\2\40z\43\43\43\43\43\43\43\43\43\43" + 613 "\43\43\43\43\43\43\43\43zzz\5\5\5\5\5" + 614 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" + 615 "\5\5\5z\5\5\5\5\5\5\5\5\5z\5z" + 616 "z\5\5\5\5\5\5\5zzz\37zzzz" + 617 "\42\42\42\42\42\42z\42z\42\42\42\42\42\42\42" + 618 "\42zzzzzz\30\30\30\30\30\30\30\30\30" + 619 "\30zz\42\42zzzzzzzzzzz" + 620 "z\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" + 621 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" + 622 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5z" + 623 "\42\42\42\42\42\42\42\42\42\42\32zzzzz" + 624 "\42\42\42\42\42\42z\42\36\36\36\36\12\2\32z" + 625 "\30\30\30\30\30\30\30\30\30\30zzzzzz" + 626 "zzzzzzzzzzzzzzzz" + 627 "zzzzzzzzzzzzzzzz" + 628 "\5\5z\5zz\5\5z\5zz\5zzz" + 629 "zzz\5\5\5\5z\5\5\5\5\5\5\5z" + 630 "\5\5\5z\5z\5zz\5\5z\5\5z\42" + 631 "\42\42\42\42\42\42\42\42\42z\42\13\13zz\42" + 632 "\42\42\42\42zzz\36\36\36\36z\2zz\30" + 633 
"\30\30\30\30\30\30\30\30\30zz\5\5\5\5z" + 634 "zzzzzzzzzzzzzzzz" + 635 "zzzzzzzzzzzzzzz\30" + 636 "\30\30\30\30\30\30\30\30\30\30\30\30\30\30\30\30" + 637 "\30\30\30z\34z\34z\27zzzzzz\5" + 638 "\5\5\5\5\5\5\5z\5\5\5\5\5\5\5\5" + 639 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5" + 640 "\5\5\5\5\5\5\5\5\5\5\5\5zzzz" + 641 "\42\42\42\42\42\42\42\42\42\42\42\42\42\2\40\42" + 642 "\42\2\2\32\1zz\10\10\10\10\10\17\17\17\17" + 643 "\17\17\17\17\17\17\17z\17\17\17\17\17\17\17\17" + 644 "\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17\17" + 645 "\17\17\17\17\17\17\17\17\17\17\17\17zzzz" + 646 "zz\34zzzzzzzzzzzzz" + 647 "zzzzzzzzzzzzzzzz" + 648 "zzzzzzzzzzzzzzzz" + 649 "zzzzzzzzzzzz\5\43\43\43" + 650 "\43\43\43\43\43\43\43\42\42\42\42\42\42\2\36\40" + 651 "\23\32\13\13\13\13\5\30\30\30\30\30\30\30\30\30" + 652 "\30z\14zz\14z\5\5\43\43\43\43\42\42\42" + 653 "\42\5\5\5\5\13\13\5\42\36\36\5\5\42\42\36" + 654 "\36\36\36\36\5\5\42\42\42\42\5\5\5\5\5\5" + 655 "\5\5\5\5\5\13\42\42\42\42\36\36\36\36\36\36" + 656 "\36\5\36\30\30\30\30\30\30\30\30\30\30\36\36\42" + 657 "\42zz\43\43\43\5\5\5\5\5\5\5\5\5\5" + 658 "z\5\5\42\42\32zzzzzzzzzz" + 659 "z\5\5\42\42zzzzzzzzzzz" + 660 "z\5z\42\42zzzzzzzzzzz" + 661 "z\5\5\5\43\43\43\43\43\43\43\43\43\43\43\43" + 662 "\43zz\42\42\42\42\42\42\42\42\42\42\2\40\42" + 663 "\33\33\34\20\12\34\34\32\23\34zzzzzz" + 664 "zz\1\34zz\14\5\5\5\5\5\5\5\5\5" + 665 "\5\5\5\5\5\5\42\42\42\42\42\42\42\42\42\17" + 666 "\17\17zzzz\7\7\2\7\7\7\7\7\7\7" + 667 "\42\34zzzz\5\5\5\41\41\41\41\41\41\41" + 668 "\41\41\41\41zz\35\35\35\35\35zzzzz" + 669 "zzzzzz\42\42\42\42\42\42\42\42\42\42" + 670 "\42\42\42\42\42\42\7\7\7\7\7\7\7\36\36z" + 671 "zzzzz\5\5\5\5\5\5\5\42\42\42\42" + 672 "\42zzzz\5\5\5\5\5\5\5\5\5\5\5" + 673 "\5\5\43\43\43\5\5\13\13\17\7\7\11\17\17\17" + 674 "\17z\23\42\42\42\42\42\42\42\42\42\42\42\42\42" + 675 "\42\42\2\36\36\36\36\36\32\34\34zz\34\2\2" + 676 "\2\20\40\43\43\43\43\43\43\43\43\43\43\43\5\5" + 677 "\5\5\27\42\42\42\42\42\42\42\42\42\42\42\37\5" + 678 
"\5\5\5\5\5\5zzzz\2\20\40\43\43\43" + 679 "\43\43\43\43\5\5\5\5\5\5\17\17\17\42\42\42" + 680 "\42\42\42\32\23\17\17\5\5\30\30\30\30\30\30\30" + 681 "\30\30\30\1\5\5\5\7\7\5\5\5\5\43\43\27" + 682 "\42\42\42\42\42\42\42\42\42\7\7\32\32zzz" + 683 "zzzzzzzzz\5\5\5\5\17\17\42" + 684 "\42\42\42\42\42\42\7\7\7\7\2\2\34\27zz" + 685 "zzzzzz\30\30\30\30\30\30\30\30\30\30" + 686 "zzz\5\5\5\4\4\4z\4\4\4\4\4\4" + 687 "\4\4\4\4\4\4zzzzzzzzzz" + 688 "zzzz\40\40\4\21\21\4\4\4zzzz" + 689 "zzzzzzz\34zzzzzzzz" + 690 "zzzz\26\24zz\14\14\14\14\14zzz" + 691 "zzzzzzzz\34\34\34zzzzz" + 692 "zzzzzz\43\43z\43\43\43\32\5\5\5" + 693 "\5\2\5\5\5\5\42\42\42\42\42zzzzz" + 694 "zzz\5\5\5\5\5\5\5\5\5\5\5\5\5" + 695 "\5\41\41\5\5\5\5\41\17\17\5\5\5\5\5\5" + 696 "\5\17\5\2zzzzzzzzzzzz" + 697 "\5\5\5\5\13\42\42\42\42\42\42\42\42\42\42\42" + 698 "\37\2zzzzzzzzzz\4\4\4\4" + 699 "\4\4\4\4\4\4\4\4\4\4\4\4\2\2zz" + 700 "zzzzzzzz\43\42\30\30\30\30\30\30" + 701 "\30\30\30\30\5\5\5\5\5\5\41\41\41\41\41\41" + 702 "\41\41\41\36\36\36zz\5\5\5\5\5\5\5\42" + 703 "\42\42\42\42\42\42\42\7\7\7\32zzzzz" + 704 "zzzzzzz\2\2\20\40\43\43\43\43\43" + 705 "\5\5\5\43\43\43\5\5\5\27\42\42\42\42\42\42" + 706 "\42\42\42\17\13\13\5\5\5\5\5\42z\5\5\5" + 707 "\5\5\5\5\5\5\30\30\30\30\30\30\30\30\30\30" + 708 "\5\5\5\5\5z\42\42\42\13\13\13\13zzz" + 709 "zzzzzz\7\7\7\7\7\7\7\7\7\7" + 710 "\7\7\7\7zz\5\5\5\14\14\14zzz\5" + 711 "\36\36\36\5\5\42\42\42\42\42\42\42\42\42\42\42" + 712 "\42\42\42\42\36\35\36\35zzzzzzzz" + 713 "zzzzz\43\43\5\5\5\5\5\5\5\5\5" + 714 "\42\42\42\42\42zzzzz\40\23zzzz" + 715 "zzzzz\5\5\5\5\5\5\5\5\5\5\5" + 716 "\5\5\5\43\43\5\43\5\5\5\5\5\5\5\5\5" + 717 "\7\7\7\7\7\42\42\42\42\42\42\42\42z\36\32" + 718 "zz\5\42\42\42z\42\42zzzzz\42\42" + 719 "\2\40\5\5\5\5z\5\5\5z\5\5\5\5\5" + 720 "\5\5zz\27\27\27zzzz\23\2\2\40\21" + 721 "\21\43\43\43\43\43\43\43\43\43\43\43\42\42\42\42" + 722 "\42\42\37zzzzzzzzz\3\3\3\3" + 723 "\3\3\3\3\3\3\3\3\3\3\30\30\30\30\30\30" + 724 "\30\30\30\30zzzzzzzzzzzz" + 725 
"zzz\31\2\2\40\43\43\43\43\43\43\43\43\43" + 726 "\43\5\5\5\42\42\42\42\42\42\42\42\42\37\27z" + 727 "zzzz\2\2\40\43\43\43\43\5\5\5\5\5" + 728 "\5\5\5\5\42\42\42\23\32z\30\30\30\30\30\30" + 729 "\30\30\30\30zzzz\5\42\42zzzzz" + 730 "zzzz\41\41\41\41\41\5\5\5\5\5\5\5" + 731 "\5\5\5\5\27zzzzzzzzzzz" + 732 "z\5\5\5\42\42\42\42\42\42\42\42\42\42\42\42" + 733 "\42\37\1\16\16zzzzz\34\27\42\42zz" + 734 "z\42\42\42\42\2\37\27\22zzzzzz\4" + 735 "z\43\43\43\43\5\5\5z\5z\5\5\5\5z" + 736 "\5\5\5\5\5\5\5\5\5zzzzzzz" + 737 "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\2" + 738 "\42\42\42\42\42\42\42\42\42\27\32zzzzz" + 739 "\2\2\2\40z\43\43\43\43\43\43\43\43zz\43" + 740 "\5z\5\5z\5\5\5\5\5z\27\27\1\42\42" + 741 "zzzzzzz\42zzzzzz\2\2" + 742 "\43\43\42\42zz\4\4\4\4\4\4\4zzz" + 743 "\5\5\5\5\5\42\42\42\42\42\42\42\42\42\42\42" + 744 "\37\2\2\40\27\1zzzzzzzz\30\30" + 745 "\30\30\30\30\30\30\30\30zzzz\34z\43\43" + 746 "\43\43\43\43\43\43\43\43\43\43\43\43\5\2\40\37" + 747 "\27\1zzzzzzzzzzz\42\42\42" + 748 "\42\42\42zz\42\42\42\42\2\2\40\37\27zz" + 749 "zzzzzzzzzzzzz\43\43\43" + 750 "\43\42\42zz\42\42\42\42\42\42\42\42\42\42\42" + 751 "\42\42\2\40\37\42zzzzzzzzzz" + 752 "zzzzz\5\5\5\5\5\5\5\5\5\5\5" + 753 "\2\40\42\42\42\42\42\42\37\27zzzzzz" + 754 "zz\5\5\5\5\5\5\5\5\5\5\5zz\13" + 755 "\13\13\42\42\42\42\42\42\42\42\42\42\42\32zz" + 756 "zz\30\30\30\30\30\30\30\30\30\30\30\30zz" + 757 "zz\42\42\42\42\42\42\42\2\40\37\27zzz" + 758 "zz\43\42\42\42\42\42\42\42\42\42\42\5\5\5" + 759 "\5\5\34\32\2\2\2\2\40\16\13\13\13\13\14z" + 760 "zzzz\14z\23zzzzzzzz\43" + 761 "\42\42\42\42\42\42\42\42\42\42\42\5\5\5\5z" + 762 "z\16\16\16\16\7\7\7\7\7\7\2\40\22\23z" + 763 "zz\1zz\43\43\43\43\43\43\43\43\43z\43" + 764 "\43\43\43\5\5\42\42\42\42\42\42\42z\42\42\42" + 765 "\42\2\2\40\37\1zzzzzzzzzz" + 766 "zzzzz\30\30\30\30\30\30\30\30\30\30\30" + 767 "\30\30zzz\17\17\17\17\17\17\17\17\17\17\17" + 768 "\17\17\17\42\42\42\42\42\2\2zzzzzz" + 769 "zzz\43\43\43\43\43\43\43z\43\43z\43\5" + 770 
"\5\5\5\42\42\42\42\42\42zzz\42z\42\42" + 771 "z\42\2\40\27\42\32\23\15\13zzzzzz" + 772 "zz\43\43\43\43\43\43z\43\43z\43\43\5\5" + 773 "\5\5\5\5\5\5\5\5\42\42\42\42\42z\42\42" + 774 "\2\40\23zzzzzzzz\5\5\14\42\42" + 775 "\42\42zzzzzzzzzz"); 776 } 777 778 private static final class VoTrie { 779 static final CodePointTrie INSTANCE = makeTrie( 780 "\63\151\162\124\102z\114\4\74\3\14zzz\200\10" + 781 "zz\100z\131z\230zzzzzzzzz" + 782 "zzzzzz\320zzzzzzzzz" + 783 "zzzzzzzzzzzzzzzz" + 784 "zzzzzzzzzzzzzzzz" + 785 "zzzzzzzzzzzzzzzz" + 786 "zzzzzzzzzzzzzzzz" + 787 "zzzzzzzzzzzzzzzz" + 788 "zzzzzzzzzzzzzzzz" + 789 "\73\3\125\3\143\3\171\3\231\3\267\3\322\3\354\3" + 790 "\125\3\125\3\125\3\14\4\125\3\125\3\125\3\14\4" + 791 "\54\4\54\4\54\4\54\4\54\4\54\4\54\4\54\4" + 792 "\54\4\54\4\54\4\54\4\54\4\54\4\54\4\54\4" + 793 "\54\4\54\4\54\4\54\4\54\4\54\4\54\4\54\4" + 794 "\54\4\54\4\54\4\54\4\54\4\54\4\54\4\54\4" + 795 "\54\4\54\4\54\4\54\4\54\4\54\4\54\4\54\4" + 796 "\54\4\54\4\54\4\54\4\125\3\125\3\125\3\14\4" + 797 "\125\3\125\3\125\3\14\4zz\20z\40z\60z" + 798 "\100z\120z\140z\160z\131z\151z\171z\211z" + 799 "\230z\250z\270z\310zzz\20z\40z\60z" + 800 "zz\20z\40z\60zzz\20z\40z\60z" + 801 "zz\20z\40z\60z\320z\340z\360zz\1" + 802 "zz\20z\40z\60zzz\20z\40z\60z" + 803 "zz\20z\40z\60zzz\20z\40z\60z" + 804 "zz\20z\40z\60zzz\20z\40z\60z" + 805 "zz\20z\40z\60zzz\20z\40z\60z" + 806 "zzzzzzzzzzzzzzzz" + 807 "zzzzzzzzzzzzzzzz" + 808 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 809 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 810 "\17\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 811 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 812 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 813 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 814 "zzzzzzzzzzzzzzzz" + 815 "zzzzzzzzzzzzzzzz" + 816 "zzzzzzzzzzzzzzzz" + 817 "\20\1\20\1\20\1\20\1\20\1zzzzzz" + 818 "zzzzzzzzzzzzzzzz" + 819 "zzzzzzzzzz\251z\226z\36\1" + 820 "\54\1\256z\252zzzzzzzzzzz" + 821 "zz\3\1\74\1zz\114\1\130\1\146\1\13\1" + 822 
"\165\1\20\1\20\1\20\1\204\1zzzzzz" + 823 "zzzzzzzz\162zzz\366zzz" + 824 "zzzzzzzzzzzzzzzz" + 825 "zzzzzz\220\1\20\1\230\1zzzz" + 826 "zzzz\3\1\20\1\25\1zz\354z\250\1" + 827 "\266\1\16\1\20\1\20\1\306\1\20\1\20\1\20\1" + 828 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 829 "\20\1\20\1zzzzzzzzzzzz" + 830 "zzzzzzzz\20\1\20\1\20\1\20\1" + 831 "\20\1\20\1\26\1\20\1\20\1\20\1\20\1\20\1" + 832 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 833 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\30\1" + 834 "\12\1\20\1\322\1zzzzzzzzzz" + 835 "zzzzzzzzzzzzzzzz" + 836 "zzzzzzzz\16\1\20\1zzzz" + 837 "\26\1zzzzzzzzzz\10\1\20\1" + 838 "\342\1\24\1\20\1zzzzzzzzzz" + 839 "zzzzzz\20\1\20\1\20\1\20\1\20\1" + 840 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 841 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 842 "\20\1\20\1\20\1\361\1\377\1\20\1\16\2\35\2" + 843 "\20\1\52\2\20\1\67\2\106\2\126\2\20\1\52\2" + 844 "\20\1\67\2\141\2\20\1\20\1\156\2\20\1\20\1" + 845 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 846 "\20\1\20\1\176\2\20\1\20\1\20\1\20\1\20\1" + 847 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 848 "\20\1\20\1\20\1\176\2\176\2\176\2\176\2\176\2" + 849 "\206\2\20\1\216\2\20\1\20\1\20\1\20\1\20\1" + 850 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 851 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 852 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 853 "\20\1\20\1\20\1zzzzzzzzzz" + 854 "zzzzzzzzzzzzzzzz" + 855 "zzzzzzzzzzzzzzzz" + 856 "zz\20\1\20\1zzzzzzzzzz" + 857 "zzzzzz\20\1zz\20\1\27\1\233\2" + 858 "\252\2zzzzzzzzzzzzzz" + 859 "zzzz\272\2\311\2\20\1\331\2\20\1\351\2" + 860 "\370\2zzzzzzzzzzzzzz" + 861 "\10\3\30\3zzzzzzzzzzzz" + 862 "zzzzzzzzzzzzzzzz" + 863 "zzzzzzzzzzzzzzzz" + 864 "zzzz\20\1\20\1zzzzzzzz" + 865 "zzzzzzzzzzzzzzzz" + 866 "zzzzzzzzzzzzzzzz" + 867 "zzzzzzzz\20\1\20\1\20\1\20\1" + 868 "\20\1\20\1\20\1\20\1zzzzzzzz" + 869 "zzzzzzzzzzzzzzzz" + 870 "zzzzzzzzzzzzzzzz" + 871 "zzzzzzzzzzzzzzzz" + 872 "zzzz\20\1\20\1\20\1\20\1\20\1\20\1" + 873 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 
874 "\20\1\20\1\20\1\20\1\20\1zzzzzz" + 875 "zz\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 876 "\20\1\20\1zzzzzzzzzzzz" + 877 "zzzzzzzzzzzzzzzz" + 878 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 879 "\20\1\20\1zzzzzzzzzzzz" + 880 "zzzz\50\3\20\1\20\1\20\1\20\1\20\1" + 881 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 882 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 883 "\20\1\20\1\20\1\20\1\20\1\20\1\20\1\20\1" + 884 "\20\1\20\1\22\1\204z\230z\250z\250z\250z" + 885 "\250z\250z\250z\310z\14z\350zz\1\25\1" + 886 "\14z\14z\14z\64\1\123\1\162\1\221\1\14z" + 887 "\253\1\14z\313\1\353\1\13\2\43\2\43\2\43\2" + 888 "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\43\2" + 889 "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\43\2" + 890 "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\43\2" + 891 "\43\2\43\2\43\2\43\2\43\2\373z\14z\103\2" + 892 "\14z\43\2\43\2\43\2\43\2\43\2\43\2\43\2" + 893 "\43\2\43\2\43\2\43\2\43\2\14z\14z\14z" + 894 "\14z\43\2\43\2\43\2\43\2\43\2\43\2\43\2" + 895 "\43\2\43\2\43\2\43\2\43\2\43\2\370z\14z" + 896 "\142\2\14z\14z\14z\14z\202\2\14z\14z" + 897 "\14z\14z\14z\234\2\14z\14z\375z\14z" + 898 "\14z\14z\14z\14z\14z\14z\14z\14z" + 899 "\14z\43\2\43\2\271\2\14z\14z\14z\14z" + 900 "\14z\43\2z\1\14z\14z\14z\14z\14z" + 901 "\14z\14z\14z\14z\14z\14z\14z\14z" + 902 "\14z\14z\14z\14z\14z\14z\274\2\43\2" + 903 "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\370z" + 904 "\14z\14z\14z\14z\14z\14z\14z\14z" + 905 "\14z\14z\14z\14z\14z\14z\14z\14z" + 906 "\14z\14z\332\2\370z\14z\14z\14z\14z" + 907 "\14z\14z\14z\14z\43\2\372\2\14z\14z" + 908 "\43\2\375z\14z\14z\14z\14z\14z\14z" + 909 "\14z\14z\14z\14z\43\2\32\3\43\2\43\2" + 910 "\310z\265\2\14z\14z\43\2\43\2\43\2\43\2" + 911 "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\43\2" + 912 "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\43\2" + 913 "\43\2\43\2\43\2\43\2\43\2\43\2\43\2\43\2" + 914 "\43\2\43\2\43\2\33\3\14z\14z\14z\14z" + 915 "\14z\14z\14z\14z\14z\14z\14z\14z" + 916 "\14z\14z\14z\14z\14z\14z\14z\14z" + 917 "\14z\14z\14z\14z\14z\14z\14z\14z" + 918 "\14z\14z\14z\14zzzzzzzzz" + 919 
"zzzzzzzzzzzzzzzz" + 920 "zzzzzzzzzzzzzzzz" + 921 "zzzzzzzzzzzzzzzz" + 922 "zzzzzzzzzzzzzzzz" + 923 "zzzzzzzzzzzzzzzz" + 924 "zzzzzzzzzzzzzzzz" + 925 "zzzzzzzzzzzzzzzz" + 926 "zzzzzzzz\3z\3zzzz\3" + 927 "zz\3zzzzzzzzzz\3\3\3" + 928 "zzzzzzzzzzzzzzzz" + 929 "zzzzzzz\3zzzzzzzz" + 930 "zzzzzzzzzzzzzzzz" + 931 "zzzzzzz\3zzzzzzzz" + 932 "zzzzzzzzzzzzzzzz" + 933 "zzzzzzzzzzzzzzzz" + 934 "zz\3\3zzzzzzzzzzzz" + 935 "zzzzzzzz\3\3\3\3\3\3\3\3" + 936 "\3\3\3\3\3\3\3\3zzzzzzzz" + 937 "z\3\3zzz\3zzzz\3\3\3zz" + 938 "zzzz\3z\3\3\3zzzzzzz" + 939 "zzzz\3\3z\3\3\3\3\3\3\3zz" + 940 "zzz\3\3z\3\3zzzzzz\3\3" + 941 "\3\3z\3z\3z\3zzzz\3zzz" + 942 "zz\3\3\3\3\3\3z\3\3z\3\3\3\3" + 943 "\3\3\3\3\3\3zz\3\3\3\3\3\3\3\3" + 944 "zzzz\3\3\3\3\3\1\1\3zzzz" + 945 "\3\3\3\3\3\3\3\3\3\3\3\3\3\3z\3" + 946 "\3\3\3\3\3\3\3\3\3\3zzzz\3\3" + 947 "\3z\3\3\3\3\3\3\3\3\3\3\3\3zz" + 948 "zzzzzzzzzz\3\3z\3\3\3" + 949 "\3\3\3\3\3\3\3\3\3\3\2\2\3\3\3\3" + 950 "\3\1\1\1\1\1\1\1\1\3\3\1\1\1\1\1" + 951 "\1\1\1\1\1\1\1\3\3\3\3\3\3\3\3\3" + 952 "\3\3\3\3\3\3\2\3\2\3\2\3\2\3\2\3" + 953 "\3\3\3\3\3\2\3\3\3\3\3\3\3\3\3\3" + 954 "\3\3\2\3\2\3\2\3\3\3\3\3\3\2\3\3" + 955 "\3\3\3\2\2\3\3\3\3\2\2\3\3\3\1\2" + 956 "\3\2\3\2\3\2\3\2\3\3\3\3\3\3\2\2" + 957 "\3\3\3\3\3\1\3\3\3\3\3\3\3\2\3\3" + 958 "\3\3\3\3\3\3\2\2\2\2\2\2\2\2\2\2" + 959 "\2\2\2\2\2\2\3\3\3\3\3\3\3\3\3\3" + 960 "\3\2\2\2\2\2\3\3\3\3\3z\1\1\1\1" + 961 "\1\1\3\3\3zzzz\3\3\3\3\3\3\3" + 962 "\3\3z\2\3\3\3\3\3\3\1\1\3\3\2z" + 963 "\2\3\3\3\3\3\3\3\3\3\3\1\1zzz" + 964 "\2\3\3\3\3\3\3\3\3\3\3\3\1\3\1\3" + 965 "\1\3\3\3\3\3\3\3\3\3\3\3\1\1\1\1" + 966 "\1zzzzzzzzzzzzzzz" + 967 "\3\3\3\1\3\3\3\3zzzzzzzz" + 968 "\3\3\3\3\3\3\3\3\3zzz\3\3zz" + 969 "\2\2\3\3\3\3\3\3\3\3\3\3\3\3\3\3" + 970 "zzzz"); 971 } 972 973 // public methods ---------------------------------------------------- 974 975 /** 976 * Gets the main property value for code point ch. 
977 * @param ch code point whose property value is to be retrieved 978 * @return property value of code point 979 */ getProperty(int ch)980 public final int getProperty(int ch) 981 { 982 return m_trie_.get(ch); 983 } 984 985 /** 986 * Gets the unicode additional properties. 987 * Java version of C u_getUnicodeProperties(). 988 * @param codepoint codepoint whose additional properties is to be 989 * retrieved 990 * @param column The column index. 991 * @return unicode properties 992 */ getAdditional(int codepoint, int column)993 public int getAdditional(int codepoint, int column) { 994 assert column >= 0; 995 if (column >= m_additionalColumnsCount_) { 996 return 0; 997 } 998 return m_additionalVectors_[m_additionalTrie_.get(codepoint) + column]; 999 } 1000 1001 static final int MY_MASK = UCharacterProperty.TYPE_MASK 1002 & ((1<<UCharacterCategory.UPPERCASE_LETTER) | 1003 (1<<UCharacterCategory.LOWERCASE_LETTER) | 1004 (1<<UCharacterCategory.TITLECASE_LETTER) | 1005 (1<<UCharacterCategory.MODIFIER_LETTER) | 1006 (1<<UCharacterCategory.OTHER_LETTER)); 1007 1008 1009 /** 1010 * <p>Get the "age" of the code point.</p> 1011 * <p>The "age" is the Unicode version when the code point was first 1012 * designated (as a non-character or for Private Use) or assigned a 1013 * character.</p> 1014 * <p>This can be useful to avoid emitting code points to receiving 1015 * processes that do not accept newer characters.</p> 1016 * <p>The data is from the UCD file DerivedAge.txt.</p> 1017 * <p>This API does not check the validity of the codepoint.</p> 1018 * @param codepoint The code point. 
1019 * @return the Unicode version number 1020 */ getAge(int codepoint)1021 public VersionInfo getAge(int codepoint) 1022 { 1023 int version = getAdditional(codepoint, 0) >> AGE_SHIFT_; 1024 return VersionInfo.getInstance( 1025 (version >> FIRST_NIBBLE_SHIFT_) & LAST_NIBBLE_MASK_, 1026 version & LAST_NIBBLE_MASK_, 0, 0); 1027 } 1028 1029 private static final int GC_CN_MASK = getMask(UCharacter.UNASSIGNED); 1030 private static final int GC_CC_MASK = getMask(UCharacter.CONTROL); 1031 private static final int GC_CS_MASK = getMask(UCharacter.SURROGATE); 1032 private static final int GC_ZS_MASK = getMask(UCharacter.SPACE_SEPARATOR); 1033 private static final int GC_ZL_MASK = getMask(UCharacter.LINE_SEPARATOR); 1034 private static final int GC_ZP_MASK = getMask(UCharacter.PARAGRAPH_SEPARATOR); 1035 /** Mask constant for multiple UCharCategory bits (Z Separators). */ 1036 private static final int GC_Z_MASK = GC_ZS_MASK|GC_ZL_MASK|GC_ZP_MASK; 1037 1038 /** 1039 * Checks if c is in 1040 * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] 1041 * with space=\p{Whitespace} and Control=Cc. 1042 * Implements UCHAR_POSIX_GRAPH. 1043 * @internal 1044 */ isgraphPOSIX(int c)1045 private static final boolean isgraphPOSIX(int c) { 1046 /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ 1047 /* comparing ==0 returns FALSE for the categories mentioned */ 1048 return (getMask(UCharacter.getType(c))& 1049 (GC_CC_MASK|GC_CS_MASK|GC_CN_MASK|GC_Z_MASK)) 1050 ==0; 1051 } 1052 1053 // binary properties --------------------------------------------------- *** 1054 1055 private class BinaryProperty { 1056 int column; // SRC_PROPSVEC column, or "source" if mask==0 1057 int mask; BinaryProperty(int column, int mask)1058 BinaryProperty(int column, int mask) { 1059 this.column=column; 1060 this.mask=mask; 1061 } BinaryProperty(int source)1062 BinaryProperty(int source) { 1063 this.column=source; 1064 this.mask=0; 1065 } getSource()1066 final int getSource() { 1067 return mask==0 ? 
column : SRC_PROPSVEC; 1068 } contains(int c)1069 boolean contains(int c) { 1070 // systematic, directly stored properties 1071 return (getAdditional(c, column)&mask)!=0; 1072 } 1073 } 1074 1075 private class CaseBinaryProperty extends BinaryProperty { // case mapping properties 1076 int which; CaseBinaryProperty(int which)1077 CaseBinaryProperty(int which) { 1078 super(SRC_CASE); 1079 this.which=which; 1080 } 1081 @Override contains(int c)1082 boolean contains(int c) { 1083 return UCaseProps.INSTANCE.hasBinaryProperty(c, which); 1084 } 1085 } 1086 1087 private class NormInertBinaryProperty extends BinaryProperty { // UCHAR_NF*_INERT properties 1088 int which; NormInertBinaryProperty(int source, int which)1089 NormInertBinaryProperty(int source, int which) { 1090 super(source); 1091 this.which=which; 1092 } 1093 @Override contains(int c)1094 boolean contains(int c) { 1095 return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_INERT).isInert(c); 1096 } 1097 } 1098 1099 BinaryProperty[] binProps={ 1100 /* 1101 * Binary-property implementations must be in order of corresponding UProperty, 1102 * and there must be exactly one entry per binary UProperty. 
1103 */ 1104 new BinaryProperty(1, (1<<ALPHABETIC_PROPERTY_)), 1105 new BinaryProperty(1, (1<<ASCII_HEX_DIGIT_PROPERTY_)), 1106 new BinaryProperty(SRC_BIDI) { // UCHAR_BIDI_CONTROL 1107 @Override 1108 boolean contains(int c) { 1109 return UBiDiProps.INSTANCE.isBidiControl(c); 1110 } 1111 }, 1112 new BinaryProperty(SRC_BIDI) { // UCHAR_BIDI_MIRRORED 1113 @Override 1114 boolean contains(int c) { 1115 return UBiDiProps.INSTANCE.isMirrored(c); 1116 } 1117 }, 1118 new BinaryProperty(1, (1<<DASH_PROPERTY_)), 1119 new BinaryProperty(1, (1<<DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_)), 1120 new BinaryProperty(1, (1<<DEPRECATED_PROPERTY_)), 1121 new BinaryProperty(1, (1<<DIACRITIC_PROPERTY_)), 1122 new BinaryProperty(1, (1<<EXTENDER_PROPERTY_)), 1123 new BinaryProperty(SRC_NFC) { // UCHAR_FULL_COMPOSITION_EXCLUSION 1124 @Override 1125 boolean contains(int c) { 1126 // By definition, Full_Composition_Exclusion is the same as NFC_QC=No. 1127 Normalizer2Impl impl=Norm2AllModes.getNFCInstance().impl; 1128 return impl.isCompNo(impl.getNorm16(c)); 1129 } 1130 }, 1131 new BinaryProperty(1, (1<<GRAPHEME_BASE_PROPERTY_)), 1132 new BinaryProperty(1, (1<<GRAPHEME_EXTEND_PROPERTY_)), 1133 new BinaryProperty(1, (1<<GRAPHEME_LINK_PROPERTY_)), 1134 new BinaryProperty(1, (1<<HEX_DIGIT_PROPERTY_)), 1135 new BinaryProperty(1, (1<<HYPHEN_PROPERTY_)), 1136 new BinaryProperty(1, (1<<ID_CONTINUE_PROPERTY_)), 1137 new BinaryProperty(1, (1<<ID_START_PROPERTY_)), 1138 new BinaryProperty(1, (1<<IDEOGRAPHIC_PROPERTY_)), 1139 new BinaryProperty(1, (1<<IDS_BINARY_OPERATOR_PROPERTY_)), 1140 new BinaryProperty(1, (1<<IDS_TRINARY_OPERATOR_PROPERTY_)), 1141 new BinaryProperty(SRC_BIDI) { // UCHAR_JOIN_CONTROL 1142 @Override 1143 boolean contains(int c) { 1144 return UBiDiProps.INSTANCE.isJoinControl(c); 1145 } 1146 }, 1147 new BinaryProperty(1, (1<<LOGICAL_ORDER_EXCEPTION_PROPERTY_)), 1148 new CaseBinaryProperty(UProperty.LOWERCASE), 1149 new BinaryProperty(1, (1<<MATH_PROPERTY_)), 1150 new BinaryProperty(1, 
(1<<NONCHARACTER_CODE_POINT_PROPERTY_)), 1151 new BinaryProperty(1, (1<<QUOTATION_MARK_PROPERTY_)), 1152 new BinaryProperty(1, (1<<RADICAL_PROPERTY_)), 1153 new CaseBinaryProperty(UProperty.SOFT_DOTTED), 1154 new BinaryProperty(1, (1<<TERMINAL_PUNCTUATION_PROPERTY_)), 1155 new BinaryProperty(1, (1<<UNIFIED_IDEOGRAPH_PROPERTY_)), 1156 new CaseBinaryProperty(UProperty.UPPERCASE), 1157 new BinaryProperty(1, (1<<WHITE_SPACE_PROPERTY_)), 1158 new BinaryProperty(1, (1<<XID_CONTINUE_PROPERTY_)), 1159 new BinaryProperty(1, (1<<XID_START_PROPERTY_)), 1160 new CaseBinaryProperty(UProperty.CASE_SENSITIVE), 1161 new BinaryProperty(1, (1<<S_TERM_PROPERTY_)), 1162 new BinaryProperty(1, (1<<VARIATION_SELECTOR_PROPERTY_)), 1163 new NormInertBinaryProperty(SRC_NFC, UProperty.NFD_INERT), 1164 new NormInertBinaryProperty(SRC_NFKC, UProperty.NFKD_INERT), 1165 new NormInertBinaryProperty(SRC_NFC, UProperty.NFC_INERT), 1166 new NormInertBinaryProperty(SRC_NFKC, UProperty.NFKC_INERT), 1167 new BinaryProperty(SRC_NFC_CANON_ITER) { // UCHAR_SEGMENT_STARTER 1168 @Override 1169 boolean contains(int c) { 1170 return Norm2AllModes.getNFCInstance().impl. 
1171 ensureCanonIterData().isCanonSegmentStarter(c); 1172 } 1173 }, 1174 new BinaryProperty(1, (1<<PATTERN_SYNTAX)), 1175 new BinaryProperty(1, (1<<PATTERN_WHITE_SPACE)), 1176 new BinaryProperty(SRC_CHAR_AND_PROPSVEC) { // UCHAR_POSIX_ALNUM 1177 @Override 1178 boolean contains(int c) { 1179 return UCharacter.isUAlphabetic(c) || UCharacter.isDigit(c); 1180 } 1181 }, 1182 new BinaryProperty(SRC_CHAR) { // UCHAR_POSIX_BLANK 1183 @Override 1184 boolean contains(int c) { 1185 // "horizontal space" 1186 if(c<=0x9f) { 1187 return c==9 || c==0x20; /* TAB or SPACE */ 1188 } else { 1189 /* Zs */ 1190 return UCharacter.getType(c)==UCharacter.SPACE_SEPARATOR; 1191 } 1192 } 1193 }, 1194 new BinaryProperty(SRC_CHAR) { // UCHAR_POSIX_GRAPH 1195 @Override 1196 boolean contains(int c) { 1197 return isgraphPOSIX(c); 1198 } 1199 }, 1200 new BinaryProperty(SRC_CHAR) { // UCHAR_POSIX_PRINT 1201 @Override 1202 boolean contains(int c) { 1203 /* 1204 * Checks if codepoint is in \p{graph}\p{blank} - \p{cntrl}. 1205 * 1206 * The only cntrl character in graph+blank is TAB (in blank). 1207 * Here we implement (blank-TAB)=Zs instead of calling u_isblank(). 
1208 */ 1209 return (UCharacter.getType(c)==UCharacter.SPACE_SEPARATOR) || isgraphPOSIX(c); 1210 } 1211 }, 1212 new BinaryProperty(SRC_CHAR) { // UCHAR_POSIX_XDIGIT 1213 @Override 1214 boolean contains(int c) { 1215 /* check ASCII and Fullwidth ASCII a-fA-F */ 1216 if( 1217 (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) || 1218 (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41)) 1219 ) { 1220 return true; 1221 } 1222 return UCharacter.getType(c)==UCharacter.DECIMAL_DIGIT_NUMBER; 1223 } 1224 }, 1225 new CaseBinaryProperty(UProperty.CASED), 1226 new CaseBinaryProperty(UProperty.CASE_IGNORABLE), 1227 new CaseBinaryProperty(UProperty.CHANGES_WHEN_LOWERCASED), 1228 new CaseBinaryProperty(UProperty.CHANGES_WHEN_UPPERCASED), 1229 new CaseBinaryProperty(UProperty.CHANGES_WHEN_TITLECASED), 1230 new BinaryProperty(SRC_CASE_AND_NORM) { // UCHAR_CHANGES_WHEN_CASEFOLDED 1231 @Override 1232 boolean contains(int c) { 1233 String nfd=Norm2AllModes.getNFCInstance().impl.getDecomposition(c); 1234 if(nfd!=null) { 1235 /* c has a decomposition */ 1236 c=nfd.codePointAt(0); 1237 if(Character.charCount(c)!=nfd.length()) { 1238 /* multiple code points */ 1239 c=-1; 1240 } 1241 } else if(c<0) { 1242 return false; /* protect against bad input */ 1243 } 1244 if(c>=0) { 1245 /* single code point */ 1246 UCaseProps csp=UCaseProps.INSTANCE; 1247 UCaseProps.dummyStringBuilder.setLength(0); 1248 return csp.toFullFolding(c, UCaseProps.dummyStringBuilder, 1249 UCharacter.FOLD_CASE_DEFAULT)>=0; 1250 } else { 1251 String folded=UCharacter.foldCase(nfd, true); 1252 return !folded.equals(nfd); 1253 } 1254 } 1255 }, 1256 new CaseBinaryProperty(UProperty.CHANGES_WHEN_CASEMAPPED), 1257 new BinaryProperty(SRC_NFKC_CF) { // UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED 1258 @Override 1259 boolean contains(int c) { 1260 Normalizer2Impl kcf=Norm2AllModes.getNFKC_CFInstance().impl; 1261 String src=UTF16.valueOf(c); 1262 StringBuilder dest=new StringBuilder(); 1263 // Small destCapacity for NFKC_CF(c). 
1264 Normalizer2Impl.ReorderingBuffer buffer=new Normalizer2Impl.ReorderingBuffer(kcf, dest, 5); 1265 kcf.compose(src, 0, src.length(), false, true, buffer); 1266 return !Normalizer2Impl.UTF16Plus.equal(dest, src); 1267 } 1268 }, 1269 new BinaryProperty(2, 1<<PROPS_2_EMOJI), 1270 new BinaryProperty(2, 1<<PROPS_2_EMOJI_PRESENTATION), 1271 new BinaryProperty(2, 1<<PROPS_2_EMOJI_MODIFIER), 1272 new BinaryProperty(2, 1<<PROPS_2_EMOJI_MODIFIER_BASE), 1273 new BinaryProperty(2, 1<<PROPS_2_EMOJI_COMPONENT), 1274 new BinaryProperty(SRC_PROPSVEC) { // REGIONAL_INDICATOR 1275 // Property starts are a subset of lb=RI etc. 1276 @Override 1277 boolean contains(int c) { 1278 return 0x1F1E6<=c && c<=0x1F1FF; 1279 } 1280 }, 1281 new BinaryProperty(1, 1<<PREPENDED_CONCATENATION_MARK), 1282 new BinaryProperty(2, 1<<PROPS_2_EXTENDED_PICTOGRAPHIC), 1283 }; 1284 hasBinaryProperty(int c, int which)1285 public boolean hasBinaryProperty(int c, int which) { 1286 if(which<UProperty.BINARY_START || UProperty.BINARY_LIMIT<=which) { 1287 // not a known binary property 1288 return false; 1289 } else { 1290 return binProps[which].contains(c); 1291 } 1292 } 1293 1294 // int-value and enumerated properties --------------------------------- *** 1295 getType(int c)1296 public int getType(int c) { 1297 return getProperty(c)&TYPE_MASK; 1298 } 1299 1300 /* 1301 * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. 1302 * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. 
1303 */ 1304 private static final int /* UHangulSyllableType */ gcbToHst[]={ 1305 HangulSyllableType.NOT_APPLICABLE, /* U_GCB_OTHER */ 1306 HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CONTROL */ 1307 HangulSyllableType.NOT_APPLICABLE, /* U_GCB_CR */ 1308 HangulSyllableType.NOT_APPLICABLE, /* U_GCB_EXTEND */ 1309 HangulSyllableType.LEADING_JAMO, /* U_GCB_L */ 1310 HangulSyllableType.NOT_APPLICABLE, /* U_GCB_LF */ 1311 HangulSyllableType.LV_SYLLABLE, /* U_GCB_LV */ 1312 HangulSyllableType.LVT_SYLLABLE, /* U_GCB_LVT */ 1313 HangulSyllableType.TRAILING_JAMO, /* U_GCB_T */ 1314 HangulSyllableType.VOWEL_JAMO /* U_GCB_V */ 1315 /* 1316 * Omit GCB values beyond what we need for hst. 1317 * The code below checks for the array length. 1318 */ 1319 }; 1320 1321 private class IntProperty { 1322 int column; // SRC_PROPSVEC column, or "source" if mask==0 1323 int mask; 1324 int shift; IntProperty(int column, int mask, int shift)1325 IntProperty(int column, int mask, int shift) { 1326 this.column=column; 1327 this.mask=mask; 1328 this.shift=shift; 1329 } IntProperty(int source)1330 IntProperty(int source) { 1331 this.column=source; 1332 this.mask=0; 1333 } getSource()1334 final int getSource() { 1335 return mask==0 ? 
column : SRC_PROPSVEC; 1336 } getValue(int c)1337 int getValue(int c) { 1338 // systematic, directly stored properties 1339 return (getAdditional(c, column)&mask)>>>shift; 1340 } getMaxValue(int which)1341 int getMaxValue(int which) { 1342 return (getMaxValues(column)&mask)>>>shift; 1343 } 1344 } 1345 1346 private class BiDiIntProperty extends IntProperty { BiDiIntProperty()1347 BiDiIntProperty() { 1348 super(SRC_BIDI); 1349 } 1350 @Override getMaxValue(int which)1351 int getMaxValue(int which) { 1352 return UBiDiProps.INSTANCE.getMaxValue(which); 1353 } 1354 } 1355 1356 private class CombiningClassIntProperty extends IntProperty { CombiningClassIntProperty(int source)1357 CombiningClassIntProperty(int source) { 1358 super(source); 1359 } 1360 @Override getMaxValue(int which)1361 int getMaxValue(int which) { 1362 return 0xff; 1363 } 1364 } 1365 1366 private class NormQuickCheckIntProperty extends IntProperty { // UCHAR_NF*_QUICK_CHECK properties 1367 int which; 1368 int max; NormQuickCheckIntProperty(int source, int which, int max)1369 NormQuickCheckIntProperty(int source, int which, int max) { 1370 super(source); 1371 this.which=which; 1372 this.max=max; 1373 } 1374 @Override getValue(int c)1375 int getValue(int c) { 1376 return Norm2AllModes.getN2WithImpl(which-UProperty.NFD_QUICK_CHECK).getQuickCheck(c); 1377 } 1378 @Override getMaxValue(int which)1379 int getMaxValue(int which) { 1380 return max; 1381 } 1382 } 1383 1384 IntProperty intProps[]={ 1385 new BiDiIntProperty() { // BIDI_CLASS 1386 @Override 1387 int getValue(int c) { 1388 return UBiDiProps.INSTANCE.getClass(c); 1389 } 1390 }, 1391 new IntProperty(0, BLOCK_MASK_, BLOCK_SHIFT_), 1392 new CombiningClassIntProperty(SRC_NFC) { // CANONICAL_COMBINING_CLASS 1393 @Override 1394 int getValue(int c) { 1395 return Normalizer2.getNFDInstance().getCombiningClass(c); 1396 } 1397 }, 1398 new IntProperty(2, DECOMPOSITION_TYPE_MASK_, 0), 1399 new IntProperty(0, EAST_ASIAN_MASK_, EAST_ASIAN_SHIFT_), 1400 new 
IntProperty(SRC_CHAR) { // GENERAL_CATEGORY 1401 @Override 1402 int getValue(int c) { 1403 return getType(c); 1404 } 1405 @Override 1406 int getMaxValue(int which) { 1407 return UCharacterCategory.CHAR_CATEGORY_COUNT-1; 1408 } 1409 }, 1410 new BiDiIntProperty() { // JOINING_GROUP 1411 @Override 1412 int getValue(int c) { 1413 return UBiDiProps.INSTANCE.getJoiningGroup(c); 1414 } 1415 }, 1416 new BiDiIntProperty() { // JOINING_TYPE 1417 @Override 1418 int getValue(int c) { 1419 return UBiDiProps.INSTANCE.getJoiningType(c); 1420 } 1421 }, 1422 new IntProperty(2, LB_MASK, LB_SHIFT), // LINE_BREAK 1423 new IntProperty(SRC_CHAR) { // NUMERIC_TYPE 1424 @Override 1425 int getValue(int c) { 1426 return ntvGetType(getNumericTypeValue(getProperty(c))); 1427 } 1428 @Override 1429 int getMaxValue(int which) { 1430 return NumericType.COUNT-1; 1431 } 1432 }, 1433 new IntProperty(0, SCRIPT_MASK_, 0) { 1434 @Override 1435 int getValue(int c) { 1436 return UScript.getScript(c); 1437 } 1438 }, 1439 new IntProperty(SRC_PROPSVEC) { // HANGUL_SYLLABLE_TYPE 1440 @Override 1441 int getValue(int c) { 1442 /* see comments on gcbToHst[] above */ 1443 int gcb=(getAdditional(c, 2)&GCB_MASK)>>>GCB_SHIFT; 1444 if(gcb<gcbToHst.length) { 1445 return gcbToHst[gcb]; 1446 } else { 1447 return HangulSyllableType.NOT_APPLICABLE; 1448 } 1449 } 1450 @Override 1451 int getMaxValue(int which) { 1452 return HangulSyllableType.COUNT-1; 1453 } 1454 }, 1455 // max=1=YES -- these are never "maybe", only "no" or "yes" 1456 new NormQuickCheckIntProperty(SRC_NFC, UProperty.NFD_QUICK_CHECK, 1), 1457 new NormQuickCheckIntProperty(SRC_NFKC, UProperty.NFKD_QUICK_CHECK, 1), 1458 // max=2=MAYBE 1459 new NormQuickCheckIntProperty(SRC_NFC, UProperty.NFC_QUICK_CHECK, 2), 1460 new NormQuickCheckIntProperty(SRC_NFKC, UProperty.NFKC_QUICK_CHECK, 2), 1461 new CombiningClassIntProperty(SRC_NFC) { // LEAD_CANONICAL_COMBINING_CLASS 1462 @Override 1463 int getValue(int c) { 1464 return 
Norm2AllModes.getNFCInstance().impl.getFCD16(c)>>8; 1465 } 1466 }, 1467 new CombiningClassIntProperty(SRC_NFC) { // TRAIL_CANONICAL_COMBINING_CLASS 1468 @Override 1469 int getValue(int c) { 1470 return Norm2AllModes.getNFCInstance().impl.getFCD16(c)&0xff; 1471 } 1472 }, 1473 new IntProperty(2, GCB_MASK, GCB_SHIFT), // GRAPHEME_CLUSTER_BREAK 1474 new IntProperty(2, SB_MASK, SB_SHIFT), // SENTENCE_BREAK 1475 new IntProperty(2, WB_MASK, WB_SHIFT), // WORD_BREAK 1476 new BiDiIntProperty() { // BIDI_PAIRED_BRACKET_TYPE 1477 @Override 1478 int getValue(int c) { 1479 return UBiDiProps.INSTANCE.getPairedBracketType(c); 1480 } 1481 }, 1482 new IntProperty(SRC_INPC) { 1483 @Override 1484 int getValue(int c) { 1485 return InPCTrie.INSTANCE.get(c); 1486 } 1487 @Override 1488 int getMaxValue(int which) { 1489 return 14; 1490 } 1491 }, 1492 new IntProperty(SRC_INSC) { 1493 @Override 1494 int getValue(int c) { 1495 return InSCTrie.INSTANCE.get(c); 1496 } 1497 @Override 1498 int getMaxValue(int which) { 1499 return 35; 1500 } 1501 }, 1502 new IntProperty(SRC_VO) { 1503 @Override 1504 int getValue(int c) { 1505 return VoTrie.INSTANCE.get(c); 1506 } 1507 @Override 1508 int getMaxValue(int which) { 1509 return 3; 1510 } 1511 }, 1512 }; 1513 getIntPropertyValue(int c, int which)1514 public int getIntPropertyValue(int c, int which) { 1515 if(which<UProperty.INT_START) { 1516 if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) { 1517 return binProps[which].contains(c) ? 
1 : 0; 1518 } 1519 } else if(which<UProperty.INT_LIMIT) { 1520 return intProps[which-UProperty.INT_START].getValue(c); 1521 } else if (which == UProperty.GENERAL_CATEGORY_MASK) { 1522 return getMask(getType(c)); 1523 } 1524 return 0; // undefined 1525 } 1526 getIntPropertyMaxValue(int which)1527 public int getIntPropertyMaxValue(int which) { 1528 if(which<UProperty.INT_START) { 1529 if(UProperty.BINARY_START<=which && which<UProperty.BINARY_LIMIT) { 1530 return 1; // maximum TRUE for all binary properties 1531 } 1532 } else if(which<UProperty.INT_LIMIT) { 1533 return intProps[which-UProperty.INT_START].getMaxValue(which); 1534 } 1535 return -1; // undefined 1536 } 1537 getSource(int which)1538 final int getSource(int which) { 1539 if(which<UProperty.BINARY_START) { 1540 return SRC_NONE; /* undefined */ 1541 } else if(which<UProperty.BINARY_LIMIT) { 1542 return binProps[which].getSource(); 1543 } else if(which<UProperty.INT_START) { 1544 return SRC_NONE; /* undefined */ 1545 } else if(which<UProperty.INT_LIMIT) { 1546 return intProps[which-UProperty.INT_START].getSource(); 1547 } else if(which<UProperty.STRING_START) { 1548 switch(which) { 1549 case UProperty.GENERAL_CATEGORY_MASK: 1550 case UProperty.NUMERIC_VALUE: 1551 return SRC_CHAR; 1552 1553 default: 1554 return SRC_NONE; 1555 } 1556 } else if(which<UProperty.STRING_LIMIT) { 1557 switch(which) { 1558 case UProperty.AGE: 1559 return SRC_PROPSVEC; 1560 1561 case UProperty.BIDI_MIRRORING_GLYPH: 1562 return SRC_BIDI; 1563 1564 case UProperty.CASE_FOLDING: 1565 case UProperty.LOWERCASE_MAPPING: 1566 case UProperty.SIMPLE_CASE_FOLDING: 1567 case UProperty.SIMPLE_LOWERCASE_MAPPING: 1568 case UProperty.SIMPLE_TITLECASE_MAPPING: 1569 case UProperty.SIMPLE_UPPERCASE_MAPPING: 1570 case UProperty.TITLECASE_MAPPING: 1571 case UProperty.UPPERCASE_MAPPING: 1572 return SRC_CASE; 1573 1574 case UProperty.ISO_COMMENT: 1575 case UProperty.NAME: 1576 case UProperty.UNICODE_1_NAME: 1577 return SRC_NAMES; 1578 1579 default: 1580 
return SRC_NONE; 1581 } 1582 } else { 1583 switch(which) { 1584 case UProperty.SCRIPT_EXTENSIONS: 1585 return SRC_PROPSVEC; 1586 default: 1587 return SRC_NONE; /* undefined */ 1588 } 1589 } 1590 } 1591 1592 /** 1593 * <p> 1594 * Unicode property names and property value names are compared 1595 * "loosely". Property[Value]Aliases.txt say: 1596 * <quote> 1597 * "With loose matching of property names, the case distinctions, 1598 * whitespace, and '_' are ignored." 1599 * </quote> 1600 * </p> 1601 * <p> 1602 * This function does just that, for ASCII (char *) name strings. 1603 * It is almost identical to ucnv_compareNames() but also ignores 1604 * ASCII White_Space characters (U+0009..U+000d). 1605 * </p> 1606 * @param name1 name to compare 1607 * @param name2 name to compare 1608 * @return 0 if names are equal, < 0 if name1 is less than name2 and > 0 1609 * if name1 is greater than name2. 1610 */ 1611 /* to be implemented in 2.4 1612 * public static int comparePropertyNames(String name1, String name2) 1613 { 1614 int result = 0; 1615 int i1 = 0; 1616 int i2 = 0; 1617 while (true) { 1618 char ch1 = 0; 1619 char ch2 = 0; 1620 // Ignore delimiters '-', '_', and ASCII White_Space 1621 if (i1 < name1.length()) { 1622 ch1 = name1.charAt(i1 ++); 1623 } 1624 while (ch1 == '-' || ch1 == '_' || ch1 == ' ' || ch1 == '\t' 1625 || ch1 == '\n' // synwee what is || ch1 == '\v' 1626 || ch1 == '\f' || ch1=='\r') { 1627 if (i1 < name1.length()) { 1628 ch1 = name1.charAt(i1 ++); 1629 } 1630 else { 1631 ch1 = 0; 1632 } 1633 } 1634 if (i2 < name2.length()) { 1635 ch2 = name2.charAt(i2 ++); 1636 } 1637 while (ch2 == '-' || ch2 == '_' || ch2 == ' ' || ch2 == '\t' 1638 || ch2 == '\n' // synwee what is || ch1 == '\v' 1639 || ch2 == '\f' || ch2=='\r') { 1640 if (i2 < name2.length()) { 1641 ch2 = name2.charAt(i2 ++); 1642 } 1643 else { 1644 ch2 = 0; 1645 } 1646 } 1647 1648 // If we reach the ends of both strings then they match 1649 if (ch1 == 0 && ch2 == 0) { 1650 return 0; 1651 } 1652 1653 // 
Case-insensitive comparison 1654 if (ch1 != ch2) { 1655 result = Character.toLowerCase(ch1) 1656 - Character.toLowerCase(ch2); 1657 if (result != 0) { 1658 return result; 1659 } 1660 } 1661 } 1662 } 1663 */ 1664 1665 /** 1666 * Get the the maximum values for some enum/int properties. 1667 * @return maximum values for the integer properties. 1668 */ getMaxValues(int column)1669 public int getMaxValues(int column) 1670 { 1671 // return m_maxBlockScriptValue_; 1672 1673 switch(column) { 1674 case 0: 1675 return m_maxBlockScriptValue_; 1676 case 2: 1677 return m_maxJTGValue_; 1678 default: 1679 return 0; 1680 } 1681 } 1682 1683 /** 1684 * Gets the type mask 1685 * @param type character type 1686 * @return mask 1687 */ getMask(int type)1688 public static final int getMask(int type) 1689 { 1690 return 1 << type; 1691 } 1692 1693 1694 /** 1695 * Returns the digit values of characters like 'A' - 'Z', normal, 1696 * half-width and full-width. This method assumes that the other digit 1697 * characters are checked by the calling method. 1698 * @param ch character to test 1699 * @return -1 if ch is not a character of the form 'A' - 'Z', otherwise 1700 * its corresponding digit will be returned. 1701 */ getEuropeanDigit(int ch)1702 public static int getEuropeanDigit(int ch) { 1703 if ((ch > 0x7a && ch < 0xff21) 1704 || ch < 0x41 || (ch > 0x5a && ch < 0x61) 1705 || ch > 0xff5a || (ch > 0xff3a && ch < 0xff41)) { 1706 return -1; 1707 } 1708 if (ch <= 0x7a) { 1709 // ch >= 0x41 or ch < 0x61 1710 return ch + 10 - ((ch <= 0x5a) ? 
0x41 : 0x61); 1711 } 1712 // ch >= 0xff21 1713 if (ch <= 0xff3a) { 1714 return ch + 10 - 0xff21; 1715 } 1716 // ch >= 0xff41 && ch <= 0xff5a 1717 return ch + 10 - 0xff41; 1718 } 1719 digit(int c)1720 public int digit(int c) { 1721 int value = getNumericTypeValue(getProperty(c)) - NTV_DECIMAL_START_; 1722 if(value<=9) { 1723 return value; 1724 } else { 1725 return -1; 1726 } 1727 } 1728 getNumericValue(int c)1729 public int getNumericValue(int c) { 1730 // slightly pruned version of getUnicodeNumericValue(), plus getEuropeanDigit() 1731 int ntv = getNumericTypeValue(getProperty(c)); 1732 1733 if(ntv==NTV_NONE_) { 1734 return getEuropeanDigit(c); 1735 } else if(ntv<NTV_DIGIT_START_) { 1736 /* decimal digit */ 1737 return ntv-NTV_DECIMAL_START_; 1738 } else if(ntv<NTV_NUMERIC_START_) { 1739 /* other digit */ 1740 return ntv-NTV_DIGIT_START_; 1741 } else if(ntv<NTV_FRACTION_START_) { 1742 /* small integer */ 1743 return ntv-NTV_NUMERIC_START_; 1744 } else if(ntv<NTV_LARGE_START_) { 1745 /* fraction */ 1746 return -2; 1747 } else if(ntv<NTV_BASE60_START_) { 1748 /* large, single-significant-digit integer */ 1749 int mant=(ntv>>5)-14; 1750 int exp=(ntv&0x1f)+2; 1751 if(exp<9 || (exp==9 && mant<=2)) { 1752 int numValue=mant; 1753 do { 1754 numValue*=10; 1755 } while(--exp>0); 1756 return numValue; 1757 } else { 1758 return -2; 1759 } 1760 } else if(ntv<NTV_FRACTION20_START_) { 1761 /* sexagesimal (base 60) integer */ 1762 int numValue=(ntv>>2)-0xbf; 1763 int exp=(ntv&3)+1; 1764 1765 switch(exp) { 1766 case 4: 1767 numValue*=60*60*60*60; 1768 break; 1769 case 3: 1770 numValue*=60*60*60; 1771 break; 1772 case 2: 1773 numValue*=60*60; 1774 break; 1775 case 1: 1776 numValue*=60; 1777 break; 1778 case 0: 1779 default: 1780 break; 1781 } 1782 1783 return numValue; 1784 } else if(ntv<NTV_RESERVED_START_) { 1785 // fraction-20 e.g. 
3/80 1786 return -2; 1787 } else { 1788 /* reserved */ 1789 return -2; 1790 } 1791 } 1792 getUnicodeNumericValue(int c)1793 public double getUnicodeNumericValue(int c) { 1794 // equivalent to c version double u_getNumericValue(UChar32 c) 1795 int ntv = getNumericTypeValue(getProperty(c)); 1796 1797 if(ntv==NTV_NONE_) { 1798 return UCharacter.NO_NUMERIC_VALUE; 1799 } else if(ntv<NTV_DIGIT_START_) { 1800 /* decimal digit */ 1801 return ntv-NTV_DECIMAL_START_; 1802 } else if(ntv<NTV_NUMERIC_START_) { 1803 /* other digit */ 1804 return ntv-NTV_DIGIT_START_; 1805 } else if(ntv<NTV_FRACTION_START_) { 1806 /* small integer */ 1807 return ntv-NTV_NUMERIC_START_; 1808 } else if(ntv<NTV_LARGE_START_) { 1809 /* fraction */ 1810 int numerator=(ntv>>4)-12; 1811 int denominator=(ntv&0xf)+1; 1812 return (double)numerator/denominator; 1813 } else if(ntv<NTV_BASE60_START_) { 1814 /* large, single-significant-digit integer */ 1815 double numValue; 1816 int mant=(ntv>>5)-14; 1817 int exp=(ntv&0x1f)+2; 1818 numValue=mant; 1819 1820 /* multiply by 10^exp without math.h */ 1821 while(exp>=4) { 1822 numValue*=10000.; 1823 exp-=4; 1824 } 1825 switch(exp) { 1826 case 3: 1827 numValue*=1000.; 1828 break; 1829 case 2: 1830 numValue*=100.; 1831 break; 1832 case 1: 1833 numValue*=10.; 1834 break; 1835 case 0: 1836 default: 1837 break; 1838 } 1839 1840 return numValue; 1841 } else if(ntv<NTV_FRACTION20_START_) { 1842 /* sexagesimal (base 60) integer */ 1843 int numValue=(ntv>>2)-0xbf; 1844 int exp=(ntv&3)+1; 1845 1846 switch(exp) { 1847 case 4: 1848 numValue*=60*60*60*60; 1849 break; 1850 case 3: 1851 numValue*=60*60*60; 1852 break; 1853 case 2: 1854 numValue*=60*60; 1855 break; 1856 case 1: 1857 numValue*=60; 1858 break; 1859 case 0: 1860 default: 1861 break; 1862 } 1863 1864 return numValue; 1865 } else if(ntv<NTV_RESERVED_START_) { 1866 // fraction-20 e.g. 
3/80 1867 int frac20=ntv-NTV_FRACTION20_START_; // 0..0x17 1868 int numerator=2*(frac20&3)+1; 1869 int denominator=20<<(frac20>>2); 1870 return (double)numerator/denominator; 1871 } else { 1872 /* reserved */ 1873 return UCharacter.NO_NUMERIC_VALUE; 1874 } 1875 } 1876 1877 // protected variables ----------------------------------------------- 1878 1879 /** 1880 * Extra property trie 1881 */ 1882 Trie2_16 m_additionalTrie_; 1883 /** 1884 * Extra property vectors, 1st column for age and second for binary 1885 * properties. 1886 */ 1887 int m_additionalVectors_[]; 1888 /** 1889 * Number of additional columns 1890 */ 1891 int m_additionalColumnsCount_; 1892 /** 1893 * Maximum values for block, bits used as in vector word 1894 * 0 1895 */ 1896 int m_maxBlockScriptValue_; 1897 /** 1898 * Maximum values for script, bits used as in vector word 1899 * 0 1900 */ 1901 int m_maxJTGValue_; 1902 1903 /** 1904 * Script_Extensions data 1905 */ 1906 public char[] m_scriptExtensions_; 1907 1908 // private variables ------------------------------------------------- 1909 1910 /** 1911 * Default name of the datafile 1912 */ 1913 private static final String DATA_FILE_NAME_ = "uprops.icu"; 1914 1915 // property data constants ------------------------------------------------- 1916 1917 /** 1918 * Numeric types and values in the main properties words. 1919 */ 1920 private static final int NUMERIC_TYPE_VALUE_SHIFT_ = 6; getNumericTypeValue(int props)1921 private static final int getNumericTypeValue(int props) { 1922 return props >> NUMERIC_TYPE_VALUE_SHIFT_; 1923 } 1924 /* constants for the storage form of numeric types and values */ 1925 /** No numeric value. 
*/ 1926 private static final int NTV_NONE_ = 0; 1927 /** Decimal digits: nv=0..9 */ 1928 private static final int NTV_DECIMAL_START_ = 1; 1929 /** Other digits: nv=0..9 */ 1930 private static final int NTV_DIGIT_START_ = 11; 1931 /** Small integers: nv=0..154 */ 1932 private static final int NTV_NUMERIC_START_ = 21; 1933 /** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */ 1934 private static final int NTV_FRACTION_START_ = 0xb0; 1935 /** 1936 * Large integers: 1937 * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33) 1938 * (only one significant decimal digit) 1939 */ 1940 private static final int NTV_LARGE_START_ = 0x1e0; 1941 /** 1942 * Sexagesimal numbers: 1943 * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4) 1944 */ 1945 private static final int NTV_BASE60_START_=0x300; 1946 /** 1947 * Fraction-20 values: 1948 * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640 1949 * numerator: num = 2*(frac20&3)+1 1950 * denominator: den = 20<<(frac20>>2) 1951 */ 1952 private static final int NTV_FRACTION20_START_ = NTV_BASE60_START_ + 36; // 0x300+9*4=0x324 1953 /** No numeric value (yet). */ 1954 private static final int NTV_RESERVED_START_ = NTV_FRACTION20_START_ + 24; // 0x324+6*4=0x34c 1955 ntvGetType(int ntv)1956 private static final int ntvGetType(int ntv) { 1957 return 1958 (ntv==NTV_NONE_) ? NumericType.NONE : 1959 (ntv<NTV_DIGIT_START_) ? NumericType.DECIMAL : 1960 (ntv<NTV_NUMERIC_START_) ? NumericType.DIGIT : 1961 NumericType.NUMERIC; 1962 } 1963 1964 /* 1965 * Properties in vector word 0 1966 * Bits 1967 * 31..24 DerivedAge version major/minor one nibble each 1968 * 23..22 3..1: Bits 7..0 = Script_Extensions index 1969 * 3: Script value from Script_Extensions 1970 * 2: Script=Inherited 1971 * 1: Script=Common 1972 * 0: Script=bits 7..0 1973 * 21..20 reserved 1974 * 19..17 East Asian Width 1975 * 16.. 8 UBlockCode 1976 * 7.. 
0 UScriptCode
     */

    /**
     * Script_Extensions: mask includes Script.
     * Covers bits 23..22 (the selector) together with bits 7..0 (the
     * script code or Script_Extensions index) of vector word 0.
     */
    public static final int SCRIPT_X_MASK = 0x00c000ff;
    //private static final int SCRIPT_X_SHIFT = 22;
    /**
     * Integer properties mask and shift values for East Asian cell width.
     * Equivalent to icu4c UPROPS_EA_MASK (bits 19..17 of vector word 0).
     */
    private static final int EAST_ASIAN_MASK_ = 0x000e0000;
    /**
     * Integer properties mask and shift values for East Asian cell width.
     * Equivalent to icu4c UPROPS_EA_SHIFT
     */
    private static final int EAST_ASIAN_SHIFT_ = 17;
    /**
     * Integer properties mask and shift values for blocks.
     * Equivalent to icu4c UPROPS_BLOCK_MASK (bits 16..8 of vector word 0).
     */
    private static final int BLOCK_MASK_ = 0x0001ff00;
    /**
     * Integer properties mask and shift values for blocks.
     * Equivalent to icu4c UPROPS_BLOCK_SHIFT
     */
    private static final int BLOCK_SHIFT_ = 8;
    /**
     * Integer properties mask and shift values for scripts
     * (bits 7..0 of vector word 0, so no shift is needed).
     * Equivalent to icu4c UPROPS_SHIFT_MASK
     * NOTE(review): "UPROPS_SHIFT_MASK" looks like a typo for the icu4c
     * script mask constant - confirm the name against uprops.h.
     */
    public static final int SCRIPT_MASK_ = 0x000000ff;

    /* SCRIPT_X_WITH_COMMON must be the lowest value that involves Script_Extensions. */
    /* The three values below are the selector values 1, 2 and 3 in bits 23..22. */
    public static final int SCRIPT_X_WITH_COMMON = 0x400000;
    public static final int SCRIPT_X_WITH_INHERITED = 0x800000;
    public static final int SCRIPT_X_WITH_OTHER = 0xc00000;

    /**
     * Additional properties used in internal trie data
     */
    /*
     * Properties in vector word 1
     * Each bit encodes one binary property.
     * The following constants represent the bit number, use 1<<UPROPS_XYZ.
     * UPROPS_BINARY_1_TOP<=32!
     *
     * Keep this list of property enums in sync with
     * propListNames[] in icu/source/tools/genprops/props2.c!
     *
     * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_".
*/
    // Bit numbers (not masks) of the binary properties in vector word 1;
    // build the mask for a property with 1 << constant.
    // All 32 bit positions 0..31 are assigned.
    private static final int WHITE_SPACE_PROPERTY_ = 0;
    private static final int DASH_PROPERTY_ = 1;
    private static final int HYPHEN_PROPERTY_ = 2;
    private static final int QUOTATION_MARK_PROPERTY_ = 3;
    private static final int TERMINAL_PUNCTUATION_PROPERTY_ = 4;
    private static final int MATH_PROPERTY_ = 5;
    private static final int HEX_DIGIT_PROPERTY_ = 6;
    private static final int ASCII_HEX_DIGIT_PROPERTY_ = 7;
    private static final int ALPHABETIC_PROPERTY_ = 8;
    private static final int IDEOGRAPHIC_PROPERTY_ = 9;
    private static final int DIACRITIC_PROPERTY_ = 10;
    private static final int EXTENDER_PROPERTY_ = 11;
    private static final int NONCHARACTER_CODE_POINT_PROPERTY_ = 12;
    private static final int GRAPHEME_EXTEND_PROPERTY_ = 13;
    private static final int GRAPHEME_LINK_PROPERTY_ = 14;
    private static final int IDS_BINARY_OPERATOR_PROPERTY_ = 15;
    private static final int IDS_TRINARY_OPERATOR_PROPERTY_ = 16;
    private static final int RADICAL_PROPERTY_ = 17;
    private static final int UNIFIED_IDEOGRAPH_PROPERTY_ = 18;
    private static final int DEFAULT_IGNORABLE_CODE_POINT_PROPERTY_ = 19;
    private static final int DEPRECATED_PROPERTY_ = 20;
    private static final int LOGICAL_ORDER_EXCEPTION_PROPERTY_ = 21;
    private static final int XID_START_PROPERTY_ = 22;
    private static final int XID_CONTINUE_PROPERTY_ = 23;
    private static final int ID_START_PROPERTY_ = 24;
    private static final int ID_CONTINUE_PROPERTY_ = 25;
    private static final int GRAPHEME_BASE_PROPERTY_ = 26;
    private static final int S_TERM_PROPERTY_ = 27;
    private static final int VARIATION_SELECTOR_PROPERTY_ = 28;
    private static final int PATTERN_SYNTAX = 29; /* new in ICU 3.4 and Unicode 4.1 */
    private static final int PATTERN_WHITE_SPACE = 30;
    private static final int PREPENDED_CONCATENATION_MARK = 31;  // new in ICU 60 and Unicode 10

    /*
     * 
Properties in vector word 2
     * Bits
     * 31..26   http://www.unicode.org/reports/tr51/#Emoji_Properties
     * 25..20   Line Break
     * 19..15   Sentence Break
     * 14..10   Word Break
     *  9.. 5   Grapheme Cluster Break
     *  4.. 0   Decomposition Type
     */
    // Bit numbers (not masks) of the emoji binary properties, bits 31..26 of vector word 2.
    private static final int PROPS_2_EXTENDED_PICTOGRAPHIC=26;
    private static final int PROPS_2_EMOJI_COMPONENT = 27;
    private static final int PROPS_2_EMOJI = 28;
    private static final int PROPS_2_EMOJI_PRESENTATION = 29;
    private static final int PROPS_2_EMOJI_MODIFIER = 30;
    private static final int PROPS_2_EMOJI_MODIFIER_BASE = 31;

    // Line Break: bits 25..20 of vector word 2.
    private static final int LB_MASK          = 0x03f00000;
    private static final int LB_SHIFT         = 20;

    // Sentence Break: bits 19..15 of vector word 2.
    private static final int SB_MASK          = 0x000f8000;
    private static final int SB_SHIFT         = 15;

    // Word Break: bits 14..10 of vector word 2.
    private static final int WB_MASK          = 0x00007c00;
    private static final int WB_SHIFT         = 10;

    // Grapheme Cluster Break: bits 9..5 of vector word 2.
    private static final int GCB_MASK         = 0x000003e0;
    private static final int GCB_SHIFT        = 5;

    /**
     * Integer properties mask for decomposition type.
     * Equivalent to icu4c UPROPS_DT_MASK.
2094 */ 2095 private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f; 2096 2097 /** 2098 * First nibble shift 2099 */ 2100 private static final int FIRST_NIBBLE_SHIFT_ = 0x4; 2101 /** 2102 * Second nibble mask 2103 */ 2104 private static final int LAST_NIBBLE_MASK_ = 0xF; 2105 /** 2106 * Age value shift 2107 */ 2108 private static final int AGE_SHIFT_ = 24; 2109 2110 2111 // private constructors -------------------------------------------------- 2112 2113 /** 2114 * Constructor 2115 * @exception IOException thrown when data reading fails or data corrupted 2116 */ UCharacterProperty()2117 private UCharacterProperty() throws IOException 2118 { 2119 // consistency check 2120 if(binProps.length!=UProperty.BINARY_LIMIT) { 2121 throw new ICUException("binProps.length!=UProperty.BINARY_LIMIT"); 2122 } 2123 if(intProps.length!=(UProperty.INT_LIMIT-UProperty.INT_START)) { 2124 throw new ICUException("intProps.length!=(UProperty.INT_LIMIT-UProperty.INT_START)"); 2125 } 2126 2127 // jar access 2128 ByteBuffer bytes=ICUBinary.getRequiredData(DATA_FILE_NAME_); 2129 m_unicodeVersion_ = ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, new IsAcceptable()); 2130 // Read or skip the 16 indexes. 
2131 int propertyOffset = bytes.getInt(); 2132 /* exceptionOffset = */ bytes.getInt(); 2133 /* caseOffset = */ bytes.getInt(); 2134 int additionalOffset = bytes.getInt(); 2135 int additionalVectorsOffset = bytes.getInt(); 2136 m_additionalColumnsCount_ = bytes.getInt(); 2137 int scriptExtensionsOffset = bytes.getInt(); 2138 int reservedOffset7 = bytes.getInt(); 2139 /* reservedOffset8 = */ bytes.getInt(); 2140 /* dataTopOffset = */ bytes.getInt(); 2141 m_maxBlockScriptValue_ = bytes.getInt(); 2142 m_maxJTGValue_ = bytes.getInt(); 2143 ICUBinary.skipBytes(bytes, (16 - 12) << 2); 2144 2145 // read the main properties trie 2146 m_trie_ = Trie2_16.createFromSerialized(bytes); 2147 int expectedTrieLength = (propertyOffset - 16) * 4; 2148 int trieLength = m_trie_.getSerializedLength(); 2149 if(trieLength > expectedTrieLength) { 2150 throw new IOException("uprops.icu: not enough bytes for main trie"); 2151 } 2152 // skip padding after trie bytes 2153 ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength); 2154 2155 // skip unused intervening data structures 2156 ICUBinary.skipBytes(bytes, (additionalOffset - propertyOffset) * 4); 2157 2158 if(m_additionalColumnsCount_ > 0) { 2159 // reads the additional property block 2160 m_additionalTrie_ = Trie2_16.createFromSerialized(bytes); 2161 expectedTrieLength = (additionalVectorsOffset-additionalOffset)*4; 2162 trieLength = m_additionalTrie_.getSerializedLength(); 2163 if(trieLength > expectedTrieLength) { 2164 throw new IOException("uprops.icu: not enough bytes for additional-properties trie"); 2165 } 2166 // skip padding after trie bytes 2167 ICUBinary.skipBytes(bytes, expectedTrieLength - trieLength); 2168 2169 // additional properties 2170 int size = scriptExtensionsOffset - additionalVectorsOffset; 2171 m_additionalVectors_ = ICUBinary.getInts(bytes, size, 0); 2172 } 2173 2174 // Script_Extensions 2175 int numChars = (reservedOffset7 - scriptExtensionsOffset) * 2; 2176 if(numChars > 0) { 2177 m_scriptExtensions_ = 
ICUBinary.getChars(bytes, numChars, 0); 2178 } 2179 } 2180 2181 private static final class IsAcceptable implements ICUBinary.Authenticate { 2182 @Override isDataVersionAcceptable(byte version[])2183 public boolean isDataVersionAcceptable(byte version[]) { 2184 return version[0] == 7; 2185 } 2186 } 2187 private static final int DATA_FORMAT = 0x5550726F; // "UPro" 2188 2189 // private methods ------------------------------------------------------- 2190 2191 /* 2192 * Compare additional properties to see if it has argument type 2193 * @param property 32 bit properties 2194 * @param type character type 2195 * @return true if property has type 2196 */ 2197 /*private boolean compareAdditionalType(int property, int type) 2198 { 2199 return (property & (1 << type)) != 0; 2200 }*/ 2201 2202 // property starts for UnicodeSet -------------------------------------- *** 2203 2204 private static final int TAB = 0x0009; 2205 //private static final int LF = 0x000a; 2206 //private static final int FF = 0x000c; 2207 private static final int CR = 0x000d; 2208 private static final int U_A = 0x0041; 2209 private static final int U_F = 0x0046; 2210 private static final int U_Z = 0x005a; 2211 private static final int U_a = 0x0061; 2212 private static final int U_f = 0x0066; 2213 private static final int U_z = 0x007a; 2214 private static final int DEL = 0x007f; 2215 private static final int NL = 0x0085; 2216 private static final int NBSP = 0x00a0; 2217 private static final int CGJ = 0x034f; 2218 private static final int FIGURESP= 0x2007; 2219 private static final int HAIRSP = 0x200a; 2220 //private static final int ZWNJ = 0x200c; 2221 //private static final int ZWJ = 0x200d; 2222 private static final int RLM = 0x200f; 2223 private static final int NNBSP = 0x202f; 2224 private static final int WJ = 0x2060; 2225 private static final int INHSWAP = 0x206a; 2226 private static final int NOMDIG = 0x206f; 2227 private static final int U_FW_A = 0xff21; 2228 private static final int U_FW_F = 
0xff26; 2229 private static final int U_FW_Z = 0xff3a; 2230 private static final int U_FW_a = 0xff41; 2231 private static final int U_FW_f = 0xff46; 2232 private static final int U_FW_z = 0xff5a; 2233 private static final int ZWNBSP = 0xfeff; 2234 addPropertyStarts(UnicodeSet set)2235 public UnicodeSet addPropertyStarts(UnicodeSet set) { 2236 /* add the start code point of each same-value range of the main trie */ 2237 Iterator<Trie2.Range> trieIterator = m_trie_.iterator(); 2238 Trie2.Range range; 2239 while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 2240 set.add(range.startCodePoint); 2241 } 2242 2243 /* add code points with hardcoded properties, plus the ones following them */ 2244 2245 /* add for u_isblank() */ 2246 set.add(TAB); 2247 set.add(TAB+1); 2248 2249 /* add for IS_THAT_CONTROL_SPACE() */ 2250 set.add(CR+1); /* range TAB..CR */ 2251 set.add(0x1c); 2252 set.add(0x1f+1); 2253 set.add(NL); 2254 set.add(NL+1); 2255 2256 /* add for u_isIDIgnorable() what was not added above */ 2257 set.add(DEL); /* range DEL..NBSP-1, NBSP added below */ 2258 set.add(HAIRSP); 2259 set.add(RLM+1); 2260 set.add(INHSWAP); 2261 set.add(NOMDIG+1); 2262 set.add(ZWNBSP); 2263 set.add(ZWNBSP+1); 2264 2265 /* add no-break spaces for u_isWhitespace() what was not added above */ 2266 set.add(NBSP); 2267 set.add(NBSP+1); 2268 set.add(FIGURESP); 2269 set.add(FIGURESP+1); 2270 set.add(NNBSP); 2271 set.add(NNBSP+1); 2272 2273 /* add for u_charDigitValue() */ 2274 // TODO remove when UCharacter.getHanNumericValue() is changed to just return 2275 // Unicode numeric values 2276 set.add(0x3007); 2277 set.add(0x3008); 2278 set.add(0x4e00); 2279 set.add(0x4e01); 2280 set.add(0x4e8c); 2281 set.add(0x4e8d); 2282 set.add(0x4e09); 2283 set.add(0x4e0a); 2284 set.add(0x56db); 2285 set.add(0x56dc); 2286 set.add(0x4e94); 2287 set.add(0x4e95); 2288 set.add(0x516d); 2289 set.add(0x516e); 2290 set.add(0x4e03); 2291 set.add(0x4e04); 2292 set.add(0x516b); 2293 set.add(0x516c); 
2294 set.add(0x4e5d); 2295 set.add(0x4e5e); 2296 2297 /* add for u_digit() */ 2298 set.add(U_a); 2299 set.add(U_z+1); 2300 set.add(U_A); 2301 set.add(U_Z+1); 2302 set.add(U_FW_a); 2303 set.add(U_FW_z+1); 2304 set.add(U_FW_A); 2305 set.add(U_FW_Z+1); 2306 2307 /* add for u_isxdigit() */ 2308 set.add(U_f+1); 2309 set.add(U_F+1); 2310 set.add(U_FW_f+1); 2311 set.add(U_FW_F+1); 2312 2313 /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ 2314 set.add(WJ); /* range WJ..NOMDIG */ 2315 set.add(0xfff0); 2316 set.add(0xfffb+1); 2317 set.add(0xe0000); 2318 set.add(0xe0fff+1); 2319 2320 /* add for UCHAR_GRAPHEME_BASE and others */ 2321 set.add(CGJ); 2322 set.add(CGJ+1); 2323 2324 return set; // for chaining 2325 } 2326 upropsvec_addPropertyStarts(UnicodeSet set)2327 public void upropsvec_addPropertyStarts(UnicodeSet set) { 2328 /* add the start code point of each same-value range of the properties vectors trie */ 2329 if(m_additionalColumnsCount_>0) { 2330 /* if m_additionalColumnsCount_==0 then the properties vectors trie may not be there at all */ 2331 Iterator<Trie2.Range> trieIterator = m_additionalTrie_.iterator(); 2332 Trie2.Range range; 2333 while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 2334 set.add(range.startCodePoint); 2335 } 2336 } 2337 } 2338 ulayout_addPropertyStarts(int src, UnicodeSet set)2339 public UnicodeSet ulayout_addPropertyStarts(int src, UnicodeSet set) { 2340 CodePointTrie trie; 2341 switch (src) { 2342 case SRC_INPC: 2343 trie = InPCTrie.INSTANCE; 2344 break; 2345 case SRC_INSC: 2346 trie = InSCTrie.INSTANCE; 2347 break; 2348 case SRC_VO: 2349 trie = VoTrie.INSTANCE; 2350 break; 2351 default: 2352 throw new IllegalStateException(); 2353 } 2354 2355 // Add the start code point of each same-value range of the trie. 
2356 CodePointMap.Range range = new CodePointMap.Range(); 2357 int start = 0; 2358 while (trie.getRange(start, null, range)) { 2359 set.add(start); 2360 start = range.getEnd() + 1; 2361 } 2362 return set; 2363 } 2364 2365 // This static initializer block must be placed after 2366 // other static member initialization 2367 static { 2368 try { 2369 INSTANCE = new UCharacterProperty(); 2370 } 2371 catch (IOException e) { 2372 throw new MissingResourceException(e.getMessage(),"",""); 2373 } 2374 } 2375 2376 /*---------------------------------------------------------------- 2377 * Inclusions list 2378 *----------------------------------------------------------------*/ 2379 2380 /* 2381 * Return a set of characters for property enumeration. 2382 * The set implicitly contains 0x110000 as well, which is one more than the highest 2383 * Unicode code point. 2384 * 2385 * This set is used as an ordered list - its code points are ordered, and 2386 * consecutive code points (in Unicode code point order) in the set define a range. 2387 * For each two consecutive characters (start, limit) in the set, 2388 * all of the UCD/normalization and related properties for 2389 * all code points start..limit-1 are all the same, 2390 * except for character names and ISO comments. 2391 * 2392 * All Unicode code points U+0000..U+10ffff are covered by these ranges. 2393 * The ranges define a partition of the Unicode code space. 2394 * ICU uses the inclusions set to enumerate properties for generating 2395 * UnicodeSets containing all code points that have a certain property value. 2396 * 2397 * The Inclusion List is generated from the UCD. It is generated 2398 * by enumerating the data tries, and code points for hardcoded properties 2399 * are added as well. 
2400 * 2401 * -------------------------------------------------------------------------- 2402 * 2403 * The following are ideas for getting properties-unique code point ranges, 2404 * with possible optimizations beyond the current implementation. 2405 * These optimizations would require more code and be more fragile. 2406 * The current implementation generates one single list (set) for all properties. 2407 * 2408 * To enumerate properties efficiently, one needs to know ranges of 2409 * repetitive values, so that the value of only each start code point 2410 * can be applied to the whole range. 2411 * This information is in principle available in the uprops.icu/unorm.icu data. 2412 * 2413 * There are two obstacles: 2414 * 2415 * 1. Some properties are computed from multiple data structures, 2416 * making it necessary to get repetitive ranges by intersecting 2417 * ranges from multiple tries. 2418 * 2419 * 2. It is not economical to write code for getting repetitive ranges 2420 * that are precise for each of some 50 properties. 2421 * 2422 * Compromise ideas: 2423 * 2424 * - Get ranges per trie, not per individual property. 2425 * Each range contains the same values for a whole group of properties. 2426 * This would generate currently five range sets, two for uprops.icu tries 2427 * and three for unorm.icu tries. 2428 * 2429 * - Combine sets of ranges for multiple tries to get sufficient sets 2430 * for properties, e.g., the uprops.icu main and auxiliary tries 2431 * for all non-normalization properties. 2432 * 2433 * Ideas for representing ranges and combining them: 2434 * 2435 * - A UnicodeSet could hold just the start code points of ranges. 2436 * Multiple sets are easily combined by or-ing them together. 2437 * 2438 * - Alternatively, a UnicodeSet could hold each even-numbered range. 
2439 * All ranges could be enumerated by using each start code point 2440 * (for the even-numbered ranges) as well as each limit (end+1) code point 2441 * (for the odd-numbered ranges). 2442 * It should be possible to combine two such sets by xor-ing them, 2443 * but no more than two. 2444 * 2445 * The second way to represent ranges may(?!) yield smaller UnicodeSet arrays, 2446 * but the first one is certainly simpler and applicable for combining more than 2447 * two range sets. 2448 * 2449 * It is possible to combine all range sets for all uprops/unorm tries into one 2450 * set that can be used for all properties. 2451 * As an optimization, there could be less-combined range sets for certain 2452 * groups of properties. 2453 * The relationship of which less-combined range set to use for which property 2454 * depends on the implementation of the properties and must be hardcoded 2455 * - somewhat error-prone and higher maintenance but can be tested easily 2456 * by building property sets "the simple way" in test code. 2457 * 2458 * --- 2459 * 2460 * Do not use a UnicodeSet pattern because that causes infinite recursion; 2461 * UnicodeSet depends on the inclusions set. 2462 * 2463 * --- 2464 * 2465 * getInclusions() is commented out starting 2005-feb-12 because 2466 * UnicodeSet now calls the uxyz_addPropertyStarts() directly, 2467 * and only for the relevant property source. 2468 */ 2469 /* 2470 public UnicodeSet getInclusions() { 2471 UnicodeSet set = new UnicodeSet(); 2472 NormalizerImpl.addPropertyStarts(set); 2473 addPropertyStarts(set); 2474 return set; 2475 } 2476 */ 2477 } 2478