1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /** 4 ******************************************************************************* 5 * Copyright (C) 2006-2015, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.dev.test.charset; 11 12 import java.nio.BufferOverflowException; 13 import java.nio.ByteBuffer; 14 import java.nio.CharBuffer; 15 import java.nio.charset.CharacterCodingException; 16 import java.nio.charset.Charset; 17 import java.nio.charset.CharsetDecoder; 18 import java.nio.charset.CharsetEncoder; 19 import java.nio.charset.CoderResult; 20 import java.nio.charset.CodingErrorAction; 21 import java.nio.charset.UnsupportedCharsetException; 22 import java.nio.charset.spi.CharsetProvider; 23 import java.util.ArrayList; 24 import java.util.Iterator; 25 import java.util.MissingResourceException; 26 import java.util.Set; 27 import java.util.SortedMap; 28 29 import org.junit.Test; 30 import org.junit.runner.RunWith; 31 import org.junit.runners.JUnit4; 32 33 import com.ibm.icu.charset.CharsetCallback; 34 import com.ibm.icu.charset.CharsetDecoderICU; 35 import com.ibm.icu.charset.CharsetEncoderICU; 36 import com.ibm.icu.charset.CharsetICU; 37 import com.ibm.icu.charset.CharsetProviderICU; 38 import com.ibm.icu.dev.test.TestFmwk; 39 import com.ibm.icu.text.UTF16; 40 import com.ibm.icu.text.UnicodeSet; 41 42 @RunWith(JUnit4.class) 43 public class TestCharset extends TestFmwk { 44 @Test TestUTF16Converter()45 public void TestUTF16Converter(){ 46 CharsetProvider icu = new CharsetProviderICU(); 47 Charset cs1 = icu.charsetForName("UTF-16BE"); 48 CharsetEncoder e1 = cs1.newEncoder(); 49 CharsetDecoder d1 = cs1.newDecoder(); 50 51 Charset cs2 = icu.charsetForName("UTF-16LE"); 52 CharsetEncoder e2 = cs2.newEncoder(); 53 CharsetDecoder d2 = 
cs2.newDecoder(); 54 55 for(int i=0x0000; i<0x10FFFF; i+=0xFF){ 56 CharBuffer us = CharBuffer.allocate(0xFF*2); 57 ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8); 58 ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8); 59 for(int j=0;j<0xFF; j++){ 60 int c = i+j; 61 62 if((c>=0xd800&&c<=0xdFFF)||c>0x10FFFF){ 63 continue; 64 } 65 66 if(c>0xFFFF){ 67 char lead = UTF16.getLeadSurrogate(c); 68 char trail = UTF16.getTrailSurrogate(c); 69 if(!UTF16.isLeadSurrogate(lead)){ 70 errln("lead is not lead!"+lead+" for cp: \\U"+Integer.toHexString(c)); 71 continue; 72 } 73 if(!UTF16.isTrailSurrogate(trail)){ 74 errln("trail is not trail!"+trail); 75 continue; 76 } 77 us.put(lead); 78 us.put(trail); 79 bs1.put((byte)(lead>>8)); 80 bs1.put((byte)(lead&0xFF)); 81 bs1.put((byte)(trail>>8)); 82 bs1.put((byte)(trail&0xFF)); 83 84 bs2.put((byte)(lead&0xFF)); 85 bs2.put((byte)(lead>>8)); 86 bs2.put((byte)(trail&0xFF)); 87 bs2.put((byte)(trail>>8)); 88 }else{ 89 90 if(c<0xFF){ 91 bs1.put((byte)0x00); 92 bs1.put((byte)(c)); 93 bs2.put((byte)(c)); 94 bs2.put((byte)0x00); 95 }else{ 96 bs1.put((byte)(c>>8)); 97 bs1.put((byte)(c&0xFF)); 98 99 bs2.put((byte)(c&0xFF)); 100 bs2.put((byte)(c>>8)); 101 } 102 us.put((char)c); 103 } 104 } 105 106 107 us.limit(us.position()); 108 us.position(0); 109 if(us.length()==0){ 110 continue; 111 } 112 113 114 bs1.limit(bs1.position()); 115 bs1.position(0); 116 ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity()); 117 //newBS.put((byte)0xFE); 118 //newBS.put((byte)0xFF); 119 newBS.put(bs1); 120 bs1.position(0); 121 smBufDecode(d1, "UTF-16", bs1, us); 122 smBufEncode(e1, "UTF-16", us, newBS); 123 124 bs2.limit(bs2.position()); 125 bs2.position(0); 126 newBS.clear(); 127 //newBS.put((byte)0xFF); 128 //newBS.put((byte)0xFE); 129 newBS.put(bs2); 130 bs2.position(0); 131 smBufDecode(d2, "UTF16-LE", bs2, us); 132 smBufEncode(e2, "UTF-16LE", us, newBS); 133 134 } 135 } 136 137 @Test TestUTF32Converter()138 public void TestUTF32Converter(){ 139 CharsetProvider icu = new 
CharsetProviderICU(); 140 Charset cs1 = icu.charsetForName("UTF-32BE"); 141 CharsetEncoder e1 = cs1.newEncoder(); 142 CharsetDecoder d1 = cs1.newDecoder(); 143 144 Charset cs2 = icu.charsetForName("UTF-32LE"); 145 CharsetEncoder e2 = cs2.newEncoder(); 146 CharsetDecoder d2 = cs2.newDecoder(); 147 148 for(int i=0x000; i<0x10FFFF; i+=0xFF){ 149 CharBuffer us = CharBuffer.allocate(0xFF*2); 150 ByteBuffer bs1 = ByteBuffer.allocate(0xFF*8); 151 ByteBuffer bs2 = ByteBuffer.allocate(0xFF*8); 152 for(int j=0;j<0xFF; j++){ 153 int c = i+j; 154 155 if((c>=0xd800&&c<=0xdFFF)||c>0x10FFFF){ 156 continue; 157 } 158 159 if(c>0xFFFF){ 160 char lead = UTF16.getLeadSurrogate(c); 161 char trail = UTF16.getTrailSurrogate(c); 162 163 us.put(lead); 164 us.put(trail); 165 }else{ 166 us.put((char)c); 167 } 168 bs1.put((byte) (c >>> 24)); 169 bs1.put((byte) (c >>> 16)); 170 bs1.put((byte) (c >>> 8)); 171 bs1.put((byte) (c & 0xFF)); 172 173 bs2.put((byte) (c & 0xFF)); 174 bs2.put((byte) (c >>> 8)); 175 bs2.put((byte) (c >>> 16)); 176 bs2.put((byte) (c >>> 24)); 177 } 178 bs1.limit(bs1.position()); 179 bs1.position(0); 180 bs2.limit(bs2.position()); 181 bs2.position(0); 182 us.limit(us.position()); 183 us.position(0); 184 if(us.length()==0){ 185 continue; 186 } 187 188 189 ByteBuffer newBS = ByteBuffer.allocate(bs1.capacity()); 190 191 newBS.put((byte)0x00); 192 newBS.put((byte)0x00); 193 newBS.put((byte)0xFE); 194 newBS.put((byte)0xFF); 195 196 newBS.put(bs1); 197 bs1.position(0); 198 smBufDecode(d1, "UTF-32", bs1, us); 199 smBufEncode(e1, "UTF-32", us, newBS); 200 201 202 newBS.clear(); 203 204 newBS.put((byte)0xFF); 205 newBS.put((byte)0xFE); 206 newBS.put((byte)0x00); 207 newBS.put((byte)0x00); 208 209 newBS.put(bs2); 210 bs2.position(0); 211 smBufDecode(d2, "UTF-32LE", bs2, us); 212 smBufEncode(e2, "UTF-32LE", us, newBS); 213 214 } 215 } 216 217 @Test TestASCIIConverter()218 public void TestASCIIConverter() { 219 runTestASCIIBasedConverter("ASCII", 0x80); 220 } 221 222 @Test 
Test88591Converter()223 public void Test88591Converter() { 224 runTestASCIIBasedConverter("iso-8859-1", 0x100); 225 } 226 runTestASCIIBasedConverter(String converter, int limit)227 public void runTestASCIIBasedConverter(String converter, int limit){ 228 CharsetProvider icu = new CharsetProviderICU(); 229 Charset icuChar = icu.charsetForName(converter); 230 CharsetEncoder encoder = icuChar.newEncoder(); 231 CharsetDecoder decoder = icuChar.newDecoder(); 232 CoderResult cr; 233 234 /* test with and without array-backed buffers */ 235 236 byte[] bytes = new byte[0x10000]; 237 char[] chars = new char[0x10000]; 238 for (int j = 0; j <= 0xffff; j++) { 239 bytes[j] = (byte) j; 240 chars[j] = (char) j; 241 } 242 243 boolean fail = false; 244 boolean arrays = false; 245 boolean decoding = false; 246 int i; 247 248 // 0 thru limit - 1 249 ByteBuffer bs = ByteBuffer.wrap(bytes, 0, limit); 250 CharBuffer us = CharBuffer.wrap(chars, 0, limit); 251 smBufDecode(decoder, converter, bs, us, true); 252 smBufDecode(decoder, converter, bs, us, false); 253 smBufEncode(encoder, converter, us, bs, true); 254 smBufEncode(encoder, converter, us, bs, false); 255 for (i = 0; i < limit; i++) { 256 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 257 us = CharBuffer.wrap(chars, i, 1).slice(); 258 try { 259 decoding = true; 260 arrays = true; 261 smBufDecode(decoder, converter, bs, us, true, false, true); 262 263 decoding = true; 264 arrays = false; 265 smBufDecode(decoder, converter, bs, us, true, false, false); 266 267 decoding = false; 268 arrays = true; 269 smBufEncode(encoder, converter, us, bs, true, false, true); 270 271 decoding = false; 272 arrays = false; 273 smBufEncode(encoder, converter, us, bs, true, false, false); 274 275 } catch (Exception ex) { 276 errln("Failed to fail to " + (decoding ? "decode" : "encode") + " 0x" 277 + Integer.toHexString(i) + (arrays ? 
" with arrays" : " without arrays") + " in " + converter); 278 return; 279 } 280 } 281 282 // decode limit thru 255 283 for (i = limit; i <= 0xff; i++) { 284 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 285 us = CharBuffer.wrap(chars, i, 1).slice(); 286 try { 287 smBufDecode(decoder, converter, bs, us, true, false, true); 288 fail = true; 289 arrays = true; 290 break; 291 } catch (Exception ex) { 292 } 293 try { 294 smBufDecode(decoder, converter, bs, us, true, false, false); 295 fail = true; 296 arrays = false; 297 break; 298 } catch (Exception ex) { 299 } 300 } 301 if (fail) { 302 errln("Failed to fail to decode 0x" + Integer.toHexString(i) 303 + (arrays ? " with arrays" : " without arrays") + " in " + converter); 304 return; 305 } 306 307 // encode limit thru 0xffff, skipping through much of the 1ff to feff range to save 308 // time (it would take too much time to test every possible case) 309 for (i = limit; i <= 0xffff; i = ((i>=0x1ff && i<0xfeff) ? i+0xfd : i+1)) { 310 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 311 us = CharBuffer.wrap(chars, i, 1).slice(); 312 try { 313 smBufEncode(encoder, converter, us, bs, true, false, true); 314 fail = true; 315 arrays = true; 316 break; 317 } catch (Exception ex) { 318 } 319 try { 320 smBufEncode(encoder, converter, us, bs, true, false, false); 321 fail = true; 322 arrays = false; 323 break; 324 } catch (Exception ex) { 325 } 326 } 327 if (fail) { 328 errln("Failed to fail to encode 0x" + Integer.toHexString(i) 329 + (arrays ? 
" with arrays" : " without arrays") + " in " + converter); 330 return; 331 } 332 333 // test overflow / underflow edge cases 334 outer: for (int n = 1; n <= 3; n++) { 335 for (int m = 0; m < n; m++) { 336 // expecting underflow 337 try { 338 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 339 us = CharBuffer.wrap(chars, 'a', m).slice(); 340 smBufDecode(decoder, converter, bs, us, true, false, true); 341 smBufDecode(decoder, converter, bs, us, true, false, false); 342 smBufEncode(encoder, converter, us, bs, true, false, true); 343 smBufEncode(encoder, converter, us, bs, true, false, false); 344 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 345 us = CharBuffer.wrap(chars, 'a', n).slice(); 346 smBufDecode(decoder, converter, bs, us, true, false, true, m); 347 smBufDecode(decoder, converter, bs, us, true, false, false, m); 348 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 349 us = CharBuffer.wrap(chars, 'a', m).slice(); 350 smBufEncode(encoder, converter, us, bs, true, false, true, m); 351 smBufEncode(encoder, converter, us, bs, true, false, false, m); 352 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 353 us = CharBuffer.wrap(chars, 'a', n).slice(); 354 smBufDecode(decoder, converter, bs, us, true, false, true); 355 smBufDecode(decoder, converter, bs, us, true, false, false); 356 smBufEncode(encoder, converter, us, bs, true, false, true); 357 smBufEncode(encoder, converter, us, bs, true, false, false); 358 } catch (Exception ex) { 359 fail = true; 360 break outer; 361 } 362 363 // expecting overflow 364 try { 365 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 366 us = CharBuffer.wrap(chars, 'a', m).slice(); 367 smBufDecode(decoder, converter, bs, us, true, false, true); 368 fail = true; 369 break; 370 } catch (Exception ex) { 371 if (!(ex instanceof BufferOverflowException)) { 372 fail = true; 373 break outer; 374 } 375 } 376 try { 377 bs = ByteBuffer.wrap(bytes, 'a', n).slice(); 378 us = CharBuffer.wrap(chars, 'a', m).slice(); 379 smBufDecode(decoder, converter, bs, us, true, 
false, false); 380 fail = true; 381 } catch (Exception ex) { 382 if (!(ex instanceof BufferOverflowException)) { 383 fail = true; 384 break outer; 385 } 386 } 387 try { 388 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 389 us = CharBuffer.wrap(chars, 'a', n).slice(); 390 smBufEncode(encoder, converter, us, bs, true, false, true); 391 fail = true; 392 } catch (Exception ex) { 393 if (!(ex instanceof BufferOverflowException)) { 394 fail = true; 395 break outer; 396 } 397 } 398 try { 399 bs = ByteBuffer.wrap(bytes, 'a', m).slice(); 400 us = CharBuffer.wrap(chars, 'a', n).slice(); 401 smBufEncode(encoder, converter, us, bs, true, false, false); 402 fail = true; 403 } catch (Exception ex) { 404 if (!(ex instanceof BufferOverflowException)) { 405 fail = true; 406 break outer; 407 } 408 } 409 } 410 } 411 if (fail) { 412 errln("Incorrect result in " + converter + " for underflow / overflow edge cases"); 413 return; 414 } 415 416 // test surrogate combinations in encoding 417 String lead = "\ud888"; 418 String trail = "\udc88"; 419 String norm = "a"; 420 String ext = "\u0275"; // theta 421 String end = ""; 422 bs = ByteBuffer.wrap(new byte[] { 0 }); 423 String[] input = new String[] { // 424 lead + lead, // malf(1) 425 lead + trail, // unmap(2) 426 lead + norm, // malf(1) 427 lead + ext, // malf(1) 428 lead + end, // malf(1) 429 trail + norm, // malf(1) 430 trail + end, // malf(1) 431 ext + norm, // unmap(1) 432 ext + end, // unmap(1) 433 }; 434 CoderResult[] result = new CoderResult[] { 435 CoderResult.malformedForLength(1), 436 CoderResult.unmappableForLength(2), 437 CoderResult.malformedForLength(1), 438 CoderResult.malformedForLength(1), 439 CoderResult.malformedForLength(1), 440 CoderResult.malformedForLength(1), 441 CoderResult.malformedForLength(1), 442 CoderResult.unmappableForLength(1), 443 CoderResult.unmappableForLength(1), 444 }; 445 446 for (int index = 0; index < input.length; index++) { 447 CharBuffer source = CharBuffer.wrap(input[index]); 448 cr = 
encoder.encode(source, bs, true); 449 bs.rewind(); 450 encoder.reset(); 451 452 // if cr != results[x] 453 if (!((cr.isUnderflow() && result[index].isUnderflow()) 454 || (cr.isOverflow() && result[index].isOverflow()) 455 || (cr.isMalformed() && result[index].isMalformed()) 456 || (cr.isUnmappable() && result[index].isUnmappable())) 457 || (cr.isError() && cr.length() != result[index].length())) { 458 errln("Incorrect result in " + converter + " for \"" + input[index] + "\"" + ", expected: " + result[index] + ", received: " + cr); 459 break; 460 } 461 462 source = CharBuffer.wrap(input[index].toCharArray()); 463 cr = encoder.encode(source, bs, true); 464 bs.rewind(); 465 encoder.reset(); 466 467 // if cr != results[x] 468 if (!((cr.isUnderflow() && result[index].isUnderflow()) 469 || (cr.isOverflow() && result[index].isOverflow()) 470 || (cr.isMalformed() && result[index].isMalformed()) 471 || (cr.isUnmappable() && result[index].isUnmappable())) 472 || (cr.isError() && cr.length() != result[index].length())) { 473 errln("Incorrect result in " + converter + " for \"" + input[index] + "\"" + ", expected: " + result[index] + ", received: " + cr); 474 break; 475 } 476 } 477 } 478 479 @Test TestUTF8Converter()480 public void TestUTF8Converter() { 481 String converter = "UTF-8"; 482 CharsetProvider icu = new CharsetProviderICU(); 483 Charset icuChar = icu.charsetForName(converter); 484 CharsetEncoder encoder = icuChar.newEncoder(); 485 CharsetDecoder decoder = icuChar.newDecoder(); 486 ByteBuffer bs; 487 CharBuffer us; 488 CoderResult cr; 489 490 491 int[] size = new int[] { 1<<7, 1<<11, 1<<16 }; // # of 1,2,3 byte combinations 492 byte[] bytes = new byte[size[0] + size[1]*2 + size[2]*3]; 493 char[] chars = new char[size[0] + size[1] + size[2]]; 494 int i = 0; 495 int x, y; 496 497 // 0 to 1 << 7 (1 byters) 498 for (; i < size[0]; i++) { 499 bytes[i] = (byte) i; 500 chars[i] = (char) i; 501 bs = ByteBuffer.wrap(bytes, i, 1).slice(); 502 us = CharBuffer.wrap(chars, i, 
1).slice(); 503 try { 504 smBufDecode(decoder, converter, bs, us, true, false, true); 505 smBufDecode(decoder, converter, bs, us, true, false, false); 506 smBufEncode(encoder, converter, us, bs, true, false, true); 507 smBufEncode(encoder, converter, us, bs, true, false, false); 508 } catch (Exception ex) { 509 errln("Incorrect result in " + converter + " for 0x" 510 + Integer.toHexString(i)); 511 break; 512 } 513 } 514 515 // 1 << 7 to 1 << 11 (2 byters) 516 for (; i < size[1]; i++) { 517 x = size[0] + i*2; 518 y = size[0] + i; 519 bytes[x + 0] = (byte) (0xc0 | ((i >> 6) & 0x1f)); 520 bytes[x + 1] = (byte) (0x80 | ((i >> 0) & 0x3f)); 521 chars[y] = (char) i; 522 bs = ByteBuffer.wrap(bytes, x, 2).slice(); 523 us = CharBuffer.wrap(chars, y, 1).slice(); 524 try { 525 smBufDecode(decoder, converter, bs, us, true, false, true); 526 smBufDecode(decoder, converter, bs, us, true, false, false); 527 smBufEncode(encoder, converter, us, bs, true, false, true); 528 smBufEncode(encoder, converter, us, bs, true, false, false); 529 } catch (Exception ex) { 530 errln("Incorrect result in " + converter + " for 0x" 531 + Integer.toHexString(i)); 532 break; 533 } 534 } 535 536 // 1 << 11 to 1 << 16 (3 byters and surrogates) 537 for (; i < size[2]; i++) { 538 x = size[0] + size[1] * 2 + i * 3; 539 y = size[0] + size[1] + i; 540 bytes[x + 0] = (byte) (0xe0 | ((i >> 12) & 0x0f)); 541 bytes[x + 1] = (byte) (0x80 | ((i >> 6) & 0x3f)); 542 bytes[x + 2] = (byte) (0x80 | ((i >> 0) & 0x3f)); 543 chars[y] = (char) i; 544 if (!UTF16.isSurrogate((char)i)) { 545 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 546 us = CharBuffer.wrap(chars, y, 1).slice(); 547 try { 548 smBufDecode(decoder, converter, bs, us, true, false, true); 549 smBufDecode(decoder, converter, bs, us, true, false, false); 550 smBufEncode(encoder, converter, us, bs, true, false, true); 551 smBufEncode(encoder, converter, us, bs, true, false, false); 552 } catch (Exception ex) { 553 errln("Incorrect result in " + converter + " for 
0x" 554 + Integer.toHexString(i)); 555 break; 556 } 557 } else { 558 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 559 us = CharBuffer.wrap(chars, y, 1).slice(); 560 561 decoder.reset(); 562 cr = decoder.decode(bs, us, true); 563 bs.rewind(); 564 us.rewind(); 565 if (!cr.isMalformed() || cr.length() != 1) { 566 errln("Incorrect result in " + converter + " decoder for 0x" 567 + Integer.toHexString(i) + " received " + cr); 568 break; 569 } 570 encoder.reset(); 571 cr = encoder.encode(us, bs, true); 572 bs.rewind(); 573 us.rewind(); 574 if (!cr.isMalformed() || cr.length() != 1) { 575 errln("Incorrect result in " + converter + " encoder for 0x" 576 + Integer.toHexString(i) + " received " + cr); 577 break; 578 } 579 580 bs = ByteBuffer.wrap(bytes, x, 3).slice(); 581 us = CharBuffer.wrap(new String(chars, y, 1)); 582 583 decoder.reset(); 584 cr = decoder.decode(bs, us, true); 585 bs.rewind(); 586 us.rewind(); 587 if (!cr.isMalformed() || cr.length() != 1) { 588 errln("Incorrect result in " + converter + " decoder for 0x" 589 + Integer.toHexString(i) + " received " + cr); 590 break; 591 } 592 encoder.reset(); 593 cr = encoder.encode(us, bs, true); 594 bs.rewind(); 595 us.rewind(); 596 if (!cr.isMalformed() || cr.length() != 1) { 597 errln("Incorrect result in " + converter + " encoder for 0x" 598 + Integer.toHexString(i) + " received " + cr); 599 break; 600 } 601 602 603 } 604 } 605 if (true) 606 return; 607 } 608 609 @Test TestHZ()610 public void TestHZ() { 611 /* test input */ 612 char[] in = new char[] { 613 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 614 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 615 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 616 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 617 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 618 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 
0x2493, 0x2494, 0x2495, 0x2496, 619 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 620 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 621 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 622 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 623 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 624 0x005A, 0x005B, 0x005C, 0x000A 625 }; 626 627 String converter = "HZ"; 628 CharsetProvider icu = new CharsetProviderICU(); 629 Charset icuChar = icu.charsetForName(converter); 630 CharsetEncoder encoder = icuChar.newEncoder(); 631 CharsetDecoder decoder = icuChar.newDecoder(); 632 try { 633 CharBuffer start = CharBuffer.wrap(in); 634 ByteBuffer bytes = encoder.encode(start); 635 CharBuffer finish = decoder.decode(bytes); 636 637 if (!equals(start, finish)) { 638 errln(converter + " roundtrip test failed: start does not match finish"); 639 640 char[] finishArray = new char[finish.limit()]; 641 for (int i=0; i<finishArray.length; i++) 642 finishArray[i] = finish.get(i); 643 644 logln("start: " + hex(in)); 645 logln("finish: " + hex(finishArray)); 646 } 647 } catch (CharacterCodingException ex) { 648 // Android patch: Skip tests that fail with customized data. 649 logln(converter + " roundtrip test failed: " + ex.getMessage()); 650 // Android patch end. 
651 ex.printStackTrace(System.err); 652 } 653 654 /* For better code coverage */ 655 CoderResult result = CoderResult.UNDERFLOW; 656 byte byteout[] = { 657 (byte)0x7e, (byte)0x7d, (byte)0x41, 658 (byte)0x7e, (byte)0x7b, (byte)0x21, 659 }; 660 char charin[] = { 661 (char)0x0041, (char)0x0042, (char)0x3000 662 }; 663 ByteBuffer bb = ByteBuffer.wrap(byteout); 664 CharBuffer cb = CharBuffer.wrap(charin); 665 int testLoopSize = 5; 666 int bbLimits[] = { 0, 1, 3, 4, 6}; 667 int bbPositions[] = { 0, 0, 0, 3, 3 }; 668 int ccPositions[] = { 0, 0, 0, 2, 2 }; 669 for (int i = 0; i < testLoopSize; i++) { 670 encoder.reset(); 671 bb.limit(bbLimits[i]); 672 bb.position(bbPositions[i]); 673 cb.position(ccPositions[i]); 674 result = encoder.encode(cb, bb, true); 675 676 if (i < 3) { 677 if (!result.isOverflow()) { 678 errln("Overflow buffer error should have occurred while encoding HZ (" + i + ")"); 679 } 680 } else { 681 if (result.isError()) { 682 // Android patch: Skip tests that fail with customized data. 683 logln("Error should not have occurred while encoding HZ.(" + i + ")"); 684 // Android patch end. 
685 } 686 } 687 } 688 } 689 690 @Test TestUTF8Surrogates()691 public void TestUTF8Surrogates() { 692 byte[][] in = new byte[][] { 693 { (byte)0x61, }, 694 { (byte)0xc2, (byte)0x80, }, 695 { (byte)0xe0, (byte)0xa0, (byte)0x80, }, 696 { (byte)0xf0, (byte)0x90, (byte)0x80, (byte)0x80, }, 697 { (byte)0xf4, (byte)0x84, (byte)0x8c, (byte)0xa1, }, 698 { (byte)0xf0, (byte)0x90, (byte)0x90, (byte)0x81, }, 699 }; 700 701 /* expected test results */ 702 char[][] results = new char[][] { 703 /* number of bytes read, code point */ 704 { '\u0061', }, 705 { '\u0080', }, 706 { '\u0800', }, 707 { '\ud800', '\udc00', }, // 10000 708 { '\udbd0', '\udf21', }, // 104321 709 { '\ud801', '\udc01', }, // 10401 710 }; 711 712 /* error test input */ 713 byte[][] in2 = new byte[][] { 714 { (byte)0x61, }, 715 { (byte)0xc0, (byte)0x80, /* illegal non-shortest form */ 716 (byte)0xe0, (byte)0x80, (byte)0x80, /* illegal non-shortest form */ 717 (byte)0xf0, (byte)0x80, (byte)0x80, (byte)0x80, /* illegal non-shortest form */ 718 (byte)0xc0, (byte)0xc0, /* illegal trail byte */ 719 (byte)0xf4, (byte)0x90, (byte)0x80, (byte)0x80, /* 0x110000 out of range */ 720 (byte)0xf8, (byte)0x80, (byte)0x80, (byte)0x80, (byte)0x80, /* too long */ 721 (byte)0xfe, /* illegal byte altogether */ 722 (byte)0x62, }, 723 }; 724 725 /* expected error test results */ 726 char[][] results2 = new char[][] { 727 /* number of bytes read, code point */ 728 { '\u0062', }, 729 { '\u0062', }, 730 }; 731 732 String converter = "UTF-8"; 733 CharsetProvider icu = new CharsetProviderICU(); 734 Charset icuChar = icu.charsetForName(converter); 735 CharsetDecoder decoder = icuChar.newDecoder(); 736 737 int i; 738 try { 739 for (i = 0; i < in.length; i++) { 740 ByteBuffer source = ByteBuffer.wrap(in[i]); 741 CharBuffer expected = CharBuffer.wrap(results[i]); 742 smBufDecode(decoder, converter, source, expected, true, false, 743 true); 744 smBufDecode(decoder, converter, source, expected, true, false, 745 false); 746 } 747 } catch 
(Exception ex) { 748 errln("Incorrect result in " + converter); 749 } 750 try { 751 for (i = 0; i < in2.length; i++) { 752 ByteBuffer source = ByteBuffer.wrap(in2[i]); 753 CharBuffer expected = CharBuffer.wrap(results2[i]); 754 decoder.onMalformedInput(CodingErrorAction.IGNORE); 755 smBufDecode(decoder, converter, source, expected, true, false, 756 true); 757 smBufDecode(decoder, converter, source, expected, true, false, 758 false); 759 } 760 } catch (Exception ex) { 761 errln("Incorrect result in " + converter); 762 } 763 } 764 765 @Test TestSurrogateBehavior()766 public void TestSurrogateBehavior() { 767 CharsetProviderICU icu = new CharsetProviderICU(); 768 769 // get all the converters into an array 770 Object[] converters = CharsetProviderICU.getAvailableNames(); 771 772 String norm = "a"; 773 String ext = "\u0275"; // theta 774 String lead = "\ud835"; 775 String trail = "\udd04"; 776 // lead + trail = \U1d504 (fraktur capital A) 777 778 String input = 779 // error position 780 ext // unmap(1) 1 781 + lead // under 1 782 + lead // malf(1) 2 783 + trail // unmap(2) 4 784 + trail // malf(1) 5 785 + ext // unmap(1) 6 786 + norm // unmap(1) 7 787 ; 788 CoderResult[] results = new CoderResult[] { 789 CoderResult.unmappableForLength(1), // or underflow 790 CoderResult.UNDERFLOW, 791 CoderResult.malformedForLength(1), 792 CoderResult.unmappableForLength(2), // or underflow 793 CoderResult.malformedForLength(1), 794 CoderResult.unmappableForLength(1), // or underflow 795 CoderResult.unmappableForLength(1), // or underflow 796 }; 797 int[] positions = new int[] { 1,1,2,4,5,6,7 }; 798 int n = positions.length; 799 800 int badcount = 0; 801 int goodcount = 0; 802 int[] uhohindices = new int[n]; 803 int[] badposindices = new int[n]; 804 int[] malfindices = new int[n]; 805 int[] unmapindices = new int[n]; 806 ArrayList pass = new ArrayList(); 807 ArrayList exempt = new ArrayList(); 808 809 outer: for (int conv=0; conv<converters.length; conv++) { 810 String converter = 
(String)converters[conv]; 811 if (converter.equals("x-IMAP-mailbox-name") || converter.equals("UTF-7") || converter.equals("CESU-8") || converter.equals("BOCU-1") || 812 converter.equals("x-LMBCS-1")) { 813 exempt.add(converter); 814 continue; 815 } 816 817 boolean currentlybad = false; 818 Charset icuChar = icu.charsetForName(converter); 819 CharsetEncoder encoder = icuChar.newEncoder(); 820 CoderResult cr; 821 822 CharBuffer source = CharBuffer.wrap(input); 823 ByteBuffer target = ByteBuffer.allocate(30); 824 ByteBuffer expected = null; 825 try { 826 encoder.onUnmappableCharacter(CodingErrorAction.IGNORE); 827 encoder.onMalformedInput(CodingErrorAction.IGNORE); 828 expected = encoder.encode(CharBuffer.wrap(ext + lead + trail + ext + norm)); 829 encoder.reset(); 830 } catch (CharacterCodingException ex) { 831 errln("Unexpected CharacterCodingException: " + ex.getMessage()); 832 return; 833 } catch (RuntimeException ex) { 834 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 835 // Android patch: Skip tests that fail with customized data. 836 logln(converter + " " + ex.getClass().getName() + ": " + ex.getMessage()); 837 // Android patch end. 
838 continue outer; 839 } 840 841 encoder.onUnmappableCharacter(CodingErrorAction.REPORT); 842 encoder.onMalformedInput(CodingErrorAction.REPORT); 843 for (int i=0; i<n; i++) { 844 source.limit(i+1); 845 cr = encoder.encode(source, target, i == n - 1); 846 if (!(equals(cr, results[i]) 847 || (results[i].isUnmappable() && cr.isUnderflow()) // mappability depends on the converter 848 )) { 849 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 850 if (results[i].isMalformed() && cr.isMalformed()) { 851 malfindices[i]++; 852 } else if (results[i].isUnmappable() && cr.isUnmappable()) { 853 unmapindices[i]++; 854 } else { 855 uhohindices[i]++; 856 } 857 errln("(index=" + i + ") " + converter + " Received: " + cr + " Expected: " + results[i]); 858 } 859 if (source.position() != positions[i]) { 860 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 861 badposindices[i]++; 862 errln("(index=" + i + ") " + converter + " Received: " + source.position() + " Expected: " + positions[i]); 863 } 864 865 } 866 encoder.reset(); 867 868 //System.out.println("\n" + hex(target.array())); 869 //System.out.println(hex(expected.array()) + "\n" + expected.limit()); 870 if (!(equals(target, expected, expected.limit()) && target.position() == expected.limit())) { 871 if (!currentlybad) {currentlybad = true; badcount++; logln(""); } 872 errln(converter + " Received: \"" + hex(target.array()) + "\" Expected: \"" + hex(expected.array()) + "\""); 873 } 874 875 if (!currentlybad) { 876 goodcount++; 877 pass.add(converter); 878 } 879 } 880 881 logln("\n" + badcount + " / " + (converters.length - exempt.size()) + " (" + goodcount + " good, " + badcount + " bad)"); 882 log("index\t"); for (int i=0; i<n; i++) log(i + "\t"); logln(""); 883 log("unmap\t"); for (int i=0; i<n; i++) log(unmapindices[i] + "\t"); logln(""); 884 log("malf \t"); for (int i=0; i<n; i++) log(malfindices[i] + "\t"); logln(""); 885 log("pos \t"); for (int i=0; i<n; i++) log(badposindices[i] + "\t"); 
logln(""); 886 log("uhoh \t"); for (int i=0; i<n; i++) log(uhohindices[i] + "\t"); logln(""); 887 logln(""); 888 log("The few that passed: "); for (int i=0; i<pass.size(); i++) log(pass.get(i) + ", "); logln(""); 889 log("The few that are exempt: "); for (int i=0; i<exempt.size(); i++) log(exempt.get(i) + ", "); logln(""); 890 } 891 892 // public void TestCharsetCallback() { 893 // String currentTest = "initialization"; 894 // try { 895 // Class[] params; 896 // 897 // // get the classes 898 // Class CharsetCallback = Class.forName("com.ibm.icu.charset.CharsetCallback"); 899 // Class Decoder = Class.forName("com.ibm.icu.charset.CharsetCallback$Decoder"); 900 // Class Encoder = Class.forName("com.ibm.icu.charset.CharsetCallback$Encoder"); 901 // 902 // // set up encoderCall 903 // params = new Class[] {CharsetEncoderICU.class, Object.class, 904 // CharBuffer.class, ByteBuffer.class, IntBuffer.class, 905 // char[].class, int.class, int.class, CoderResult.class }; 906 // Method encoderCall = Encoder.getDeclaredMethod("call", params); 907 // 908 // // set up decoderCall 909 // params = new Class[] {CharsetDecoderICU.class, Object.class, 910 // ByteBuffer.class, CharBuffer.class, IntBuffer.class, 911 // char[].class, int.class, CoderResult.class}; 912 // Method decoderCall = Decoder.getDeclaredMethod("call", params); 913 // 914 // // get relevant fields 915 // Object SUB_STOP_ON_ILLEGAL = getFieldValue(CharsetCallback, "SUB_STOP_ON_ILLEGAL", null); 916 // 917 // // set up a few arguments 918 // CharsetProvider provider = new CharsetProviderICU(); 919 // Charset charset = provider.charsetForName("UTF-8"); 920 // CharsetEncoderICU encoder = (CharsetEncoderICU)charset.newEncoder(); 921 // CharsetDecoderICU decoder = (CharsetDecoderICU)charset.newDecoder(); 922 // CharBuffer chars = CharBuffer.allocate(10); 923 // chars.put('o'); 924 // chars.put('k'); 925 // ByteBuffer bytes = ByteBuffer.allocate(10); 926 // bytes.put((byte)'o'); 927 // bytes.put((byte)'k'); 928 // 
//            IntBuffer offsets = IntBuffer.allocate(10);
//            offsets.put(0);
//            offsets.put(1);
//            char[] buffer = null;
//            Integer length = new Integer(2);
//            Integer cp = new Integer(0);
//            CoderResult unmap = CoderResult.unmappableForLength(2);
//            CoderResult malf = CoderResult.malformedForLength(2);
//            CoderResult under = CoderResult.UNDERFLOW;
//
//            // set up error arrays
//            Integer invalidCharLength = new Integer(1);
//            Byte subChar1 = new Byte((byte)0);
//            Byte subChar1_alternate = new Byte((byte)1); // for TO_U_CALLBACK_SUBSTITUTE
//
//            // set up chars and bytes backups and expected values for certain cases
//            CharBuffer charsBackup = bufferCopy(chars);
//            ByteBuffer bytesBackup = bufferCopy(bytes);
//            IntBuffer offsetsBackup = bufferCopy(offsets);
//            CharBuffer encoderCharsExpected = bufferCopy(chars);
//            ByteBuffer encoderBytesExpected = bufferCopy(bytes);
//            IntBuffer encoderOffsetsExpected = bufferCopy(offsets);
//            CharBuffer decoderCharsExpected1 = bufferCopy(chars);
//            CharBuffer decoderCharsExpected2 = bufferCopy(chars);
//            IntBuffer decoderOffsetsExpected1 = bufferCopy(offsets);
//            IntBuffer decoderOffsetsExpected2 = bufferCopy(offsets);
//
//            // initialize fields to obtain expected data
//            setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, invalidCharLength);
//            setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), subChar1);
//
//            // run cbFromUWriteSub
//            Method cbFromUWriteSub = CharsetEncoderICU.class.getDeclaredMethod("cbFromUWriteSub", new Class[] { CharsetEncoderICU.class, CharBuffer.class, ByteBuffer.class, IntBuffer.class});
//            cbFromUWriteSub.setAccessible(true);
//            CoderResult encoderResultExpected = (CoderResult)cbFromUWriteSub.invoke(encoder, new Object[] {encoder, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected});
//
//            // run toUWriteUChars with normal data
//            Method toUWriteUChars = CharsetDecoderICU.class.getDeclaredMethod("toUWriteUChars", new Class[] { CharsetDecoderICU.class, char[].class, int.class, int.class, CharBuffer.class, IntBuffer.class, int.class});
//            toUWriteUChars.setAccessible(true);
//            CoderResult decoderResultExpected1 = (CoderResult)toUWriteUChars.invoke(decoder, new Object[] {decoder, new char[] {0xFFFD}, new Integer(0), new Integer(1), decoderCharsExpected1, decoderOffsetsExpected1, new Integer(bytes.position())});
//
//            // reset certain fields
//            setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, invalidCharLength);
//            setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), subChar1_alternate);
//
//            // run toUWriteUChars again
//            CoderResult decoderResultExpected2 = (CoderResult)toUWriteUChars.invoke(decoder, new Object[] {decoder, new char[] {0x1A}, new Integer(0), new Integer(1), decoderCharsExpected2, decoderOffsetsExpected2, new Integer(bytes.position())});
//
//            // begin creating the tests array
//            ArrayList tests = new ArrayList();
//
//            // create tests for FROM_U_CALLBACK_SKIP 0
//            tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { }});
//            tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }});
//            tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { }});
//            tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SKIP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }});
//
//            // create tests for TO_U_CALLBACK_SKIP 4
//            tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }});
//            tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }});
//            tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, unmap }, under, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }});
//            tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SKIP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL + "xx", bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }});
//
//            // create tests for FROM_U_CALLBACK_STOP 8
//            tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }});
//            tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }});
//            tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, unmap, charsBackup, bytesBackup, offsetsBackup, new Object[] { }});
//            tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_STOP", new Object[] { 
encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 996 // 997 // // create tests for TO_U_CALLBACK_STOP 12 998 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 999 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 1000 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL, bytes, chars, offsets, buffer, length, unmap }, unmap, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 1001 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_STOP", new Object[] { decoder, SUB_STOP_ON_ILLEGAL + "xx", bytes, chars, offsets, buffer, length, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { invalidCharLength, subChar1 }}); 1002 // 1003 // // create tests for FROM_U_CALLBACK_SUBSTITUTE 16 1004 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, null, chars, bytes, offsets, buffer, length, cp, null }, encoderResultExpected, encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected, new Object[] { }}); 1005 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, malf }, malf, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 1006 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL, chars, bytes, offsets, buffer, length, cp, unmap }, encoderResultExpected, 
encoderCharsExpected, encoderBytesExpected, encoderOffsetsExpected, new Object[] { }}); 1007 // tests.add(new Object[] {encoderCall, "FROM_U_CALLBACK_SUBSTITUTE", new Object[] { encoder, SUB_STOP_ON_ILLEGAL + "xx", chars, bytes, offsets, buffer, length, cp, null }, null, charsBackup, bytesBackup, offsetsBackup, new Object[] { }}); 1008 // 1009 // // create tests for TO_U_CALLBACK_SUBSTITUTE 20 1010 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SUBSTITUTE", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, decoderResultExpected1, decoderCharsExpected1, bytesBackup, decoderOffsetsExpected1, new Object[] { invalidCharLength, subChar1 }}); 1011 // tests.add(new Object[] {decoderCall, "TO_U_CALLBACK_SUBSTITUTE", new Object[] { decoder, null, bytes, chars, offsets, buffer, length, null }, decoderResultExpected2, decoderCharsExpected2, bytesBackup, decoderOffsetsExpected2, new Object[] { invalidCharLength, subChar1_alternate }}); 1012 // 1013 // Iterator iter = tests.iterator(); 1014 // for (int i=0; iter.hasNext(); i++) { 1015 // // get the data out of the map 1016 // Object[] next = (Object[])iter.next(); 1017 // 1018 // Method method = (Method)next[0]; 1019 // String fieldName = (String)next[1]; 1020 // Object field = getFieldValue(CharsetCallback, fieldName, null); 1021 // Object[] args = (Object[])next[2]; 1022 // CoderResult expected = (CoderResult)next[3]; 1023 // CharBuffer charsExpected = (CharBuffer)next[4]; 1024 // ByteBuffer bytesExpected = (ByteBuffer)next[5]; 1025 // IntBuffer offsetsExpected = (IntBuffer)next[6]; 1026 // 1027 // // set up error arrays and certain fields 1028 // Object[] values = (Object[])next[7]; 1029 // if (method == decoderCall) { 1030 // decoder.reset(); 1031 // setFieldValue(CharsetDecoderICU.class, "invalidCharLength", decoder, values[0]); 1032 // setFieldValue(CharsetICU.class, "subChar1", ((CharsetICU) decoder.charset()), values[1]); 1033 // } else if (method == encoderCall) { 1034 // 
encoder.reset(); 1035 // } 1036 // 1037 // try { 1038 // // invoke the method 1039 // CoderResult actual = (CoderResult)method.invoke(field, args); 1040 // 1041 // // if expected != actual 1042 // if (!coderResultsEqual(expected, actual)) { 1043 // // case #i refers to the index in the arraylist tests 1044 // errln(fieldName + " failed to return the correct result for case #" + i + "."); 1045 // } 1046 // // if the expected buffers != actual buffers 1047 // else if (!(buffersEqual(chars, charsExpected) && 1048 // buffersEqual(bytes, bytesExpected) && 1049 // buffersEqual(offsets, offsetsExpected))) { 1050 // // case #i refers to the index in the arraylist tests 1051 // errln(fieldName + " did not perform the correct operation on the buffers for case #" + i + "."); 1052 // } 1053 // } catch (InvocationTargetException ex) { 1054 // // case #i refers to the index in the arraylist tests 1055 // errln(fieldName + " threw an exception for case #" + i + ": " + ex.getCause()); 1056 // //ex.getCause().printStackTrace(); 1057 // } 1058 // 1059 // // reset the buffers 1060 // System.arraycopy(bytesBackup.array(), 0, bytes.array(), 0, 10); 1061 // System.arraycopy(charsBackup.array(), 0, chars.array(), 0, 10); 1062 // System.arraycopy(offsetsBackup.array(), 0, offsets.array(), 0, 10); 1063 // bytes.position(bytesBackup.position()); 1064 // chars.position(charsBackup.position()); 1065 // offsets.position(offsetsBackup.position()); 1066 // } 1067 // 1068 // } catch (Exception ex) { 1069 // errln("TestCharsetCallback skipped due to " + ex.toString()); 1070 // ex.printStackTrace(); 1071 // } 1072 // } 1073 // 1074 // private Object getFieldValue(Class c, String name, Object instance) throws Exception { 1075 // Field field = c.getDeclaredField(name); 1076 // field.setAccessible(true); 1077 // return field.get(instance); 1078 // } 1079 // private void setFieldValue(Class c, String name, Object instance, Object value) throws Exception { 1080 // Field field = c.getDeclaredField(name); 
1081 // field.setAccessible(true); 1082 // if (value instanceof Boolean) 1083 // field.setBoolean(instance, ((Boolean)value).booleanValue()); 1084 // else if (value instanceof Byte) 1085 // field.setByte(instance, ((Byte)value).byteValue()); 1086 // else if (value instanceof Character) 1087 // field.setChar(instance, ((Character)value).charValue()); 1088 // else if (value instanceof Double) 1089 // field.setDouble(instance, ((Double)value).doubleValue()); 1090 // else if (value instanceof Float) 1091 // field.setFloat(instance, ((Float)value).floatValue()); 1092 // else if (value instanceof Integer) 1093 // field.setInt(instance, ((Integer)value).intValue()); 1094 // else if (value instanceof Long) 1095 // field.setLong(instance, ((Long)value).longValue()); 1096 // else if (value instanceof Short) 1097 // field.setShort(instance, ((Short)value).shortValue()); 1098 // else 1099 // field.set(instance, value); 1100 // } 1101 // private boolean coderResultsEqual(CoderResult a, CoderResult b) { 1102 // if (a == null && b == null) 1103 // return true; 1104 // if (a == null || b == null) 1105 // return false; 1106 // if ((a.isUnderflow() && b.isUnderflow()) || (a.isOverflow() && b.isOverflow())) 1107 // return true; 1108 // if (a.length() != b.length()) 1109 // return false; 1110 // if ((a.isMalformed() && b.isMalformed()) || (a.isUnmappable() && b.isUnmappable())) 1111 // return true; 1112 // return false; 1113 // } 1114 // private boolean buffersEqual(ByteBuffer a, ByteBuffer b) { 1115 // if (a.position() != b.position()) 1116 // return false; 1117 // int limit = a.position(); 1118 // for (int i=0; i<limit; i++) 1119 // if (a.get(i) != b.get(i)) 1120 // return false; 1121 // return true; 1122 // } 1123 // private boolean buffersEqual(CharBuffer a, CharBuffer b) { 1124 // if (a.position() != b.position()) 1125 // return false; 1126 // int limit = a.position(); 1127 // for (int i=0; i<limit; i++) 1128 // if (a.get(i) != b.get(i)) 1129 // return false; 1130 // return true; 
1131 // } 1132 // private boolean buffersEqual(IntBuffer a, IntBuffer b) { 1133 // if (a.position() != b.position()) 1134 // return false; 1135 // int limit = a.position(); 1136 // for (int i=0; i<limit; i++) 1137 // if (a.get(i) != b.get(i)) 1138 // return false; 1139 // return true; 1140 // } 1141 // private ByteBuffer bufferCopy(ByteBuffer src) { 1142 // ByteBuffer dest = ByteBuffer.allocate(src.limit()); 1143 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1144 // dest.position(src.position()); 1145 // return dest; 1146 // } 1147 // private CharBuffer bufferCopy(CharBuffer src) { 1148 // CharBuffer dest = CharBuffer.allocate(src.limit()); 1149 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1150 // dest.position(src.position()); 1151 // return dest; 1152 // } 1153 // private IntBuffer bufferCopy(IntBuffer src) { 1154 // IntBuffer dest = IntBuffer.allocate(src.limit()); 1155 // System.arraycopy(src.array(), 0, dest.array(), 0, src.limit()); 1156 // dest.position(src.position()); 1157 // return dest; 1158 // } 1159 1160 1161 @Test TestAPISemantics( )1162 public void TestAPISemantics(/*String encoding*/) { 1163 String encoding = "UTF-16"; 1164 CharsetDecoder decoder = null; 1165 CharsetEncoder encoder = null; 1166 try { 1167 CharsetProviderICU provider = new CharsetProviderICU(); 1168 Charset charset = provider.charsetForName(encoding); 1169 decoder = charset.newDecoder(); 1170 encoder = charset.newEncoder(); 1171 } catch(MissingResourceException ex) { 1172 warnln("Could not load charset data: " + encoding); 1173 return; 1174 } 1175 1176 final String unistr = "abcd\ud800\udc00\u1234\u00a5\u3000\r\n"; 1177 final byte[] byteStr = { 1178 (byte) 0x00,(byte) 'a', 1179 (byte) 0x00,(byte) 'b', 1180 (byte) 0x00,(byte) 'c', 1181 (byte) 0x00,(byte) 'd', 1182 (byte) 0xd8,(byte) 0x00, 1183 (byte) 0xdc,(byte) 0x00, 1184 (byte) 0x12,(byte) 0x34, 1185 (byte) 0x00,(byte) 0xa5, 1186 (byte) 0x30,(byte) 0x00, 1187 (byte) 0x00,(byte) 0x0d, 1188 
(byte) 0x00,(byte) 0x0a 1189 }; 1190 final byte[] expectedByteStr = { 1191 (byte) 0xfe,(byte) 0xff, 1192 (byte) 0x00,(byte) 'a', 1193 (byte) 0x00,(byte) 'b', 1194 (byte) 0x00,(byte) 'c', 1195 (byte) 0x00,(byte) 'd', 1196 (byte) 0xd8,(byte) 0x00, 1197 (byte) 0xdc,(byte) 0x00, 1198 (byte) 0x12,(byte) 0x34, 1199 (byte) 0x00,(byte) 0xa5, 1200 (byte) 0x30,(byte) 0x00, 1201 (byte) 0x00,(byte) 0x0d, 1202 (byte) 0x00,(byte) 0x0a 1203 }; 1204 1205 ByteBuffer byes = ByteBuffer.wrap(byteStr); 1206 CharBuffer uniVal = CharBuffer.wrap(unistr); 1207 ByteBuffer expected = ByteBuffer.wrap(expectedByteStr); 1208 1209 int rc = 0; 1210 if(decoder==null){ 1211 warnln("Could not load decoder."); 1212 return; 1213 } 1214 decoder.reset(); 1215 /* Convert the whole buffer to Unicode */ 1216 try { 1217 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1218 CoderResult result = decoder.decode(byes, chars, false); 1219 1220 if (result.isError()) { 1221 errln("ToChars encountered Error"); 1222 rc = 1; 1223 } 1224 if (result.isOverflow()) { 1225 errln("ToChars encountered overflow exception"); 1226 rc = 1; 1227 } 1228 if (!equals(chars, unistr)) { 1229 errln("ToChars does not match"); 1230 printchars(chars); 1231 errln("Expected : "); 1232 printchars(unistr); 1233 rc = 2; 1234 } 1235 1236 } catch (Exception e) { 1237 errln("ToChars - exception in buffer"); 1238 rc = 5; 1239 } 1240 1241 /* Convert single bytes to Unicode */ 1242 try { 1243 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1244 ByteBuffer b = ByteBuffer.wrap(byteStr); 1245 decoder.reset(); 1246 CoderResult result=null; 1247 for (int i = 1; i <= byteStr.length; i++) { 1248 b.limit(i); 1249 result = decoder.decode(b, chars, false); 1250 if(result.isOverflow()){ 1251 errln("ToChars single threw an overflow exception"); 1252 } 1253 if (result.isError()) { 1254 errln("ToChars single the result is an error "+result.toString()); 1255 } 1256 } 1257 if (unistr.length() != (chars.limit())) { 1258 errln("ToChars single len 
does not match"); 1259 rc = 3; 1260 } 1261 if (!equals(chars, unistr)) { 1262 errln("ToChars single does not match"); 1263 printchars(chars); 1264 rc = 4; 1265 } 1266 } catch (Exception e) { 1267 errln("ToChars - exception in single"); 1268 //e.printStackTrace(); 1269 rc = 6; 1270 } 1271 1272 /* Convert the buffer one at a time to Unicode */ 1273 try { 1274 CharBuffer chars = CharBuffer.allocate(unistr.length()); 1275 decoder.reset(); 1276 byes.rewind(); 1277 for (int i = 1; i <= byteStr.length; i++) { 1278 byes.limit(i); 1279 CoderResult result = decoder.decode(byes, chars, false); 1280 if (result.isError()) { 1281 errln("Error while decoding: "+result.toString()); 1282 } 1283 if(result.isOverflow()){ 1284 errln("ToChars Simple threw an overflow exception"); 1285 } 1286 } 1287 if (chars.limit() != unistr.length()) { 1288 errln("ToChars Simple buffer len does not match"); 1289 rc = 7; 1290 } 1291 if (!equals(chars, unistr)) { 1292 errln("ToChars Simple buffer does not match"); 1293 printchars(chars); 1294 err(" Expected : "); 1295 printchars(unistr); 1296 rc = 8; 1297 } 1298 } catch (Exception e) { 1299 errln("ToChars - exception in single buffer"); 1300 //e.printStackTrace(System.err); 1301 rc = 9; 1302 } 1303 if (rc != 0) { 1304 errln("Test Simple ToChars for encoding : FAILED"); 1305 } 1306 1307 rc = 0; 1308 /* Convert the whole buffer from unicode */ 1309 try { 1310 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1311 encoder.reset(); 1312 CoderResult result = encoder.encode(uniVal, bytes, false); 1313 if (result.isError()) { 1314 errln("FromChars reported error: " + result.toString()); 1315 rc = 1; 1316 } 1317 if(result.isOverflow()){ 1318 errln("FromChars threw an overflow exception"); 1319 } 1320 bytes.position(0); 1321 if (!bytes.equals(expected)) { 1322 errln("FromChars does not match"); 1323 printbytes(bytes); 1324 printbytes(expected); 1325 rc = 2; 1326 } 1327 } catch (Exception e) { 1328 errln("FromChars - exception in buffer"); 1329 
//e.printStackTrace(System.err); 1330 rc = 5; 1331 } 1332 1333 /* Convert the buffer one char at a time to unicode */ 1334 try { 1335 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1336 CharBuffer c = CharBuffer.wrap(unistr); 1337 encoder.reset(); 1338 CoderResult result= null; 1339 for (int i = 1; i <= unistr.length(); i++) { 1340 c.limit(i); 1341 result = encoder.encode(c, bytes, false); 1342 if(result.isOverflow()){ 1343 errln("FromChars single threw an overflow exception"); 1344 } 1345 if(result.isError()){ 1346 errln("FromChars single threw an error: "+ result.toString()); 1347 } 1348 } 1349 if (expectedByteStr.length != bytes.limit()) { 1350 errln("FromChars single len does not match"); 1351 rc = 3; 1352 } 1353 1354 bytes.position(0); 1355 if (!bytes.equals(expected)) { 1356 errln("FromChars single does not match"); 1357 printbytes(bytes); 1358 printbytes(expected); 1359 rc = 4; 1360 } 1361 1362 } catch (Exception e) { 1363 errln("FromChars - exception in single"); 1364 //e.printStackTrace(System.err); 1365 rc = 6; 1366 } 1367 1368 /* Convert one char at a time to unicode */ 1369 try { 1370 ByteBuffer bytes = ByteBuffer.allocate(expectedByteStr.length); 1371 encoder.reset(); 1372 char[] temp = unistr.toCharArray(); 1373 CoderResult result=null; 1374 for (int i = 0; i <= temp.length; i++) { 1375 uniVal.limit(i); 1376 result = encoder.encode(uniVal, bytes, false); 1377 if(result.isOverflow()){ 1378 errln("FromChars simple threw an overflow exception"); 1379 } 1380 if(result.isError()){ 1381 errln("FromChars simple threw an error: "+ result.toString()); 1382 } 1383 } 1384 if (bytes.limit() != expectedByteStr.length) { 1385 errln("FromChars Simple len does not match"); 1386 rc = 7; 1387 } 1388 if (!bytes.equals(byes)) { 1389 errln("FromChars Simple does not match"); 1390 printbytes(bytes); 1391 printbytes(byes); 1392 rc = 8; 1393 } 1394 } catch (Exception e) { 1395 errln("FromChars - exception in single buffer"); 1396 
//e.printStackTrace(System.err); 1397 rc = 9; 1398 } 1399 if (rc != 0) { 1400 errln("Test Simple FromChars " + encoding + " --FAILED"); 1401 } 1402 } 1403 printchars(CharBuffer buf)1404 void printchars(CharBuffer buf) { 1405 int i; 1406 char[] chars = new char[buf.limit()]; 1407 //save the current position 1408 int pos = buf.position(); 1409 buf.position(0); 1410 buf.get(chars); 1411 //reset to old position 1412 buf.position(pos); 1413 for (i = 0; i < chars.length; i++) { 1414 err(hex(chars[i]) + " "); 1415 } 1416 errln(""); 1417 } printchars(String str)1418 void printchars(String str) { 1419 char[] chars = str.toCharArray(); 1420 for (int i = 0; i < chars.length; i++) { 1421 err(hex(chars[i]) + " "); 1422 } 1423 errln(""); 1424 } printbytes(ByteBuffer buf)1425 void printbytes(ByteBuffer buf) { 1426 int i; 1427 byte[] bytes = new byte[buf.limit()]; 1428 //save the current position 1429 int pos = buf.position(); 1430 buf.position(0); 1431 buf.get(bytes); 1432 //reset to old position 1433 buf.position(pos); 1434 for (i = 0; i < bytes.length; i++) { 1435 System.out.print(hex(bytes[i]) + " "); 1436 } 1437 errln(""); 1438 } 1439 equals(CoderResult a, CoderResult b)1440 public boolean equals(CoderResult a, CoderResult b) { 1441 return (a.isUnderflow() && b.isUnderflow()) 1442 || (a.isOverflow() && b.isOverflow()) 1443 || (a.isMalformed() && b.isMalformed() && a.length() == b.length()) 1444 || (a.isUnmappable() && b.isUnmappable() && a.length() == b.length()); 1445 } equals(CharBuffer buf, String str)1446 public boolean equals(CharBuffer buf, String str) { 1447 return equals(buf, str.toCharArray()); 1448 } equals(CharBuffer buf, CharBuffer str)1449 public boolean equals(CharBuffer buf, CharBuffer str) { 1450 if (buf.limit() != str.limit()) 1451 return false; 1452 int limit = buf.limit(); 1453 for (int i = 0; i < limit; i++) 1454 if (buf.get(i) != str.get(i)) 1455 return false; 1456 return true; 1457 } equals(CharBuffer buf, CharBuffer str, int limit)1458 public boolean 
equals(CharBuffer buf, CharBuffer str, int limit) { 1459 if (limit > buf.limit() || limit > str.limit()) 1460 return false; 1461 for (int i = 0; i < limit; i++) 1462 if (buf.get(i) != str.get(i)) 1463 return false; 1464 return true; 1465 } equals(CharBuffer buf, char[] compareTo)1466 public boolean equals(CharBuffer buf, char[] compareTo) { 1467 char[] chars = new char[buf.limit()]; 1468 //save the current position 1469 int pos = buf.position(); 1470 buf.position(0); 1471 buf.get(chars); 1472 //reset to old position 1473 buf.position(pos); 1474 return equals(chars, compareTo); 1475 } 1476 equals(char[] chars, char[] compareTo)1477 public boolean equals(char[] chars, char[] compareTo) { 1478 if (chars.length != compareTo.length) { 1479 errln( 1480 "Length does not match chars: " 1481 + chars.length 1482 + " compareTo: " 1483 + compareTo.length); 1484 return false; 1485 } else { 1486 boolean result = true; 1487 for (int i = 0; i < chars.length; i++) { 1488 if (chars[i] != compareTo[i]) { 1489 logln( 1490 "Got: " 1491 + hex(chars[i]) 1492 + " Expected: " 1493 + hex(compareTo[i]) 1494 + " At: " 1495 + i); 1496 result = false; 1497 } 1498 } 1499 return result; 1500 } 1501 } 1502 equals(ByteBuffer buf, byte[] compareTo)1503 public boolean equals(ByteBuffer buf, byte[] compareTo) { 1504 byte[] chars = new byte[buf.limit()]; 1505 //save the current position 1506 int pos = buf.position(); 1507 buf.position(0); 1508 buf.get(chars); 1509 //reset to old position 1510 buf.position(pos); 1511 return equals(chars, compareTo); 1512 } equals(ByteBuffer buf, ByteBuffer compareTo)1513 public boolean equals(ByteBuffer buf, ByteBuffer compareTo) { 1514 if (buf.limit() != compareTo.limit()) 1515 return false; 1516 int limit = buf.limit(); 1517 for (int i = 0; i < limit; i++) 1518 if (buf.get(i) != compareTo.get(i)) 1519 return false; 1520 return true; 1521 } equals(ByteBuffer buf, ByteBuffer compareTo, int limit)1522 public boolean equals(ByteBuffer buf, ByteBuffer compareTo, int limit) 
{ 1523 if (limit > buf.limit() || limit > compareTo.limit()) 1524 return false; 1525 for (int i = 0; i < limit; i++) 1526 if (buf.get(i) != compareTo.get(i)) 1527 return false; 1528 return true; 1529 } equals(byte[] chars, byte[] compareTo)1530 public boolean equals(byte[] chars, byte[] compareTo) { 1531 if (false/*chars.length != compareTo.length*/) { 1532 errln( 1533 "Length does not match chars: " 1534 + chars.length 1535 + " compareTo: " 1536 + compareTo.length); 1537 return false; 1538 } else { 1539 boolean result = true; 1540 for (int i = 0; i < chars.length; i++) { 1541 if (chars[i] != compareTo[i]) { 1542 logln( 1543 "Got: " 1544 + hex(chars[i]) 1545 + " Expected: " 1546 + hex(compareTo[i]) 1547 + " At: " 1548 + i); 1549 result = false; 1550 } 1551 } 1552 return result; 1553 } 1554 } 1555 1556 // TODO 1557 /* 1558 @Test 1559 public void TestCallback(String encoding) throws Exception { 1560 1561 byte[] gbSource = 1562 { 1563 (byte) 0x81, 1564 (byte) 0x36, 1565 (byte) 0xDE, 1566 (byte) 0x36, 1567 (byte) 0x81, 1568 (byte) 0x36, 1569 (byte) 0xDE, 1570 (byte) 0x37, 1571 (byte) 0x81, 1572 (byte) 0x36, 1573 (byte) 0xDE, 1574 (byte) 0x38, 1575 (byte) 0xe3, 1576 (byte) 0x32, 1577 (byte) 0x9a, 1578 (byte) 0x36 }; 1579 1580 char[] subChars = { 'P', 'I' }; 1581 1582 decoder.reset(); 1583 1584 decoder.replaceWith(new String(subChars)); 1585 ByteBuffer mySource = ByteBuffer.wrap(gbSource); 1586 CharBuffer myTarget = CharBuffer.allocate(5); 1587 1588 decoder.decode(mySource, myTarget, true); 1589 char[] expectedResult = 1590 { '\u22A6', '\u22A7', '\u22A8', '\u0050', '\u0049', }; 1591 1592 if (!equals(myTarget, new String(expectedResult))) { 1593 errln("Test callback GB18030 to Unicode : FAILED"); 1594 } 1595 1596 } 1597 */ 1598 1599 @Test TestCanConvert( )1600 public void TestCanConvert(/*String encoding*/)throws Exception { 1601 char[] mySource = { 1602 '\ud800', '\udc00',/*surrogate pair */ 1603 '\u22A6','\u22A7','\u22A8','\u22A9','\u22AA', 1604 
'\u22AB','\u22AC','\u22AD','\u22AE','\u22AF', 1605 '\u22B0','\u22B1','\u22B2','\u22B3','\u22B4', 1606 '\ud800','\udc00',/*surrogate pair */ 1607 '\u22B5','\u22B6','\u22B7','\u22B8','\u22B9', 1608 '\u22BA','\u22BB','\u22BC','\u22BD','\u22BE' 1609 }; 1610 String encoding = "UTF-16"; 1611 CharsetEncoder encoder = null; 1612 try { 1613 CharsetProviderICU provider = new CharsetProviderICU(); 1614 Charset charset = provider.charsetForName(encoding); 1615 encoder = charset.newEncoder(); 1616 } catch(MissingResourceException ex) { 1617 warnln("Could not load charset data: " + encoding); 1618 return; 1619 } 1620 if (!encoder.canEncode(new String(mySource))) { 1621 errln("Test canConvert() " + encoding + " failed. "+encoder); 1622 } 1623 1624 } 1625 1626 @Test TestAvailableCharsets()1627 public void TestAvailableCharsets() { 1628 SortedMap map = Charset.availableCharsets(); 1629 Set keySet = map.keySet(); 1630 Iterator iter = keySet.iterator(); 1631 while(iter.hasNext()){ 1632 logln("Charset name: "+iter.next().toString()); 1633 } 1634 Object[] charsets = CharsetProviderICU.getAvailableNames(); 1635 int mapSize = map.size(); 1636 if(mapSize < charsets.length){ 1637 errln("Charset.availableCharsets() returned a number less than the number returned by icu. ICU: " + charsets.length 1638 + " JDK: " + mapSize); 1639 } 1640 logln("Total Number of chasets = " + map.size()); 1641 } 1642 1643 @Test TestWindows936()1644 public void TestWindows936(){ 1645 CharsetProviderICU icu = new CharsetProviderICU(); 1646 Charset cs = icu.charsetForName("windows-936-2000"); 1647 String canonicalName = cs.name(); 1648 if(!canonicalName.equals("GBK")){ 1649 errln("Did not get the expected canonical name. 
Got: "+canonicalName); //get the canonical name 1650 } 1651 } 1652 1653 @Test TestICUAvailableCharsets()1654 public void TestICUAvailableCharsets() { 1655 CharsetProviderICU icu = new CharsetProviderICU(); 1656 Object[] charsets = CharsetProviderICU.getAvailableNames(); 1657 for(int i=0;i<charsets.length;i++){ 1658 Charset cs = icu.charsetForName((String)charsets[i]); 1659 try{ 1660 CharsetEncoder encoder = cs.newEncoder(); 1661 if(encoder!=null){ 1662 logln("Creation of encoder succeeded. "+cs.toString()); 1663 } 1664 }catch(Exception ex){ 1665 errln("Could not instantiate encoder for "+charsets[i]+". Error: "+ex.toString()); 1666 } 1667 try{ 1668 CharsetDecoder decoder = cs.newDecoder(); 1669 if(decoder!=null){ 1670 logln("Creation of decoder succeeded. "+cs.toString()); 1671 } 1672 }catch(Exception ex){ 1673 errln("Could not instantiate decoder for "+charsets[i]+". Error: "+ex.toString()); 1674 } 1675 } 1676 } 1677 1678 /* jitterbug 4312 */ 1679 @Test TestUnsupportedCharset()1680 public void TestUnsupportedCharset(){ 1681 CharsetProvider icu = new CharsetProviderICU(); 1682 Charset icuChar = icu.charsetForName("impossible"); 1683 if(icuChar != null){ 1684 errln("ICU does not conform to the spec"); 1685 } 1686 } 1687 1688 @Test TestEncoderCreation()1689 public void TestEncoderCreation(){ 1690 // Use CharsetICU.forNameICU() so that we get the ICU version 1691 // even if the system or another provider also supports this charset. 
1692 String encoding = "GB_2312-80"; 1693 try{ 1694 Charset cs = CharsetICU.forNameICU(encoding); 1695 CharsetEncoder enc = cs.newEncoder(); 1696 if(enc!=null){ 1697 logln("Successfully created an encoder for " + encoding + ": " + enc); 1698 if(!(enc instanceof CharsetEncoderICU)) { 1699 errln("Expected " + encoding + 1700 " to be implemented by ICU but got an instance of " + enc.getClass()); 1701 } 1702 }else{ 1703 errln("Error creating charset encoder for " + encoding); 1704 } 1705 }catch(Exception e){ 1706 warnln("Error creating charset encoder for " + encoding + ": " + e); 1707 } 1708 // Use Charset.forName() which may return an ICU Charset or some other implementation. 1709 encoding = "x-ibm-971_P100-1995"; 1710 try{ 1711 Charset cs = Charset.forName(encoding); 1712 CharsetEncoder enc = cs.newEncoder(); 1713 if(enc!=null){ 1714 logln("Successfully created an encoder for " + encoding + ": " + enc + 1715 " which is implemented by ICU? " + (enc instanceof CharsetEncoderICU)); 1716 }else{ 1717 errln("Error creating charset encoder for " + encoding); 1718 } 1719 }catch(Exception e){ 1720 warnln("Error creating charset encoder for " + encoding + ": " + e); 1721 } 1722 } 1723 1724 @Test TestSubBytes()1725 public void TestSubBytes(){ 1726 try{ 1727 //create utf-8 decoder 1728 CharsetDecoder decoder = new CharsetProviderICU().charsetForName("utf-8").newDecoder(); 1729 1730 //create a valid byte array, which can be decoded to " buffer" 1731 byte[] unibytes = new byte[] { 0x0020, 0x0062, 0x0075, 0x0066, 0x0066, 0x0065, 0x0072 }; 1732 1733 ByteBuffer buffer = ByteBuffer.allocate(20); 1734 1735 //add a evil byte to make the byte buffer be malformed input 1736 buffer.put((byte)0xd8); 1737 1738 //put the valid byte array 1739 buffer.put(unibytes); 1740 1741 //reset postion 1742 buffer.flip(); 1743 1744 decoder.onMalformedInput(CodingErrorAction.REPLACE); 1745 CharBuffer out = decoder.decode(buffer); 1746 String expected = "\ufffd buffer"; 1747 if(!expected.equals(new 
String(out.array()))){ 1748 errln("Did not get the expected result for substitution chars. Got: "+ 1749 new String(out.array()) + "("+ hex(out.array())+")"); 1750 } 1751 logln("Output: "+ new String(out.array()) + "("+ hex(out.array())+")"); 1752 }catch (CharacterCodingException ex){ 1753 errln("Unexpected exception: "+ex.toString()); 1754 } 1755 } 1756 /* 1757 1758 @Test 1759 public void TestImplFlushFailure(){ 1760 1761 try{ 1762 CharBuffer in = CharBuffer.wrap("\u3005\u3006\u3007\u30FC\u2015\u2010\uFF0F"); 1763 CharsetEncoder encoder = new CharsetProviderICU().charsetForName("iso-2022-jp").newEncoder(); 1764 ByteBuffer out = ByteBuffer.allocate(30); 1765 encoder.encode(in, out, true); 1766 encoder.flush(out); 1767 if(out.position()!= 20){ 1768 errln("Did not get the expected position from flush"); 1769 } 1770 1771 }catch (Exception ex){ 1772 errln("Could not create encoder for iso-2022-jp exception: "+ex.toString()); 1773 } 1774 } 1775 */ 1776 1777 @Test TestISO88591()1778 public void TestISO88591() { 1779 1780 Charset cs = new CharsetProviderICU().charsetForName("iso-8859-1"); 1781 if(cs!=null){ 1782 CharsetEncoder encoder = cs.newEncoder(); 1783 if(encoder!=null){ 1784 encoder.canEncode("\uc2a3"); 1785 }else{ 1786 errln("Could not create encoder for iso-8859-1"); 1787 } 1788 }else{ 1789 errln("Could not create Charset for iso-8859-1"); 1790 } 1791 1792 } 1793 1794 @Test TestUTF8Encode()1795 public void TestUTF8Encode() { 1796 // Test with a lead surrogate in the middle of the input text. 1797 // Java API behavior is unclear for surrogates at the end, see ticket #11546. 
1798 CharBuffer in = CharBuffer.wrap("\ud800a"); 1799 ByteBuffer out = ByteBuffer.allocate(30); 1800 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); 1801 CoderResult result = encoderICU.encode(in, out, true); 1802 1803 if (result.isMalformed()) { 1804 logln("\\ud800 is malformed for ICU4JNI utf-8 encoder"); 1805 } else if (result.isUnderflow()) { 1806 errln("FAIL: \\ud800 is OK for ICU4JNI utf-8 encoder"); 1807 } 1808 1809 in.position(0); 1810 out.clear(); 1811 1812 CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder(); 1813 result = encoderJDK.encode(in, out, true); 1814 if (result.isMalformed()) { 1815 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1816 } else if (result.isUnderflow()) { 1817 errln("BAD: \\ud800 is OK for JDK utf-8 encoder"); 1818 } 1819 } 1820 1821 /* private void printCB(CharBuffer buf){ 1822 buf.rewind(); 1823 while(buf.hasRemaining()){ 1824 System.out.println(hex(buf.get())); 1825 } 1826 buf.rewind(); 1827 } 1828 */ 1829 1830 @Test TestUTF8()1831 public void TestUTF8() throws CharacterCodingException{ 1832 try{ 1833 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("utf-8").newEncoder(); 1834 encoderICU.encode(CharBuffer.wrap("\ud800")); 1835 errln("\\ud800 is OK for ICU4JNI utf-8 encoder"); 1836 }catch (Exception e) { 1837 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1838 //e.printStackTrace(); 1839 } 1840 1841 CharsetEncoder encoderJDK = Charset.forName("utf-8").newEncoder(); 1842 try { 1843 encoderJDK.encode(CharBuffer.wrap("\ud800")); 1844 errln("\\ud800 is OK for JDK utf-8 encoder"); 1845 } catch (Exception e) { 1846 logln("\\ud800 is malformed for JDK utf-8 encoder"); 1847 //e.printStackTrace(); 1848 } 1849 } 1850 1851 @Test TestUTF16Bom()1852 public void TestUTF16Bom(){ 1853 1854 Charset cs = (new CharsetProviderICU()).charsetForName("UTF-16"); 1855 char[] in = new char[] { 0x1122, 0x2211, 0x3344, 0x4433, 1856 0x5566, 0x6655, 0x7788, 0x8877, 0x9900 
}; 1857 CharBuffer inBuf = CharBuffer.allocate(in.length); 1858 inBuf.put(in); 1859 CharsetEncoder encoder = cs.newEncoder(); 1860 ByteBuffer outBuf = ByteBuffer.allocate(in.length*2+2); 1861 inBuf.rewind(); 1862 encoder.encode(inBuf, outBuf, true); 1863 outBuf.rewind(); 1864 if(outBuf.get(0)!= (byte)0xFE && outBuf.get(1)!= (byte)0xFF){ 1865 errln("The UTF16 encoder did not appended bom. Length returned: " + outBuf.remaining()); 1866 } 1867 while(outBuf.hasRemaining()){ 1868 logln("0x"+hex(outBuf.get())); 1869 } 1870 CharsetDecoder decoder = cs.newDecoder(); 1871 outBuf.rewind(); 1872 CharBuffer rt = CharBuffer.allocate(in.length); 1873 CoderResult cr = decoder.decode(outBuf, rt, true); 1874 if(cr.isError()){ 1875 errln("Decoding with BOM failed. Error: "+ cr.toString()); 1876 } 1877 equals(rt, in); 1878 { 1879 rt.clear(); 1880 outBuf.rewind(); 1881 Charset utf16 = Charset.forName("UTF-16"); 1882 CharsetDecoder dc = utf16.newDecoder(); 1883 cr = dc.decode(outBuf, rt, true); 1884 equals(rt, in); 1885 } 1886 } 1887 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean throwException, boolean flush)1888 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1889 boolean throwException, boolean flush) throws BufferOverflowException, Exception { 1890 smBufDecode(decoder, encoding, source, target, throwException, flush, true); 1891 } 1892 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean throwException, boolean flush, boolean backedByArray)1893 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1894 boolean throwException, boolean flush, boolean backedByArray) throws BufferOverflowException, Exception { 1895 smBufDecode(decoder, encoding, source, target, throwException, flush, backedByArray, -1); 1896 } 1897 smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, 
CharBuffer target, boolean throwException, boolean flush, boolean backedByArray, int targetLimit)1898 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, 1899 boolean throwException, boolean flush, boolean backedByArray, int targetLimit) 1900 throws BufferOverflowException, Exception { 1901 ByteBuffer mySource; 1902 CharBuffer myTarget; 1903 if (backedByArray) { 1904 mySource = ByteBuffer.allocate(source.capacity()); 1905 myTarget = CharBuffer.allocate(target.capacity()); 1906 } else { 1907 // this does not guarantee by any means that mySource and myTarget 1908 // are not backed by arrays 1909 mySource = ByteBuffer.allocateDirect(source.capacity()); 1910 myTarget = ByteBuffer.allocateDirect(target.capacity() * 2).asCharBuffer(); 1911 } 1912 mySource.position(source.position()); 1913 for (int i = source.position(); i < source.limit(); i++) 1914 mySource.put(i, source.get(i)); 1915 1916 { 1917 decoder.reset(); 1918 myTarget.limit(target.limit()); 1919 mySource.limit(source.limit()); 1920 mySource.position(source.position()); 1921 CoderResult result = CoderResult.UNDERFLOW; 1922 result = decoder.decode(mySource, myTarget, true); 1923 if (flush) { 1924 result = decoder.flush(myTarget); 1925 } 1926 if (result.isError()) { 1927 if (throwException) { 1928 throw new Exception(); 1929 } 1930 errln("Test complete buffers while decoding failed. 
" + result.toString()); 1931 return; 1932 } 1933 if (result.isOverflow()) { 1934 if (throwException) { 1935 throw new BufferOverflowException(); 1936 } 1937 errln("Test complete buffers while decoding threw overflow exception"); 1938 return; 1939 } 1940 myTarget.limit(myTarget.position()); 1941 myTarget.position(0); 1942 target.position(0); 1943 if (result.isUnderflow() && !equals(myTarget, target, targetLimit)) { 1944 errln(" Test complete buffers while decoding " + encoding + " TO Unicode--failed"); 1945 } 1946 } 1947 if (isQuick()) { 1948 return; 1949 } 1950 { 1951 decoder.reset(); 1952 myTarget.limit(target.position()); 1953 mySource.limit(source.position()); 1954 mySource.position(source.position()); 1955 myTarget.clear(); 1956 myTarget.position(0); 1957 1958 int inputLen = mySource.remaining(); 1959 1960 CoderResult result = CoderResult.UNDERFLOW; 1961 for (int i = 1; i <= inputLen; i++) { 1962 mySource.limit(i); 1963 if (i == inputLen) { 1964 result = decoder.decode(mySource, myTarget, true); 1965 } else { 1966 result = decoder.decode(mySource, myTarget, false); 1967 } 1968 if (result.isError()) { 1969 errln("Test small input buffers while decoding failed. 
" + result.toString()); 1970 break; 1971 } 1972 if (result.isOverflow()) { 1973 if (throwException) { 1974 throw new BufferOverflowException(); 1975 } 1976 errln("Test small input buffers while decoding threw overflow exception"); 1977 break; 1978 } 1979 1980 } 1981 if (result.isUnderflow() && !equals(myTarget, target, targetLimit)) { 1982 errln("Test small input buffers while decoding " + encoding + " TO Unicode--failed"); 1983 } 1984 } 1985 { 1986 decoder.reset(); 1987 myTarget.limit(0); 1988 mySource.limit(0); 1989 mySource.position(source.position()); 1990 myTarget.clear(); 1991 while (true) { 1992 CoderResult result = decoder.decode(mySource, myTarget, false); 1993 if (result.isUnderflow()) { 1994 if (mySource.limit() < source.limit()) 1995 mySource.limit(mySource.limit() + 1); 1996 } else if (result.isOverflow()) { 1997 if (myTarget.limit() < target.limit()) 1998 myTarget.limit(myTarget.limit() + 1); 1999 else 2000 break; 2001 } else /*if (result.isError())*/ { 2002 errln("Test small output buffers while decoding " + result.toString()); 2003 } 2004 if (mySource.position() == mySource.limit()) { 2005 result = decoder.decode(mySource, myTarget, true); 2006 if (result.isError()) { 2007 errln("Test small output buffers while decoding " + result.toString()); 2008 } 2009 result = decoder.flush(myTarget); 2010 if (result.isError()) { 2011 errln("Test small output buffers while decoding " + result.toString()); 2012 } 2013 break; 2014 } 2015 } 2016 2017 if (!equals(myTarget, target, targetLimit)) { 2018 errln("Test small output buffers " + encoding + " TO Unicode failed"); 2019 } 2020 } 2021 } 2022 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush)2023 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2024 boolean throwException, boolean flush) throws Exception, BufferOverflowException { 2025 smBufEncode(encoder, encoding, source, 
target, throwException, flush, true); 2026 } 2027 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush, boolean backedByArray)2028 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2029 boolean throwException, boolean flush, boolean backedByArray) throws Exception, BufferOverflowException { 2030 smBufEncode(encoder, encoding, source, target, throwException, flush, true, -1); 2031 } 2032 smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean throwException, boolean flush, boolean backedByArray, int targetLimit)2033 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, 2034 boolean throwException, boolean flush, boolean backedByArray, int targetLimit) throws Exception, 2035 BufferOverflowException { 2036 logln("Running smBufEncode for " + encoding + " with class " + encoder); 2037 2038 CharBuffer mySource; 2039 ByteBuffer myTarget; 2040 if (backedByArray) { 2041 mySource = CharBuffer.allocate(source.capacity()); 2042 myTarget = ByteBuffer.allocate(target.capacity()); 2043 } else { 2044 mySource = ByteBuffer.allocateDirect(source.capacity() * 2).asCharBuffer(); 2045 myTarget = ByteBuffer.allocateDirect(target.capacity()); 2046 } 2047 mySource.position(source.position()); 2048 for (int i = source.position(); i < source.limit(); i++) 2049 mySource.put(i, source.get(i)); 2050 2051 myTarget.clear(); 2052 { 2053 logln("Running tests on small input buffers for " + encoding); 2054 encoder.reset(); 2055 myTarget.limit(target.limit()); 2056 mySource.limit(source.limit()); 2057 mySource.position(source.position()); 2058 CoderResult result = null; 2059 2060 result = encoder.encode(mySource, myTarget, true); 2061 if (flush) { 2062 result = encoder.flush(myTarget); 2063 } 2064 2065 if (result.isError()) { 2066 if (throwException) { 2067 throw new Exception(); 
2068 } 2069 errln("Test complete while encoding failed. " + result.toString()); 2070 } 2071 if (result.isOverflow()) { 2072 if (throwException) { 2073 throw new BufferOverflowException(); 2074 } 2075 errln("Test complete while encoding threw overflow exception"); 2076 } 2077 if (!equals(myTarget, target, targetLimit)) { 2078 errln("Test complete buffers while encoding for " + encoding + " failed"); 2079 2080 } else { 2081 logln("Tests complete buffers for " + encoding + " passed"); 2082 } 2083 } 2084 if (isQuick()) { 2085 return; 2086 } 2087 { 2088 logln("Running tests on small input buffers for " + encoding); 2089 encoder.reset(); 2090 myTarget.clear(); 2091 myTarget.limit(target.limit()); 2092 mySource.limit(source.limit()); 2093 mySource.position(source.position()); 2094 int inputLen = mySource.limit(); 2095 CoderResult result = null; 2096 for (int i = 1; i <= inputLen; i++) { 2097 mySource.limit(i); 2098 result = encoder.encode(mySource, myTarget, false); 2099 if (result.isError()) { 2100 errln("Test small input buffers while encoding failed. 
" + result.toString()); 2101 } 2102 if (result.isOverflow()) { 2103 if (throwException) { 2104 throw new BufferOverflowException(); 2105 } 2106 errln("Test small input buffers while encoding threw overflow exception"); 2107 } 2108 } 2109 if (!equals(myTarget, target, targetLimit)) { 2110 errln("Test small input buffers " + encoding + " From Unicode failed"); 2111 } else { 2112 logln("Tests on small input buffers for " + encoding + " passed"); 2113 } 2114 } 2115 { 2116 logln("Running tests on small output buffers for " + encoding); 2117 encoder.reset(); 2118 myTarget.clear(); 2119 myTarget.limit(target.limit()); 2120 mySource.limit(source.limit()); 2121 mySource.position(source.position()); 2122 mySource.position(0); 2123 myTarget.position(0); 2124 2125 logln("myTarget.limit: " + myTarget.limit() + " myTarget.capcity: " + myTarget.capacity()); 2126 2127 while (true) { 2128 int pos = myTarget.position(); 2129 2130 CoderResult result = encoder.encode(mySource, myTarget, false); 2131 logln("myTarget.Position: " + pos + " myTarget.limit: " + myTarget.limit()); 2132 logln("mySource.position: " + mySource.position() + " mySource.limit: " + mySource.limit()); 2133 2134 if (result.isError()) { 2135 errln("Test small output buffers while encoding " + result.toString()); 2136 } 2137 if (mySource.position() == mySource.limit()) { 2138 result = encoder.encode(mySource, myTarget, true); 2139 if (result.isError()) { 2140 errln("Test small output buffers while encoding " + result.toString()); 2141 } 2142 2143 myTarget.limit(myTarget.capacity()); 2144 result = encoder.flush(myTarget); 2145 if (result.isError()) { 2146 errln("Test small output buffers while encoding " + result.toString()); 2147 } 2148 break; 2149 } 2150 } 2151 if (!equals(myTarget, target, targetLimit)) { 2152 errln("Test small output buffers " + encoding + " From Unicode failed."); 2153 } 2154 logln("Tests on small output buffers for " + encoding + " passed"); 2155 } 2156 } 2157 2158 2159 //TODO 2160 /* 2161 @Test 
2162 public void TestString(ByteBuffer bSource, CharBuffer uSource) throws Exception { 2163 try { 2164 { 2165 String source = uSource.toString(); 2166 byte[] target = source.getBytes(m_encoding); 2167 if (!equals(target, bSource.array())) { 2168 errln("encode using string API failed"); 2169 } 2170 } 2171 { 2172 2173 String target = new String(bSource.array(), m_encoding); 2174 if (!equals(uSource, target.toCharArray())) { 2175 errln("decode using string API failed"); 2176 } 2177 } 2178 } catch (Exception e) { 2179 //e.printStackTrace(); 2180 errln(e.getMessage()); 2181 } 2182 } 2183 2184 /*private void fromUnicodeTest() throws Exception { 2185 2186 logln("Loaded Charset: " + charset.getClass().toString()); 2187 logln("Loaded CharsetEncoder: " + encoder.getClass().toString()); 2188 logln("Loaded CharsetDecoder: " + decoder.getClass().toString()); 2189 2190 ByteBuffer myTarget = ByteBuffer.allocate(gbSource.length); 2191 logln("Created ByteBuffer of length: " + uSource.length); 2192 CharBuffer mySource = CharBuffer.wrap(uSource); 2193 logln("Wrapped ByteBuffer with CharBuffer "); 2194 encoder.reset(); 2195 logln("Test Unicode to " + encoding ); 2196 encoder.encode(mySource, myTarget, true); 2197 if (!equals(myTarget, gbSource)) { 2198 errln("--Test Unicode to " + encoding + ": FAILED"); 2199 } 2200 logln("Test Unicode to " + encoding +" passed"); 2201 } 2202 2203 @Test 2204 public void TestToUnicode( ) throws Exception { 2205 2206 logln("Loaded Charset: " + charset.getClass().toString()); 2207 logln("Loaded CharsetEncoder: " + encoder.getClass().toString()); 2208 logln("Loaded CharsetDecoder: " + decoder.getClass().toString()); 2209 2210 CharBuffer myTarget = CharBuffer.allocate(uSource.length); 2211 ByteBuffer mySource = ByteBuffer.wrap(getByteArray(gbSource)); 2212 decoder.reset(); 2213 CoderResult result = decoder.decode(mySource, myTarget, true); 2214 if (result.isError()) { 2215 errln("Test ToUnicode -- FAILED"); 2216 } 2217 if (!equals(myTarget, uSource)) { 
2218 errln("--Test " + encoding + " to Unicode :FAILED"); 2219 } 2220 } 2221 2222 public static byte[] getByteArray(char[] source) { 2223 byte[] target = new byte[source.length]; 2224 int i = source.length; 2225 for (; --i >= 0;) { 2226 target[i] = (byte) source[i]; 2227 } 2228 return target; 2229 } 2230 /* 2231 private void smBufCharset(Charset charset) { 2232 try { 2233 ByteBuffer bTarget = charset.encode(CharBuffer.wrap(uSource)); 2234 CharBuffer uTarget = 2235 charset.decode(ByteBuffer.wrap(getByteArray(gbSource))); 2236 2237 if (!equals(uTarget, uSource)) { 2238 errln("Test " + charset.toString() + " to Unicode :FAILED"); 2239 } 2240 if (!equals(bTarget, gbSource)) { 2241 errln("Test " + charset.toString() + " from Unicode :FAILED"); 2242 } 2243 } catch (Exception ex) { 2244 errln("Encountered exception in smBufCharset"); 2245 } 2246 } 2247 2248 @Test 2249 public void TestMultithreaded() throws Exception { 2250 final Charset cs = Charset.forName(encoding); 2251 if (cs == charset) { 2252 errln("The objects are equal"); 2253 } 2254 smBufCharset(cs); 2255 try { 2256 final Thread t1 = new Thread() { 2257 public void run() { 2258 // commented out since the mehtods on 2259 // Charset API are supposed to be thread 2260 // safe ... 
to test it we dont sync 2261 2262 // synchronized(charset){ 2263 while (!interrupted()) { 2264 try { 2265 smBufCharset(cs); 2266 } catch (UnsupportedCharsetException ueEx) { 2267 errln(ueEx.toString()); 2268 } 2269 } 2270 2271 // } 2272 } 2273 }; 2274 final Thread t2 = new Thread() { 2275 public void run() { 2276 // synchronized(charset){ 2277 while (!interrupted()) { 2278 try { 2279 smBufCharset(cs); 2280 } catch (UnsupportedCharsetException ueEx) { 2281 errln(ueEx.toString()); 2282 } 2283 } 2284 2285 //} 2286 } 2287 }; 2288 t1.start(); 2289 t2.start(); 2290 int i = 0; 2291 for (;;) { 2292 if (i > 1000000000) { 2293 try { 2294 t1.interrupt(); 2295 } catch (Exception e) { 2296 } 2297 try { 2298 t2.interrupt(); 2299 } catch (Exception e) { 2300 } 2301 break; 2302 } 2303 i++; 2304 } 2305 } catch (Exception e) { 2306 throw e; 2307 } 2308 } 2309 2310 @Test 2311 public void TestSynchronizedMultithreaded() throws Exception { 2312 // Methods on CharsetDecoder and CharsetEncoder classes 2313 // are inherently unsafe if accessed by multiple concurrent 2314 // thread so we synchronize them 2315 final Charset charset = Charset.forName(encoding); 2316 final CharsetDecoder decoder = charset.newDecoder(); 2317 final CharsetEncoder encoder = charset.newEncoder(); 2318 try { 2319 final Thread t1 = new Thread() { 2320 public void run() { 2321 while (!interrupted()) { 2322 try { 2323 synchronized (encoder) { 2324 smBufEncode(encoder, encoding); 2325 } 2326 synchronized (decoder) { 2327 smBufDecode(decoder, encoding); 2328 } 2329 } catch (UnsupportedCharsetException ueEx) { 2330 errln(ueEx.toString()); 2331 } 2332 } 2333 2334 } 2335 }; 2336 final Thread t2 = new Thread() { 2337 public void run() { 2338 while (!interrupted()) { 2339 try { 2340 synchronized (encoder) { 2341 smBufEncode(encoder, encoding); 2342 } 2343 synchronized (decoder) { 2344 smBufDecode(decoder, encoding); 2345 } 2346 } catch (UnsupportedCharsetException ueEx) { 2347 errln(ueEx.toString()); 2348 } 2349 } 2350 } 
2351 }; 2352 t1.start(); 2353 t2.start(); 2354 int i = 0; 2355 for (;;) { 2356 if (i > 1000000000) { 2357 try { 2358 t1.interrupt(); 2359 } catch (Exception e) { 2360 } 2361 try { 2362 t2.interrupt(); 2363 } catch (Exception e) { 2364 } 2365 break; 2366 } 2367 i++; 2368 } 2369 } catch (Exception e) { 2370 throw e; 2371 } 2372 } 2373 */ 2374 2375 @Test TestMBCS()2376 public void TestMBCS(){ 2377 { 2378 // Encoder: from Unicode conversion 2379 CharsetEncoder encoderICU = new CharsetProviderICU().charsetForName("ibm-971").newEncoder(); 2380 ByteBuffer out = ByteBuffer.allocate(6); 2381 encoderICU.onUnmappableCharacter(CodingErrorAction.REPLACE); 2382 CoderResult result = encoderICU.encode(CharBuffer.wrap("\u0131\u0061\u00a1"), out, true); 2383 if(!result.isError()){ 2384 byte[] expected = {(byte)0xA9, (byte)0xA5, (byte)0xAF, (byte)0xFE, (byte)0xA2, (byte)0xAE}; 2385 if(!equals(expected, out.array())){ 2386 // Android patch: Skip tests that fail with customized data. 2387 logln("Did not get the expected result for substitution bytes. Got: "+ 2388 hex(out.array())); 2389 // Android patch end. 2390 } 2391 logln("Output: "+ hex(out.array())); 2392 }else{ 2393 errln("Encode operation failed for encoder: "+encoderICU.toString()); 2394 } 2395 } 2396 { 2397 // Decoder: to Unicode conversion 2398 CharsetDecoder decoderICU = new CharsetProviderICU().charsetForName("ibm-971").newDecoder(); 2399 CharBuffer out = CharBuffer.allocate(3); 2400 decoderICU.onMalformedInput(CodingErrorAction.REPLACE); 2401 CoderResult result = decoderICU.decode(ByteBuffer.wrap(new byte[] { (byte)0xA2, (byte)0xAE, (byte)0x12, (byte)0x34, (byte)0xEF, (byte)0xDC }), out, true); 2402 if(!result.isError()){ 2403 char[] expected = {'\u00a1', '\ufffd', '\u6676'}; 2404 if(!equals(expected, out.array())){ 2405 errln("Did not get the expected result for substitution chars. 
Got: "+ 2406 hex(out.array())); 2407 } 2408 logln("Output: "+ hex(out.array())); 2409 }else{ 2410 errln("Decode operation failed for encoder: "+decoderICU.toString()); 2411 } 2412 } 2413 } 2414 2415 @Test TestJB4897()2416 public void TestJB4897(){ 2417 CharsetProviderICU provider = new CharsetProviderICU(); 2418 Charset charset = provider.charsetForName("x-abracadabra"); 2419 if(charset!=null && charset.canEncode()== true){ 2420 errln("provider.charsetForName() does not validate the charset names" ); 2421 } 2422 } 2423 2424 @Test TestJB5027()2425 public void TestJB5027() { 2426 CharsetProviderICU provider= new CharsetProviderICU(); 2427 2428 Charset fake = provider.charsetForName("doesNotExist"); 2429 if(fake != null){ 2430 errln("\"doesNotExist\" returned " + fake); 2431 } 2432 Charset xfake = provider.charsetForName("x-doesNotExist"); 2433 if(xfake!=null){ 2434 errln("\"x-doesNotExist\" returned " + xfake); 2435 } 2436 } 2437 2438 //test to make sure that number of aliases and canonical names are in the charsets that are in 2439 @Test TestAllNames()2440 public void TestAllNames() { 2441 2442 CharsetProviderICU provider= new CharsetProviderICU(); 2443 Object[] available = CharsetProviderICU.getAvailableNames(); 2444 for(int i=0; i<available.length;i++){ 2445 try{ 2446 String canon = CharsetProviderICU.getICUCanonicalName((String)available[i]); 2447 2448 // ',' is not allowed by Java's charset name checker 2449 if(canon.indexOf(',')>=0){ 2450 continue; 2451 } 2452 Charset cs = provider.charsetForName((String)available[i]); 2453 2454 Object[] javaAliases = cs.aliases().toArray(); 2455 //seach for ICU canonical name in javaAliases 2456 boolean inAliasList = false; 2457 for(int j=0; j<javaAliases.length; j++){ 2458 String java = (String) javaAliases[j]; 2459 if(java.equals(canon)){ 2460 logln("javaAlias: " + java + " canon: " + canon); 2461 inAliasList = true; 2462 } 2463 } 2464 if(inAliasList == false){ 2465 errln("Could not find ICU canonical name: "+canon+ " for 
java canonical name: "+ available[i]+ " "+ i); 2466 } 2467 }catch(UnsupportedCharsetException ex){ 2468 errln("could no load charset "+ available[i]+" "+ex.getMessage()); 2469 continue; 2470 } 2471 } 2472 } 2473 2474 @Test TestDecoderImplFlush()2475 public void TestDecoderImplFlush() { 2476 CharsetProviderICU provider = new CharsetProviderICU(); 2477 Charset ics = provider.charsetForName("UTF-16"); 2478 Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset 2479 execDecoder(jcs); 2480 execDecoder(ics); 2481 } 2482 2483 @Test TestEncoderImplFlush()2484 public void TestEncoderImplFlush() { 2485 CharsetProviderICU provider = new CharsetProviderICU(); 2486 Charset ics = provider.charsetForName("UTF-16"); 2487 Charset jcs = Charset.forName("UTF-16"); // Java's UTF-16 charset 2488 execEncoder(jcs); 2489 execEncoder(ics); 2490 } execDecoder(Charset cs)2491 private void execDecoder(Charset cs){ 2492 CharsetDecoder decoder = cs.newDecoder(); 2493 decoder.onMalformedInput(CodingErrorAction.REPORT); 2494 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2495 CharBuffer out = CharBuffer.allocate(10); 2496 CoderResult result = decoder.decode(ByteBuffer.wrap(new byte[] { -1, 2497 -2, 32, 0, 98 }), out, false); 2498 result = decoder.decode(ByteBuffer.wrap(new byte[] { 98 }), out, true); 2499 2500 logln(cs.getClass().toString()+ ":" +result.toString()); 2501 try { 2502 result = decoder.flush(out); 2503 logln(cs.getClass().toString()+ ":" +result.toString()); 2504 } catch (Exception e) { 2505 errln(e.getMessage()+" "+cs.getClass().toString()); 2506 } 2507 } execEncoder(Charset cs)2508 private void execEncoder(Charset cs){ 2509 CharsetEncoder encoder = cs.newEncoder(); 2510 encoder.onMalformedInput(CodingErrorAction.REPORT); 2511 encoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2512 ByteBuffer out = ByteBuffer.allocate(10); 2513 CoderResult result = encoder.encode(CharBuffer.wrap(new char[] { '\uFFFF', 2514 '\u2345', 32, 98 }), out, false); 2515 
logln(cs.getClass().toString()+ ":" +result.toString()); 2516 result = encoder.encode(CharBuffer.wrap(new char[] { 98 }), out, true); 2517 2518 logln(cs.getClass().toString()+ ":" +result.toString()); 2519 try { 2520 result = encoder.flush(out); 2521 logln(cs.getClass().toString()+ ":" +result.toString()); 2522 } catch (Exception e) { 2523 errln(e.getMessage()+" "+cs.getClass().toString()); 2524 } 2525 } 2526 2527 @Test TestDecodeMalformed()2528 public void TestDecodeMalformed() { 2529 CharsetProviderICU provider = new CharsetProviderICU(); 2530 Charset ics = provider.charsetForName("UTF-16BE"); 2531 //Use SUN's charset 2532 Charset jcs = Charset.forName("UTF-16"); 2533 CoderResult ir = execMalformed(ics); 2534 CoderResult jr = execMalformed(jcs); 2535 if(ir!=jr){ 2536 errln("ICU's decoder did not return the same result as Sun. ICU: "+ir.toString()+" Sun: "+jr.toString()); 2537 } 2538 } 2539 execMalformed(Charset cs)2540 private CoderResult execMalformed(Charset cs){ 2541 CharsetDecoder decoder = cs.newDecoder(); 2542 decoder.onMalformedInput(CodingErrorAction.IGNORE); 2543 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2544 ByteBuffer in = ByteBuffer.wrap(new byte[] { 0x00, 0x41, 0x00, 0x42, 0x01 }); 2545 CharBuffer out = CharBuffer.allocate(3); 2546 return decoder.decode(in, out, true); 2547 } 2548 2549 @Test TestJavaUTF16Decoder()2550 public void TestJavaUTF16Decoder(){ 2551 CharsetProviderICU provider = new CharsetProviderICU(); 2552 Charset ics = provider.charsetForName("UTF-16BE"); 2553 //Use SUN's charset 2554 Charset jcs = Charset.forName("UTF-16"); 2555 Exception ie = execConvertAll(ics); 2556 Exception je = execConvertAll(jcs); 2557 if(ie!=je){ 2558 errln("ICU's decoder did not return the same result as Sun. 
ICU: "+ie.toString()+" Sun: "+je.toString()); 2559 } 2560 } execConvertAll(Charset cs)2561 private Exception execConvertAll(Charset cs){ 2562 ByteBuffer in = ByteBuffer.allocate(400); 2563 int i=0; 2564 while(in.position()!=in.capacity()){ 2565 in.put((byte)0xD8); 2566 in.put((byte)i); 2567 in.put((byte)0xDC); 2568 in.put((byte)i); 2569 i++; 2570 } 2571 in.limit(in.position()); 2572 in.position(0); 2573 CharsetDecoder decoder = cs.newDecoder(); 2574 decoder.onMalformedInput(CodingErrorAction.IGNORE); 2575 decoder.onUnmappableCharacter(CodingErrorAction.REPORT); 2576 try{ 2577 CharBuffer out = decoder.decode(in); 2578 if(out!=null){ 2579 logln(cs.toString()+" encoing succeeded as expected!"); 2580 } 2581 }catch ( Exception ex){ 2582 errln("Did not get expected exception for encoding: "+cs.toString()); 2583 return ex; 2584 } 2585 return null; 2586 } 2587 2588 @Test TestUTF32BOM()2589 public void TestUTF32BOM(){ 2590 2591 Charset cs = (new CharsetProviderICU()).charsetForName("UTF-32"); 2592 char[] in = new char[] { 0xd800, 0xdc00, 2593 0xd801, 0xdc01, 2594 0xdbff, 0xdfff, 2595 0xd900, 0xdd00, 2596 0x0000, 0x0041, 2597 0x0000, 0x0042, 2598 0x0000, 0x0043}; 2599 2600 CharBuffer inBuf = CharBuffer.allocate(in.length); 2601 inBuf.put(in); 2602 CharsetEncoder encoder = cs.newEncoder(); 2603 ByteBuffer outBuf = ByteBuffer.allocate(in.length*4+4); 2604 inBuf.rewind(); 2605 encoder.encode(inBuf, outBuf, true); 2606 outBuf.rewind(); 2607 if(outBuf.get(0)!= (byte)0x00 && outBuf.get(1)!= (byte)0x00 && 2608 outBuf.get(2)!= (byte)0xFF && outBuf.get(3)!= (byte)0xFE){ 2609 errln("The UTF32 encoder did not appended bom. 
Length returned: " + outBuf.remaining()); 2610 } 2611 while(outBuf.hasRemaining()){ 2612 logln("0x"+hex(outBuf.get())); 2613 } 2614 CharsetDecoder decoder = cs.newDecoder(); 2615 outBuf.limit(outBuf.position()); 2616 outBuf.rewind(); 2617 CharBuffer rt = CharBuffer.allocate(in.length); 2618 CoderResult cr = decoder.decode(outBuf, rt, true); 2619 if(cr.isError()){ 2620 errln("Decoding with BOM failed. Error: "+ cr.toString()); 2621 } 2622 equals(rt, in); 2623 try{ 2624 rt.clear(); 2625 outBuf.rewind(); 2626 Charset utf16 = Charset.forName("UTF-32"); 2627 CharsetDecoder dc = utf16.newDecoder(); 2628 cr = dc.decode(outBuf, rt, true); 2629 equals(rt, in); 2630 }catch(UnsupportedCharsetException ex){ 2631 // swallow the expection. 2632 } 2633 } 2634 2635 /* 2636 * Michael Ow 2637 * Modified 070424 2638 */ 2639 /*The following two methods provides the option of exceptions when Decoding 2640 * and Encoding if needed for testing purposes. 2641 */ smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target)2642 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target) { 2643 smBufDecode(decoder, encoding, source, target, true); 2644 } smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean backedByArray)2645 private void smBufDecode(CharsetDecoder decoder, String encoding, ByteBuffer source, CharBuffer target, boolean backedByArray) { 2646 try { 2647 smBufDecode(decoder, encoding, source, target, false, false, backedByArray); 2648 } 2649 catch (Exception ex) { 2650 System.out.println("!exception!"); 2651 } 2652 } smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target)2653 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target) { 2654 smBufEncode(encoder, encoding, source, target, true); 2655 } smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer 
target, boolean backedByArray)2656 private void smBufEncode(CharsetEncoder encoder, String encoding, CharBuffer source, ByteBuffer target, boolean backedByArray) { 2657 try { 2658 smBufEncode(encoder, encoding, source, target, false, false); 2659 } 2660 catch (Exception ex) { 2661 System.out.println("!exception!"); 2662 } 2663 } 2664 2665 //Test CharsetICUProvider 2666 @Test TestNullCanonicalName()2667 public void TestNullCanonicalName() { 2668 String enc = null; 2669 String canonicalName = CharsetProviderICU.getICUCanonicalName(enc); 2670 2671 if (canonicalName != null) { 2672 errln("getICUCanonicalName return a non-null string for given null string"); 2673 } 2674 } 2675 2676 @Test TestGetAllNames()2677 public void TestGetAllNames() { 2678 String[] names = null; 2679 2680 names = CharsetProviderICU.getAllNames(); 2681 2682 if (names == null) { 2683 errln("getAllNames returned a null string."); 2684 } 2685 } 2686 2687 //Test CharsetICU 2688 @Test TestCharsetContains()2689 public void TestCharsetContains() { 2690 boolean test; 2691 2692 CharsetProvider provider = new CharsetProviderICU(); 2693 Charset cs1 = provider.charsetForName("UTF-32"); 2694 Charset cs2 = null; 2695 2696 test = cs1.contains(cs2); 2697 2698 if (test != false) { 2699 errln("Charset.contains returned true for a null charset."); 2700 } 2701 2702 cs2 = CharsetICU.forNameICU("UTF-32"); 2703 2704 test = cs1.contains(cs2); 2705 2706 if (test != true) { 2707 errln("Charset.contains returned false for an identical charset."); 2708 } 2709 2710 cs2 = provider.charsetForName("UTF-8"); 2711 2712 test = cs1.contains(cs2); 2713 2714 if (test != false) { 2715 errln("Charset.contains returned true for a different charset."); 2716 } 2717 } 2718 2719 @Test TestCharsetICUNullCharsetName()2720 public void TestCharsetICUNullCharsetName() { 2721 String charsetName = null; 2722 2723 try { 2724 CharsetICU.forNameICU(charsetName); 2725 errln("CharsetICU.forName should have thown an exception after getting a null 
charsetName."); 2726 } 2727 catch(Exception ex) { 2728 } 2729 } 2730 2731 //Test CharsetASCII 2732 @Test TestCharsetASCIIOverFlow()2733 public void TestCharsetASCIIOverFlow() { 2734 int byteBufferLimit; 2735 int charBufferLimit; 2736 2737 CharsetProvider provider = new CharsetProviderICU(); 2738 Charset cs = provider.charsetForName("ASCII"); 2739 CharsetEncoder encoder = cs.newEncoder(); 2740 CharsetDecoder decoder = cs.newDecoder(); 2741 2742 CharBuffer charBuffer = CharBuffer.allocate(0x90); 2743 ByteBuffer byteBuffer = ByteBuffer.allocate(0x90); 2744 2745 CharBuffer charBufferTest = CharBuffer.allocate(0xb0); 2746 ByteBuffer byteBufferTest = ByteBuffer.allocate(0xb0); 2747 2748 for(int j=0;j<=0x7f; j++){ 2749 charBuffer.put((char)j); 2750 byteBuffer.put((byte)j); 2751 } 2752 2753 byteBuffer.limit(byteBufferLimit = byteBuffer.position()); 2754 byteBuffer.position(0); 2755 charBuffer.limit(charBufferLimit = charBuffer.position()); 2756 charBuffer.position(0); 2757 2758 //test for overflow 2759 byteBufferTest.limit(byteBufferLimit - 5); 2760 byteBufferTest.position(0); 2761 charBufferTest.limit(charBufferLimit - 5); 2762 charBufferTest.position(0); 2763 try { 2764 smBufDecode(decoder, "ASCII", byteBuffer, charBufferTest, true, false); 2765 errln("Overflow exception while decoding ASCII should have been thrown."); 2766 } 2767 catch(Exception ex) { 2768 } 2769 try { 2770 smBufEncode(encoder, "ASCII", charBuffer, byteBufferTest, true, false); 2771 errln("Overflow exception while encoding ASCII should have been thrown."); 2772 } 2773 catch (Exception ex) { 2774 } 2775 2776 // For better code coverage 2777 /* For better code coverage */ 2778 byte byteout[] = { 2779 (byte)0x01 2780 }; 2781 char charin[] = { 2782 (char)0x0001, (char)0x0002 2783 }; 2784 ByteBuffer bb = ByteBuffer.wrap(byteout); 2785 CharBuffer cb = CharBuffer.wrap(charin); 2786 // Cast up to CharSequence to insulate against the CharBuffer.subSequence() return type change 2787 // which makes code compiled 
for a newer JDK not run on an older one. 2788 CharBuffer cb2 = CharBuffer.wrap(((CharSequence)cb).subSequence(0, 2)); 2789 encoder.reset(); 2790 if (!(encoder.encode(cb2, bb, true)).isOverflow()) { 2791 errln("Overflow error while encoding ASCII should have occurred."); 2792 } 2793 } 2794 2795 //Test CharsetUTF7 2796 @Test TestCharsetUTF7()2797 public void TestCharsetUTF7() { 2798 CoderResult result = CoderResult.UNDERFLOW; 2799 CharsetProvider provider = new CharsetProviderICU(); 2800 Charset cs = provider.charsetForName("UTF-7"); 2801 CharsetEncoder encoder = cs.newEncoder(); 2802 CharsetDecoder decoder = cs.newDecoder(); 2803 2804 CharBuffer us = CharBuffer.allocate(0x100); 2805 ByteBuffer bs = ByteBuffer.allocate(0x100); 2806 2807 /* Unicode : A<not equal to Alpha Lamda>. */ 2808 /* UTF7: AImIDkQ. */ 2809 us.put((char)0x41); us.put((char)0x2262); us.put((char)0x391); us.put((char)0x39B); us.put((char)0x2e); 2810 bs.put((byte)0x41); bs.put((byte)0x2b); bs.put((byte)0x49); bs.put((byte)0x6d); 2811 bs.put((byte)0x49); bs.put((byte)0x44); bs.put((byte)0x6b); bs.put((byte)0x51); 2812 bs.put((byte)0x4f); bs.put((byte)0x62); bs.put((byte)0x2e); 2813 2814 bs.limit(bs.position()); 2815 bs.position(0); 2816 us.limit(us.position()); 2817 us.position(0); 2818 2819 smBufDecode(decoder, "UTF-7", bs, us); 2820 smBufEncode(encoder, "UTF-7", us, bs); 2821 2822 /* Testing UTF-7 toUnicode with substitute callbacks */ 2823 { 2824 byte [] bytesTestErrorConsumption = { 2825 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. 
*/ 2826 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e 2827 2828 }; 2829 char [] unicodeTestErrorConsumption = { 2830 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e 2831 }; 2832 bs = ByteBuffer.wrap(bytesTestErrorConsumption); 2833 us = CharBuffer.wrap(unicodeTestErrorConsumption); 2834 2835 CodingErrorAction savedMal = decoder.malformedInputAction(); 2836 CodingErrorAction savedUMap = decoder.unmappableCharacterAction(); 2837 decoder.onMalformedInput(CodingErrorAction.REPLACE); 2838 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 2839 smBufDecode(decoder, "UTF-7 DE Error Consumption", bs, us); 2840 decoder.onMalformedInput(savedMal); 2841 decoder.onUnmappableCharacter(savedUMap); 2842 } 2843 /* ticket 6151 */ 2844 CharBuffer smallus = CharBuffer.allocate(1); 2845 ByteBuffer bigbs = ByteBuffer.allocate(3); 2846 bigbs.put((byte)0x41); bigbs.put((byte)0x41); bigbs.put((byte)0x41); 2847 bigbs.position(0); 2848 try { 2849 smBufDecode(decoder, "UTF-7-DE-Overflow", bigbs, smallus, true, false); 2850 errln("Buffer Overflow exception should have been thrown while decoding UTF-7."); 2851 } catch (Exception ex) { 2852 } 2853 2854 //The rest of the code in this method is to provide better code coverage 2855 CharBuffer ccus = CharBuffer.allocate(0x10); 2856 ByteBuffer ccbs = ByteBuffer.allocate(0x10); 2857 2858 //start of charset decoder code coverage code 2859 //test for accurate illegal and control character checking 2860 ccbs.put((byte)0x0D); ccbs.put((byte)0x05); 2861 ccus.put((char)0x0000); 2862 2863 ccbs.limit(ccbs.position()); 2864 ccbs.position(0); 2865 ccus.limit(ccus.position()); 2866 ccus.position(0); 2867 2868 try { 2869 smBufDecode(decoder, "UTF-7-CC-DE-1", ccbs, ccus, true, false); 2870 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2871 } 2872 catch 
(Exception ex) { 2873 } 2874 2875 ccbs.clear(); 2876 ccus.clear(); 2877 2878 //test for illegal base64 character 2879 ccbs.put((byte)0x2b); ccbs.put((byte)0xff); 2880 ccus.put((char)0x0000); 2881 2882 ccbs.limit(ccbs.position()); 2883 ccbs.position(0); 2884 ccus.limit(ccus.position()); 2885 ccus.position(0); 2886 2887 try { 2888 smBufDecode(decoder, "UTF-7-CC-DE-2", ccbs, ccus, true, false); 2889 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2890 } 2891 catch (Exception ex) { 2892 } 2893 2894 ccbs.clear(); 2895 ccus.clear(); 2896 2897 //test for illegal order of the base64 character sequence 2898 ccbs.put((byte)0x2b); ccbs.put((byte)0x2d); ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x2d); 2899 ccus.put((char)0x0000); ccus.put((char)0x0000); 2900 2901 ccbs.limit(ccbs.position()); 2902 ccbs.position(0); 2903 ccus.limit(ccus.position()); 2904 ccus.position(0); 2905 2906 try { 2907 smBufDecode(decoder, "UTF-7-CC-DE-3", ccbs, ccus, true, false); 2908 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2909 } 2910 catch (Exception ex) { 2911 } 2912 2913 ccbs.clear(); 2914 ccus.clear(); 2915 2916 //test for illegal order of the base64 character sequence 2917 ccbs.put((byte)0x2b); ccbs.put((byte)0x0a); ccbs.put((byte)0x09); 2918 ccus.put((char)0x0000); 2919 2920 ccbs.limit(ccbs.position()); 2921 ccbs.position(0); 2922 ccus.limit(ccus.position()); 2923 ccus.position(0); 2924 2925 try { 2926 smBufDecode(decoder, "UTF-7-CC-DE-4", ccbs, ccus, true, false); 2927 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2928 } 2929 catch (Exception ex) { 2930 } 2931 2932 ccbs.clear(); 2933 ccus.clear(); 2934 2935 //test for illegal order of the base64 character sequence 2936 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x0a); 2937 ccus.put((char)0x0000); 2938 2939 ccbs.limit(ccbs.position()); 2940 ccbs.position(0); 2941 ccus.limit(ccus.position()); 
2942 ccus.position(0); 2943 2944 try { 2945 smBufDecode(decoder, "UTF-7-CC-DE-5", ccbs, ccus, true, false); 2946 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2947 } 2948 catch (Exception ex) { 2949 } 2950 2951 ccbs.clear(); 2952 ccus.clear(); 2953 2954 //test for illegal order of the base64 character sequence 2955 ccbs.put((byte)0x2b); ccbs.put((byte)0x00); 2956 ccus.put((char)0x0000); 2957 2958 ccbs.limit(ccbs.position()); 2959 ccbs.position(0); 2960 ccus.limit(ccus.position()); 2961 ccus.position(0); 2962 2963 try { 2964 smBufDecode(decoder, "UTF-7-CC-DE-6", ccbs, ccus, true, false); 2965 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2966 } 2967 catch (Exception ex) { 2968 } 2969 2970 ccbs.clear(); 2971 ccus.clear(); 2972 2973 //test for overflow buffer error 2974 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); 2975 2976 ccbs.limit(ccbs.position()); 2977 ccbs.position(0); 2978 ccus.limit(0); 2979 ccus.position(0); 2980 2981 try { 2982 smBufDecode(decoder, "UTF-7-CC-DE-7", ccbs, ccus, true, false); 2983 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 2984 } 2985 catch (Exception ex) { 2986 } 2987 2988 ccbs.clear(); 2989 ccus.clear(); 2990 2991 //test for overflow buffer error 2992 ccbs.put((byte)0x0c); ccbs.put((byte)0x0c); 2993 2994 ccbs.limit(ccbs.position()); 2995 ccbs.position(0); 2996 ccus.limit(0); 2997 ccus.position(0); 2998 2999 try { 3000 smBufDecode(decoder, "UTF-7-CC-DE-8", ccbs, ccus, true, false); 3001 errln("Exception while decoding UTF-7 code coverage test should have been thrown."); 3002 } 3003 catch (Exception ex) { 3004 } 3005 //end of charset decoder code coverage code 3006 3007 //start of charset encoder code coverage code 3008 ccbs.clear(); 3009 ccus.clear(); 3010 //test for overflow buffer error 3011 ccus.put((char)0x002b); 3012 ccbs.put((byte)0x2b); 3013 3014 ccbs.limit(ccbs.position()); 3015 ccbs.position(0); 3016 
ccus.limit(ccus.position()); 3017 ccus.position(0); 3018 3019 try { 3020 smBufEncode(encoder, "UTF-7-CC-EN-1", ccus, ccbs, true, false); 3021 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3022 } 3023 catch (Exception ex) { 3024 } 3025 3026 ccbs.clear(); 3027 ccus.clear(); 3028 3029 //test for overflow buffer error 3030 ccus.put((char)0x002b); ccus.put((char)0x2262); 3031 ccbs.put((byte)0x2b); ccbs.put((byte)0x2d); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3032 3033 ccbs.limit(ccbs.position()); 3034 ccbs.position(0); 3035 ccus.limit(ccus.position()); 3036 ccus.position(0); 3037 3038 try { 3039 smBufEncode(encoder, "UTF-7-CC-EN-2", ccus, ccbs, true, false); 3040 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3041 } 3042 catch (Exception ex) { 3043 } 3044 3045 ccbs.clear(); 3046 ccus.clear(); 3047 3048 //test for overflow buffer error 3049 ccus.put((char)0x2262); ccus.put((char)0x0049); 3050 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3051 ccbs.limit(ccbs.position()); 3052 ccbs.position(0); 3053 ccus.limit(ccus.position()); 3054 ccus.position(0); 3055 3056 try { 3057 smBufEncode(encoder, "UTF-7-CC-EN-3", ccus, ccbs, true, false); 3058 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3059 } 3060 catch (Exception ex) { 3061 } 3062 3063 ccbs.clear(); 3064 ccus.clear(); 3065 3066 //test for overflow buffer error 3067 ccus.put((char)0x2262); ccus.put((char)0x0395); 3068 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3069 ccbs.limit(ccbs.position()); 3070 ccbs.position(0); 3071 ccus.limit(ccus.position()); 3072 ccus.position(0); 3073 3074 try { 3075 smBufEncode(encoder, "UTF-7-CC-EN-4", ccus, ccbs, true, false); 3076 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3077 } 3078 catch (Exception ex) { 3079 } 3080 3081 
ccbs.clear(); 3082 ccus.clear(); 3083 3084 //test for overflow buffer error 3085 ccus.put((char)0x2262); ccus.put((char)0x0395); 3086 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3087 ccbs.limit(ccbs.position()); 3088 ccbs.position(0); 3089 ccus.limit(ccus.position()); 3090 ccus.position(0); 3091 3092 try { 3093 smBufEncode(encoder, "UTF-7-CC-EN-5", ccus, ccbs, true, false); 3094 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3095 } 3096 catch (Exception ex) { 3097 } 3098 3099 ccbs.clear(); 3100 ccus.clear(); 3101 3102 //test for overflow buffer error 3103 ccus.put((char)0x2262); ccus.put((char)0x0395); ccus.put((char)0x0391); 3104 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3105 ccbs.limit(ccbs.position()); 3106 ccbs.position(0); 3107 ccus.limit(ccus.position()); 3108 ccus.position(0); 3109 3110 try { 3111 smBufEncode(encoder, "UTF-7-CC-EN-6", ccus, ccbs, true, false); 3112 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3113 } 3114 catch (Exception ex) { 3115 } 3116 3117 ccbs.clear(); 3118 ccus.clear(); 3119 3120 //test for overflow buffer error 3121 ccus.put((char)0x2262); ccus.put((char)0x0395); ccus.put((char)0x0391); 3122 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3123 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3124 ccbs.limit(ccbs.position()); 3125 ccbs.position(0); 3126 ccus.limit(ccus.position()); 3127 ccus.position(0); 3128 3129 try { 3130 smBufEncode(encoder, "UTF-7-CC-EN-7", ccus, ccbs, true, false); 3131 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3132 } 3133 catch (Exception ex) { 3134 } 3135 3136 ccbs.clear(); 3137 ccus.clear(); 3138 3139 //test for overflow buffer error 3140 
ccus.put((char)0x0049); ccus.put((char)0x0048); 3141 ccbs.put((byte)0x00); 3142 ccbs.limit(ccbs.position()); 3143 ccbs.position(0); 3144 ccus.limit(ccus.position()); 3145 ccus.position(0); 3146 3147 try { 3148 smBufEncode(encoder, "UTF-7-CC-EN-8", ccus, ccbs, true, false); 3149 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3150 } 3151 catch (Exception ex) { 3152 } 3153 3154 ccbs.clear(); 3155 ccus.clear(); 3156 3157 //test for overflow buffer error 3158 ccus.put((char)0x2262); 3159 ccbs.put((byte)0x00); 3160 ccbs.limit(ccbs.position()); 3161 ccbs.position(0); 3162 ccus.limit(ccus.position()); 3163 ccus.position(0); 3164 3165 try { 3166 smBufEncode(encoder, "UTF-7-CC-EN-9", ccus, ccbs, true, false); 3167 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3168 } 3169 catch (Exception ex) { 3170 } 3171 3172 ccbs.clear(); 3173 ccus.clear(); 3174 3175 //test for overflow buffer error 3176 ccus.put((char)0x2262); ccus.put((char)0x0049); 3177 ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3178 ccbs.limit(ccbs.position()); 3179 ccbs.position(0); 3180 ccus.limit(ccus.position()); 3181 ccus.position(0); 3182 3183 try { 3184 smBufEncode(encoder, "UTF-7-CC-EN-10", ccus, ccbs, true, false); 3185 errln("Exception while encoding UTF-7 code coverage test should have been thrown."); 3186 } 3187 catch (Exception ex) { 3188 } 3189 3190 ccbs.clear(); 3191 ccus.clear(); 3192 3193 //test for overflow buffer error 3194 ccus.put((char)0x2262); 3195 ccbs.put((byte)0x2b); ccbs.put((byte)0x49); ccbs.put((byte)0x6d); ccbs.put((byte)0x49); ccbs.put((byte)0x2d); 3196 3197 ccbs.limit(ccbs.position()); 3198 ccbs.position(0); 3199 ccus.limit(ccus.position()); 3200 ccus.position(0); 3201 try { 3202 smBufEncode(encoder, "UTF-7-CC-EN-11", ccus, ccbs, false, true); 3203 } catch (Exception ex) { 3204 errln("Exception while encoding UTF-7 code coverage test should not have been thrown."); 
3205 } 3206 3207 ccbs.clear(); 3208 ccus.clear(); 3209 3210 //test for overflow buffer error 3211 encoder.reset(); 3212 ccus.put((char)0x3980); ccus.put((char)0x2715); 3213 ccbs.put((byte)0x2b); ccbs.put((byte)0x4f); ccbs.put((byte)0x59); ccbs.put((byte)0x2d); 3214 3215 ccbs.limit(ccbs.position()); 3216 ccbs.position(0); 3217 ccus.limit(ccus.position()); 3218 ccus.position(0); 3219 3220 result = encoder.encode(ccus, ccbs, true); 3221 result = encoder.flush(ccbs); 3222 if (!result.isOverflow()) { 3223 errln("Overflow buffer while encoding UTF-7 should have occurred."); 3224 } 3225 //end of charset encoder code coverage code 3226 } 3227 3228 //Test Charset ISCII 3229 @Test TestCharsetISCII()3230 public void TestCharsetISCII() { 3231 CharsetProvider provider = new CharsetProviderICU(); 3232 Charset cs = provider.charsetForName("ISCII,version=0"); 3233 CharsetEncoder encoder = cs.newEncoder(); 3234 CharsetDecoder decoder = cs.newDecoder(); 3235 3236 CharBuffer us = CharBuffer.allocate(0x100); 3237 ByteBuffer bs = ByteBuffer.allocate(0x100); 3238 ByteBuffer bsr = ByteBuffer.allocate(0x100); 3239 3240 //test full range of Devanagari 3241 us.put((char)0x0901); us.put((char)0x0902); us.put((char)0x0903); us.put((char)0x0905); us.put((char)0x0906); us.put((char)0x0907); 3242 us.put((char)0x0908); us.put((char)0x0909); us.put((char)0x090A); us.put((char)0x090B); us.put((char)0x090E); us.put((char)0x090F); 3243 us.put((char)0x0910); us.put((char)0x090D); us.put((char)0x0912); us.put((char)0x0913); us.put((char)0x0914); us.put((char)0x0911); 3244 us.put((char)0x0915); us.put((char)0x0916); us.put((char)0x0917); us.put((char)0x0918); us.put((char)0x0919); us.put((char)0x091A); 3245 us.put((char)0x091B); us.put((char)0x091C); us.put((char)0x091D); us.put((char)0x091E); us.put((char)0x091F); us.put((char)0x0920); 3246 us.put((char)0x0921); us.put((char)0x0922); us.put((char)0x0923); us.put((char)0x0924); us.put((char)0x0925); us.put((char)0x0926); 3247 us.put((char)0x0927); 
us.put((char)0x0928); us.put((char)0x0929); us.put((char)0x092A); us.put((char)0x092B); us.put((char)0x092C); 3248 us.put((char)0x092D); us.put((char)0x092E); us.put((char)0x092F); us.put((char)0x095F); us.put((char)0x0930); us.put((char)0x0931); 3249 us.put((char)0x0932); us.put((char)0x0933); us.put((char)0x0934); us.put((char)0x0935); us.put((char)0x0936); us.put((char)0x0937); 3250 us.put((char)0x0938); us.put((char)0x0939); us.put((char)0x200D); us.put((char)0x093E); us.put((char)0x093F); us.put((char)0x0940); 3251 us.put((char)0x0941); us.put((char)0x0942); us.put((char)0x0943); us.put((char)0x0946); us.put((char)0x0947); us.put((char)0x0948); 3252 us.put((char)0x0945); us.put((char)0x094A); us.put((char)0x094B); us.put((char)0x094C); us.put((char)0x0949); us.put((char)0x094D); 3253 us.put((char)0x093D); us.put((char)0x0966); us.put((char)0x0967); us.put((char)0x0968); us.put((char)0x0969); us.put((char)0x096A); 3254 us.put((char)0x096B); us.put((char)0x096C); us.put((char)0x096D); us.put((char)0x096E); us.put((char)0x096F); 3255 3256 bs.put((byte)0xEF); bs.put((byte)0x42); 3257 bs.put((byte)0xA1); bs.put((byte)0xA2); bs.put((byte)0xA3); bs.put((byte)0xA4); bs.put((byte)0xA5); bs.put((byte)0xA6); 3258 bs.put((byte)0xA7); bs.put((byte)0xA8); bs.put((byte)0xA9); bs.put((byte)0xAA); bs.put((byte)0xAB); bs.put((byte)0xAC); 3259 bs.put((byte)0xAD); bs.put((byte)0xAE); bs.put((byte)0xAF); bs.put((byte)0xB0); bs.put((byte)0xB1); bs.put((byte)0xB2); 3260 bs.put((byte)0xB3); bs.put((byte)0xB4); bs.put((byte)0xB5); bs.put((byte)0xB6); bs.put((byte)0xB7); bs.put((byte)0xB8); 3261 bs.put((byte)0xB9); bs.put((byte)0xBA); bs.put((byte)0xBB); bs.put((byte)0xBC); bs.put((byte)0xBD); bs.put((byte)0xBE); 3262 bs.put((byte)0xBF); bs.put((byte)0xC0); bs.put((byte)0xC1); bs.put((byte)0xC2); bs.put((byte)0xC3); bs.put((byte)0xC4); 3263 bs.put((byte)0xC5); bs.put((byte)0xC6); bs.put((byte)0xC7); bs.put((byte)0xC8); bs.put((byte)0xC9); bs.put((byte)0xCA); 3264 bs.put((byte)0xCB); 
bs.put((byte)0xCC); bs.put((byte)0xCD); bs.put((byte)0xCE); bs.put((byte)0xCF); bs.put((byte)0xD0); 3265 bs.put((byte)0xD1); bs.put((byte)0xD2); bs.put((byte)0xD3); bs.put((byte)0xD4); bs.put((byte)0xD5); bs.put((byte)0xD6); 3266 bs.put((byte)0xD7); bs.put((byte)0xD8); bs.put((byte)0xD9); bs.put((byte)0xDA); bs.put((byte)0xDB); bs.put((byte)0xDC); 3267 bs.put((byte)0xDD); bs.put((byte)0xDE); bs.put((byte)0xDF); bs.put((byte)0xE0); bs.put((byte)0xE1); bs.put((byte)0xE2); 3268 bs.put((byte)0xE3); bs.put((byte)0xE4); bs.put((byte)0xE5); bs.put((byte)0xE6); bs.put((byte)0xE7); bs.put((byte)0xE8); 3269 bs.put((byte)0xEA); bs.put((byte)0xE9); bs.put((byte)0xF1); bs.put((byte)0xF2); bs.put((byte)0xF3); bs.put((byte)0xF4); 3270 bs.put((byte)0xF5); bs.put((byte)0xF6); bs.put((byte)0xF7); bs.put((byte)0xF8); bs.put((byte)0xF9); bs.put((byte)0xFA); 3271 3272 bsr.put((byte)0xA1); bsr.put((byte)0xA2); bsr.put((byte)0xA3); bsr.put((byte)0xA4); bsr.put((byte)0xA5); bsr.put((byte)0xA6); 3273 bsr.put((byte)0xA7); bsr.put((byte)0xA8); bsr.put((byte)0xA9); bsr.put((byte)0xAA); bsr.put((byte)0xAB); bsr.put((byte)0xAC); 3274 bsr.put((byte)0xAD); bsr.put((byte)0xAE); bsr.put((byte)0xAF); bsr.put((byte)0xB0); bsr.put((byte)0xB1); bsr.put((byte)0xB2); 3275 bsr.put((byte)0xB3); bsr.put((byte)0xB4); bsr.put((byte)0xB5); bsr.put((byte)0xB6); bsr.put((byte)0xB7); bsr.put((byte)0xB8); 3276 bsr.put((byte)0xB9); bsr.put((byte)0xBA); bsr.put((byte)0xBB); bsr.put((byte)0xBC); bsr.put((byte)0xBD); bsr.put((byte)0xBE); 3277 bsr.put((byte)0xBF); bsr.put((byte)0xC0); bsr.put((byte)0xC1); bsr.put((byte)0xC2); bsr.put((byte)0xC3); bsr.put((byte)0xC4); 3278 bsr.put((byte)0xC5); bsr.put((byte)0xC6); bsr.put((byte)0xC7); bsr.put((byte)0xC8); bsr.put((byte)0xC9); bsr.put((byte)0xCA); 3279 bsr.put((byte)0xCB); bsr.put((byte)0xCC); bsr.put((byte)0xCD); bsr.put((byte)0xCE); bsr.put((byte)0xCF); bsr.put((byte)0xD0); 3280 bsr.put((byte)0xD1); bsr.put((byte)0xD2); bsr.put((byte)0xD3); bsr.put((byte)0xD4); 
bsr.put((byte)0xD5); bsr.put((byte)0xD6); 3281 bsr.put((byte)0xD7); bsr.put((byte)0xD8); bsr.put((byte)0xD9); bsr.put((byte)0xDA); bsr.put((byte)0xDB); bsr.put((byte)0xDC); 3282 bsr.put((byte)0xDD); bsr.put((byte)0xDE); bsr.put((byte)0xDF); bsr.put((byte)0xE0); bsr.put((byte)0xE1); bsr.put((byte)0xE2); 3283 bsr.put((byte)0xE3); bsr.put((byte)0xE4); bsr.put((byte)0xE5); bsr.put((byte)0xE6); bsr.put((byte)0xE7); bsr.put((byte)0xE8); 3284 bsr.put((byte)0xEA); bsr.put((byte)0xE9); bsr.put((byte)0xF1); bsr.put((byte)0xF2); bsr.put((byte)0xF3); bsr.put((byte)0xF4); 3285 bsr.put((byte)0xF5); bsr.put((byte)0xF6); bsr.put((byte)0xF7); bsr.put((byte)0xF8); bsr.put((byte)0xF9); bsr.put((byte)0xFA); 3286 3287 //test Soft Halant 3288 us.put((char)0x0915); us.put((char)0x094d); us.put((char)0x200D); 3289 bs.put((byte)0xB3); bs.put((byte)0xE8); bs.put((byte)0xE9); 3290 bsr.put((byte)0xB3); bsr.put((byte)0xE8); bsr.put((byte)0xE9); 3291 3292 //test explicit halant 3293 us.put((char)0x0915); us.put((char)0x094D); us.put((char)0x200C); 3294 bs.put((byte)0xB3); bs.put((byte)0xE8); bs.put((byte)0xE8); 3295 bsr.put((byte)0xB3); bsr.put((byte)0xE8); bsr.put((byte)0xE8); 3296 3297 //test double danda 3298 us.put((char)0x0965); 3299 bs.put((byte)0xEA); bs.put((byte)0xEA); 3300 bsr.put((byte)0xEA); bsr.put((byte)0xEA); 3301 3302 //test ASCII 3303 us.put((char)0x1B); us.put((char)0x24); us.put((char)0x29); us.put((char)0x47); us.put((char)0x0E); us.put((char)0x23); 3304 us.put((char)0x21); us.put((char)0x23); us.put((char)0x22); us.put((char)0x23); us.put((char)0x23); us.put((char)0x23); 3305 us.put((char)0x24); us.put((char)0x23); us.put((char)0x25); us.put((char)0x23); us.put((char)0x26); us.put((char)0x23); 3306 us.put((char)0x27); us.put((char)0x23); us.put((char)0x28); us.put((char)0x23); us.put((char)0x29); us.put((char)0x23); 3307 us.put((char)0x2A); us.put((char)0x23); us.put((char)0x2B); us.put((char)0x0F); us.put((char)0x2F); us.put((char)0x2A); 3308 3309 bs.put((byte)0x1B); 
bs.put((byte)0x24); bs.put((byte)0x29); bs.put((byte)0x47); bs.put((byte)0x0E); bs.put((byte)0x23); 3310 bs.put((byte)0x21); bs.put((byte)0x23); bs.put((byte)0x22); bs.put((byte)0x23); bs.put((byte)0x23); bs.put((byte)0x23); 3311 bs.put((byte)0x24); bs.put((byte)0x23); bs.put((byte)0x25); bs.put((byte)0x23); bs.put((byte)0x26); bs.put((byte)0x23); 3312 bs.put((byte)0x27); bs.put((byte)0x23); bs.put((byte)0x28); bs.put((byte)0x23); bs.put((byte)0x29); bs.put((byte)0x23); 3313 bs.put((byte)0x2A); bs.put((byte)0x23); bs.put((byte)0x2B); bs.put((byte)0x0F); bs.put((byte)0x2F); bs.put((byte)0x2A); 3314 3315 bsr.put((byte)0x1B); bsr.put((byte)0x24); bsr.put((byte)0x29); bsr.put((byte)0x47); bsr.put((byte)0x0E); bsr.put((byte)0x23); 3316 bsr.put((byte)0x21); bsr.put((byte)0x23); bsr.put((byte)0x22); bsr.put((byte)0x23); bsr.put((byte)0x23); bsr.put((byte)0x23); 3317 bsr.put((byte)0x24); bsr.put((byte)0x23); bsr.put((byte)0x25); bsr.put((byte)0x23); bsr.put((byte)0x26); bsr.put((byte)0x23); 3318 bsr.put((byte)0x27); bsr.put((byte)0x23); bsr.put((byte)0x28); bsr.put((byte)0x23); bsr.put((byte)0x29); bsr.put((byte)0x23); 3319 bsr.put((byte)0x2A); bsr.put((byte)0x23); bsr.put((byte)0x2B); bsr.put((byte)0x0F); bsr.put((byte)0x2F); bsr.put((byte)0x2A); 3320 3321 //test from Lotus 3322 //Some of the Lotus ISCII code points have been changed or commented out. 
3323 us.put((char)0x0061); us.put((char)0x0915); us.put((char)0x000D); us.put((char)0x000A); us.put((char)0x0996); us.put((char)0x0043); 3324 us.put((char)0x0930); us.put((char)0x094D); us.put((char)0x200D); us.put((char)0x0901); us.put((char)0x000D); us.put((char)0x000A); 3325 us.put((char)0x0905); us.put((char)0x0985); us.put((char)0x0043); us.put((char)0x0915); us.put((char)0x0921); us.put((char)0x002B); 3326 us.put((char)0x095F); 3327 bs.put((byte)0x61); bs.put((byte)0xB3); 3328 bs.put((byte)0x0D); bs.put((byte)0x0A); 3329 bs.put((byte)0xEF); bs.put((byte)0x42); 3330 bs.put((byte)0xEF); bs.put((byte)0x43); bs.put((byte)0xB4); bs.put((byte)0x43); 3331 bs.put((byte)0xEF); bs.put((byte)0x42); bs.put((byte)0xCF); bs.put((byte)0xE8); bs.put((byte)0xE9); bs.put((byte)0xA1); bs.put((byte)0x0D); bs.put((byte)0x0A); bs.put((byte)0xEF); bs.put((byte)0x42); 3332 bs.put((byte)0xA4); bs.put((byte)0xEF); bs.put((byte)0x43); bs.put((byte)0xA4); bs.put((byte)0x43); bs.put((byte)0xEF); 3333 bs.put((byte)0x42); bs.put((byte)0xB3); bs.put((byte)0xBF); bs.put((byte)0x2B); 3334 bs.put((byte)0xCE); 3335 bsr.put((byte)0x61); bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xEF); bsr.put((byte)0x30); bsr.put((byte)0xB3); 3336 bsr.put((byte)0x0D); bsr.put((byte)0x0A); bsr.put((byte)0xEF); bsr.put((byte)0x43); bsr.put((byte)0xB4); bsr.put((byte)0x43); 3337 bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xCF); bsr.put((byte)0xE8); bsr.put((byte)0xD9); bsr.put((byte)0xEF); 3338 bsr.put((byte)0x42); bsr.put((byte)0xA1); bsr.put((byte)0x0D); bsr.put((byte)0x0A); bsr.put((byte)0xEF); bsr.put((byte)0x42); 3339 bsr.put((byte)0xA4); bsr.put((byte)0xEF); bsr.put((byte)0x43); bsr.put((byte)0xA4); bsr.put((byte)0x43); bsr.put((byte)0xEF); 3340 bsr.put((byte)0x42); bsr.put((byte)0xB3); bsr.put((byte)0xBF); bsr.put((byte)0x2B); bsr.put((byte)0xEF); bsr.put((byte)0x42); 3341 bsr.put((byte)0xCE); 3342 //end of test from Lotus 3343 3344 //tamil range 3345 us.put((char)0x0B86); 
us.put((char)0x0B87); us.put((char)0x0B88); 3346 bs.put((byte)0xEF); bs.put((byte)0x44); bs.put((byte)0xA5); bs.put((byte)0xA6); bs.put((byte)0xA7); 3347 bsr.put((byte)0xEF); bsr.put((byte)0x44); bsr.put((byte)0xA5); bsr.put((byte)0xA6); bsr.put((byte)0xA7); 3348 3349 //telugu range 3350 us.put((char)0x0C05); us.put((char)0x0C02); us.put((char)0x0C03); us.put((char)0x0C31); 3351 bs.put((byte)0xEF); bs.put((byte)0x45); bs.put((byte)0xA4); bs.put((byte)0xA2); bs.put((byte)0xA3); bs.put((byte)0xD0); 3352 bsr.put((byte)0xEF); bsr.put((byte)0x45); bsr.put((byte)0xA4); bsr.put((byte)0xA2); bsr.put((byte)0xA3); bsr.put((byte)0xD0); 3353 3354 //kannada range 3355 us.put((char)0x0C85); us.put((char)0x0C82); us.put((char)0x0C83); 3356 bs.put((byte)0xEF); bs.put((byte)0x48); bs.put((byte)0xA4); bs.put((byte)0xA2); bs.put((byte)0xA3); 3357 bsr.put((byte)0xEF); bsr.put((byte)0x48); bsr.put((byte)0xA4); bsr.put((byte)0xA2); bsr.put((byte)0xA3); 3358 3359 //test Abbr sign and Anudatta 3360 us.put((char)0x0970); us.put((char)0x0952); us.put((char)0x0960); us.put((char)0x0944); us.put((char)0x090C); us.put((char)0x0962); 3361 us.put((char)0x0961); us.put((char)0x0963); us.put((char)0x0950); us.put((char)0x093D); us.put((char)0x0958); us.put((char)0x0959); 3362 us.put((char)0x095A); us.put((char)0x095B); us.put((char)0x095C); us.put((char)0x095D); us.put((char)0x095E); us.put((char)0x0020); 3363 us.put((char)0x094D); us.put((char)0x0930); us.put((char)0x0000); us.put((char)0x00A0); 3364 bs.put((byte)0xEF); bs.put((byte)0x42); bs.put((byte)0xF0); bs.put((byte)0xBF); bs.put((byte)0xF0); bs.put((byte)0xB8); 3365 bs.put((byte)0xAA); bs.put((byte)0xE9); bs.put((byte)0xDF); bs.put((byte)0xE9); bs.put((byte)0xA6); bs.put((byte)0xE9); 3366 bs.put((byte)0xDB); bs.put((byte)0xE9); bs.put((byte)0xA7); bs.put((byte)0xE9); bs.put((byte)0xDC); bs.put((byte)0xE9); 3367 bs.put((byte)0xA1); bs.put((byte)0xE9); bs.put((byte)0xEA); bs.put((byte)0xE9); bs.put((byte)0xB3); bs.put((byte)0xE9); 3368 
bs.put((byte)0xB4); bs.put((byte)0xE9); bs.put((byte)0xB5); bs.put((byte)0xE9); bs.put((byte)0xBA); bs.put((byte)0xE9); 3369 bs.put((byte)0xBF); bs.put((byte)0xE9); bs.put((byte)0xC0); bs.put((byte)0xE9); bs.put((byte)0xC9); bs.put((byte)0xE9); 3370 bs.put((byte)0x20); bs.put((byte)0xE8); bs.put((byte)0xCF); bs.put((byte)0x00); bs.put((byte)0xA0); 3371 //bs.put((byte)0xEF); bs.put((byte)0x30); 3372 bsr.put((byte)0xEF); bsr.put((byte)0x42); bsr.put((byte)0xF0); bsr.put((byte)0xBF); bsr.put((byte)0xF0); bsr.put((byte)0xB8); 3373 bsr.put((byte)0xAA); bsr.put((byte)0xE9); bsr.put((byte)0xDF); bsr.put((byte)0xE9); bsr.put((byte)0xA6); bsr.put((byte)0xE9); 3374 bsr.put((byte)0xDB); bsr.put((byte)0xE9); bsr.put((byte)0xA7); bsr.put((byte)0xE9); bsr.put((byte)0xDC); bsr.put((byte)0xE9); 3375 bsr.put((byte)0xA1); bsr.put((byte)0xE9); bsr.put((byte)0xEA); bsr.put((byte)0xE9); bsr.put((byte)0xB3); bsr.put((byte)0xE9); 3376 bsr.put((byte)0xB4); bsr.put((byte)0xE9); bsr.put((byte)0xB5); bsr.put((byte)0xE9); bsr.put((byte)0xBA); bsr.put((byte)0xE9); 3377 bsr.put((byte)0xBF); bsr.put((byte)0xE9); bsr.put((byte)0xC0); bsr.put((byte)0xE9); bsr.put((byte)0xC9); bsr.put((byte)0xE9); 3378 bsr.put((byte)0xD9); bsr.put((byte)0xE8); bsr.put((byte)0xCF); bsr.put((byte)0x00); bsr.put((byte)0xA0); 3379 3380 bs.limit(bs.position()); 3381 bs.position(0); 3382 us.limit(us.position()); 3383 us.position(0); 3384 bsr.limit(bsr.position()); 3385 bsr.position(0); 3386 3387 //round trip test 3388 try { 3389 smBufDecode(decoder, "ISCII-part1", bsr, us, false, true); 3390 smBufEncode(encoder, "ISCII-part2", us, bs); 3391 smBufDecode(decoder, "ISCII-part3", bs, us, false, true); 3392 } catch (Exception ex) { 3393 errln("ISCII round trip test failed."); 3394 } 3395 3396 //Test new characters in the ISCII charset 3397 encoder = provider.charsetForName("ISCII,version=0").newEncoder(); 3398 decoder = provider.charsetForName("ISCII,version=0").newDecoder(); 3399 char u_pts[] = { 3400 /* DEV */ (char)0x0904, 
3401 /* PNJ */ (char)0x0A01, (char)0x0A03, (char)0x0A33, (char)0x0A70 3402 }; 3403 byte b_pts[] = { 3404 (byte)0xef, (byte)0x42, 3405 /* DEV */ (byte)0xa4, (byte)0xe0, 3406 /* PNJ */ (byte)0xef, (byte)0x4b, (byte)0xa1, (byte)0xa3, (byte)0xd2, (byte)0xf0, (byte)0xbf 3407 }; 3408 us = CharBuffer.allocate(u_pts.length); 3409 bs = ByteBuffer.allocate(b_pts.length); 3410 us.put(u_pts); 3411 bs.put(b_pts); 3412 3413 bs.limit(bs.position()); 3414 bs.position(0); 3415 us.limit(us.position()); 3416 us.position(0); 3417 3418 try { 3419 smBufDecode(decoder, "ISCII-update", bs, us, true, true); 3420 bs.position(0); 3421 us.position(0); 3422 smBufEncode(encoder, "ISCII-update", us, bs, true, true); 3423 } catch (Exception ex) { 3424 errln("Error occurred while encoding/decoding ISCII with the new characters."); 3425 } 3426 3427 //The rest of the code in this method is to provide better code coverage 3428 CharBuffer ccus = CharBuffer.allocate(0x10); 3429 ByteBuffer ccbs = ByteBuffer.allocate(0x10); 3430 3431 //start of charset decoder code coverage code 3432 //test overflow buffer 3433 ccbs.put((byte)0x49); 3434 3435 ccbs.limit(ccbs.position()); 3436 ccbs.position(0); 3437 ccus.limit(0); 3438 ccus.position(0); 3439 3440 try { 3441 smBufDecode(decoder, "ISCII-CC-DE-1", ccbs, ccus, true, false); 3442 errln("Exception while decoding ISCII should have been thrown."); 3443 } 3444 catch (Exception ex) { 3445 } 3446 3447 ccbs.clear(); 3448 ccus.clear(); 3449 3450 //test atr overflow buffer 3451 ccbs.put((byte)0xEF); ccbs.put((byte)0x40); ccbs.put((byte)0xEF); ccbs.put((byte)0x20); 3452 ccus.put((char)0x00); 3453 3454 ccbs.limit(ccbs.position()); 3455 ccbs.position(0); 3456 ccus.limit(ccus.position()); 3457 ccus.position(0); 3458 3459 try { 3460 smBufDecode(decoder, "ISCII-CC-DE-2", ccbs, ccus, true, false); 3461 errln("Exception while decoding ISCII should have been thrown."); 3462 } 3463 catch (Exception ex) { 3464 } 3465 3466 //end of charset decoder code coverage code 3467 3468 
ccbs.clear(); 3469 ccus.clear(); 3470 3471 //start of charset encoder code coverage code 3472 //test ascii overflow buffer 3473 ccus.put((char)0x41); 3474 3475 ccus.limit(ccus.position()); 3476 ccus.position(0); 3477 ccbs.limit(0); 3478 ccbs.position(0); 3479 3480 try { 3481 smBufEncode(encoder, "ISCII-CC-EN-1", ccus, ccbs, true, false); 3482 errln("Exception while encoding ISCII should have been thrown."); 3483 } 3484 catch (Exception ex) { 3485 } 3486 3487 ccbs.clear(); 3488 ccus.clear(); 3489 3490 //test ascii overflow buffer 3491 ccus.put((char)0x0A); ccus.put((char)0x0043); 3492 ccbs.put((byte)0x00); ccbs.put((byte)0x00); 3493 3494 ccus.limit(ccus.position()); 3495 ccus.position(0); 3496 ccbs.limit(ccbs.position()); 3497 ccbs.position(0); 3498 3499 try { 3500 smBufEncode(encoder, "ISCII-CC-EN-2", ccus, ccbs, true, false); 3501 errln("Exception while encoding ISCII should have been thrown."); 3502 } 3503 catch (Exception ex) { 3504 } 3505 3506 ccbs.clear(); 3507 ccus.clear(); 3508 3509 //test surrogate malform 3510 ccus.put((char)0x06E3); 3511 ccbs.put((byte)0x00); 3512 3513 ccus.limit(ccus.position()); 3514 ccus.position(0); 3515 ccbs.limit(ccbs.position()); 3516 ccbs.position(0); 3517 3518 try { 3519 smBufEncode(encoder, "ISCII-CC-EN-3", ccus, ccbs, true, false); 3520 errln("Exception while encoding ISCII should have been thrown."); 3521 } 3522 catch (Exception ex) { 3523 } 3524 3525 ccbs.clear(); 3526 ccus.clear(); 3527 3528 //test surrogate malform 3529 ccus.put((char)0xD801); ccus.put((char)0xDD01); 3530 ccbs.put((byte)0x00); 3531 3532 ccus.limit(ccus.position()); 3533 ccus.position(0); 3534 ccbs.limit(ccbs.position()); 3535 ccbs.position(0); 3536 3537 try { 3538 smBufEncode(encoder, "ISCII-CC-EN-4", ccus, ccbs, true, false); 3539 errln("Exception while encoding ISCII should have been thrown."); 3540 } 3541 catch (Exception ex) { 3542 } 3543 3544 ccbs.clear(); 3545 ccus.clear(); 3546 3547 //test trail surrogate malform 3548 ccus.put((char)0xDD01); 3549 
ccbs.put((byte)0x00); 3550 3551 ccus.limit(ccus.position()); 3552 ccus.position(0); 3553 ccbs.limit(ccbs.position()); 3554 ccbs.position(0); 3555 3556 try { 3557 smBufEncode(encoder, "ISCII-CC-EN-5", ccus, ccbs, true, false); 3558 errln("Exception while encoding ISCII should have been thrown."); 3559 } 3560 catch (Exception ex) { 3561 } 3562 3563 ccbs.clear(); 3564 ccus.clear(); 3565 3566 //test lead surrogates malform 3567 ccus.put((char)0xD801); ccus.put((char)0xD802); 3568 ccbs.put((byte)0x00); 3569 3570 ccus.limit(ccus.position()); 3571 ccus.position(0); 3572 ccbs.limit(ccbs.position()); 3573 ccbs.position(0); 3574 3575 try { 3576 smBufEncode(encoder, "ISCII-CC-EN-6", ccus, ccbs, true, false); 3577 errln("Exception while encoding ISCII should have been thrown."); 3578 } 3579 catch (Exception ex) { 3580 } 3581 3582 ccus.clear(); 3583 ccbs.clear(); 3584 3585 //test overflow buffer 3586 ccus.put((char)0x0901); 3587 ccbs.put((byte)0x00); 3588 3589 ccus.limit(ccus.position()); 3590 ccus.position(0); 3591 ccbs.limit(ccbs.position()); 3592 ccbs.position(0); 3593 3594 cs = provider.charsetForName("ISCII,version=0"); 3595 encoder = cs.newEncoder(); 3596 3597 try { 3598 smBufEncode(encoder, "ISCII-CC-EN-7", ccus, ccbs, true, false); 3599 errln("Exception while encoding ISCII should have been thrown."); 3600 } 3601 catch (Exception ex) { 3602 } 3603 //end of charset encoder code coverage code 3604 } 3605 3606 //Test for the IMAP Charset 3607 @Test TestCharsetIMAP()3608 public void TestCharsetIMAP() { 3609 CharsetProvider provider = new CharsetProviderICU(); 3610 Charset cs = provider.charsetForName("IMAP-mailbox-name"); 3611 CharsetEncoder encoder = cs.newEncoder(); 3612 CharsetDecoder decoder = cs.newDecoder(); 3613 3614 CharBuffer us = CharBuffer.allocate(0x20); 3615 ByteBuffer bs = ByteBuffer.allocate(0x20); 3616 3617 us.put((char)0x00A3); us.put((char)0x2020); us.put((char)0x41); 3618 3619 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x4B); bs.put((byte)0x4D); 
bs.put((byte)0x67); bs.put((byte)0x49); 3620 bs.put((byte)0x41); bs.put((byte)0x2D); bs.put((byte)0x41); 3621 3622 3623 bs.limit(bs.position()); 3624 bs.position(0); 3625 us.limit(us.position()); 3626 us.position(0); 3627 3628 smBufDecode(decoder, "IMAP", bs, us); 3629 smBufEncode(encoder, "IMAP", us, bs); 3630 3631 //the rest of the code in this method is for better code coverage 3632 us.clear(); 3633 bs.clear(); 3634 3635 //start of charset encoder code coverage 3636 //test buffer overflow 3637 us.put((char)0x0026); us.put((char)0x17A9); 3638 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3639 3640 bs.limit(bs.position()); 3641 bs.position(0); 3642 us.limit(us.position()); 3643 us.position(0); 3644 3645 try { 3646 smBufEncode(encoder, "IMAP-EN-1", us, bs, true, false); 3647 errln("Exception while encoding IMAP (1) should have been thrown."); 3648 } catch(Exception ex) { 3649 } 3650 3651 us.clear(); 3652 bs.clear(); 3653 3654 //test buffer overflow 3655 us.put((char)0x17A9); us.put((char)0x0941); 3656 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3657 3658 bs.limit(bs.position()); 3659 bs.position(0); 3660 us.limit(us.position()); 3661 us.position(0); 3662 3663 try { 3664 smBufEncode(encoder, "IMAP-EN-2", us, bs, true, false); 3665 errln("Exception while encoding IMAP (2) should have been thrown."); 3666 } catch(Exception ex) { 3667 } 3668 3669 us.clear(); 3670 bs.clear(); 3671 3672 //test buffer overflow 3673 us.put((char)0x17A9); us.put((char)0x0941); 3674 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3675 3676 bs.limit(bs.position()); 3677 bs.position(0); 3678 us.limit(us.position()); 3679 us.position(0); 3680 3681 try { 3682 smBufEncode(encoder, "IMAP-EN-3", us, bs, true, false); 3683 errln("Exception while encoding IMAP (3) should have been thrown."); 3684 } catch(Exception ex) { 3685 } 3686 3687 us.clear(); 3688 bs.clear(); 3689 3690 
//test buffer overflow 3691 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); 3692 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3693 bs.put((byte)0x00); 3694 3695 bs.limit(bs.position()); 3696 bs.position(0); 3697 us.limit(us.position()); 3698 us.position(0); 3699 3700 try { 3701 smBufEncode(encoder, "IMAP-EN-4", us, bs, true, false); 3702 errln("Exception while encoding IMAP (4) should have been thrown."); 3703 } catch(Exception ex) { 3704 } 3705 3706 us.clear(); 3707 bs.clear(); 3708 3709 //test buffer overflow 3710 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); 3711 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3712 bs.put((byte)0x00); bs.put((byte)0x00); 3713 3714 bs.limit(bs.position()); 3715 bs.position(0); 3716 us.limit(us.position()); 3717 us.position(0); 3718 3719 try { 3720 smBufEncode(encoder, "IMAP-EN-5", us, bs, true, false); 3721 errln("Exception while encoding IMAP (5) should have been thrown."); 3722 } catch(Exception ex) { 3723 } 3724 3725 us.clear(); 3726 bs.clear(); 3727 3728 //test buffer overflow 3729 us.put((char)0x17A9); us.put((char)0x0941); us.put((char)0x0955); us.put((char)0x0970); 3730 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3731 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 3732 3733 bs.limit(bs.position()); 3734 bs.position(0); 3735 us.limit(us.position()); 3736 us.position(0); 3737 3738 try { 3739 smBufEncode(encoder, "IMAP-EN-6", us, bs, true, false); 3740 errln("Exception while encoding IMAP (6) should have been thrown."); 3741 } catch(Exception ex) { 3742 } 3743 3744 us.clear(); 3745 bs.clear(); 3746 3747 //test buffer overflow 3748 us.put((char)0x17A9); us.put((char)0x0941); 3749 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 
bs.put((byte)0x00); bs.put((byte)0x00); 3750 bs.put((byte)0x00); 3751 3752 bs.limit(bs.position()); 3753 bs.position(0); 3754 us.limit(us.position()); 3755 us.position(0); 3756 3757 try { 3758 smBufEncode(encoder, "IMAP-EN-7", us, bs, true, true); 3759 errln("Exception while encoding IMAP (7) should have been thrown."); 3760 } catch(Exception ex) { 3761 } 3762 3763 us.clear(); 3764 bs.clear(); 3765 3766 //test flushing 3767 us.put((char)0x17A9); us.put((char)0x0941); 3768 bs.put((byte)0x26); bs.put((byte)0x46); bs.put((byte)0x36); bs.put((byte)0x6b); bs.put((byte)0x4a); bs.put((byte)0x51); 3769 bs.put((byte)0x51); bs.put((byte)0x2d); 3770 3771 bs.limit(bs.position()); 3772 bs.position(0); 3773 us.limit(us.position()); 3774 us.position(0); 3775 3776 try { 3777 smBufEncode(encoder, "IMAP-EN-8", us, bs, true, true); 3778 } catch(Exception ex) { 3779 errln("Exception while encoding IMAP (8) should not have been thrown."); 3780 } 3781 3782 us = CharBuffer.allocate(0x08); 3783 bs = ByteBuffer.allocate(0x08); 3784 3785 //test flushing buffer overflow 3786 us.put((char)0x0061); 3787 bs.put((byte)0x61); bs.put((byte)0x00); 3788 3789 bs.limit(bs.position()); 3790 bs.position(0); 3791 us.limit(us.position()); 3792 us.position(0); 3793 3794 try { 3795 smBufEncode(encoder, "IMAP-EN-9", us, bs, true, true); 3796 } catch(Exception ex) { 3797 errln("Exception while encoding IMAP (9) should not have been thrown."); 3798 } 3799 //end of charset encoder code coverage 3800 3801 us = CharBuffer.allocate(0x10); 3802 bs = ByteBuffer.allocate(0x10); 3803 3804 //start of charset decoder code coverage 3805 //test malform case 2 3806 us.put((char)0x0000); us.put((char)0x0000); 3807 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x43); bs.put((byte)0x41); 3808 3809 bs.limit(bs.position()); 3810 bs.position(0); 3811 us.limit(us.position()); 3812 us.position(0); 3813 3814 try { 3815 smBufDecode(decoder, "IMAP-DE-1", bs, us, true, false); 3816 errln("Exception while decoding IMAP (1) 
should have been thrown."); 3817 } catch(Exception ex) { 3818 } 3819 3820 us.clear(); 3821 bs.clear(); 3822 3823 //test malform case 5 3824 us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); 3825 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); 3826 bs.put((byte)0x41); bs.put((byte)0x49); bs.put((byte)0x41); 3827 3828 bs.limit(bs.position()); 3829 bs.position(0); 3830 us.limit(us.position()); 3831 us.position(0); 3832 3833 try { 3834 smBufDecode(decoder, "IMAP-DE-2", bs, us, true, false); 3835 errln("Exception while decoding IMAP (2) should have been thrown."); 3836 } catch(Exception ex) { 3837 } 3838 3839 us.clear(); 3840 bs.clear(); 3841 3842 //test malform case 7 3843 us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); us.put((char)0x0000); 3844 bs.put((byte)0x26); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); 3845 bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x41); bs.put((byte)0x42); 3846 bs.put((byte)0x41); 3847 3848 bs.limit(bs.position()); 3849 bs.position(0); 3850 us.limit(us.position()); 3851 us.position(0); 3852 3853 try { 3854 smBufDecode(decoder, "IMAP-DE-3", bs, us, true, false); 3855 errln("Exception while decoding IMAP (3) should have been thrown."); 3856 } catch(Exception ex) { 3857 } 3858 //end of charset decoder coder coverage 3859 } 3860 3861 //Test for charset UTF32LE to provide better code coverage 3862 @Test TestCharsetUTF32LE()3863 public void TestCharsetUTF32LE() { 3864 CoderResult result = CoderResult.UNDERFLOW; 3865 CharsetProvider provider = new CharsetProviderICU(); 3866 Charset cs = provider.charsetForName("UTF-32LE"); 3867 CharsetEncoder encoder = cs.newEncoder(); 3868 //CharsetDecoder decoder = cs.newDecoder(); 3869 3870 CharBuffer us = CharBuffer.allocate(0x10); 3871 ByteBuffer bs = ByteBuffer.allocate(0x10); 3872 3873 3874 //test malform surrogate 3875 us.put((char)0xD901); 3876 bs.put((byte)0x00); 3877 3878 bs.limit(bs.position()); 3879 
bs.position(0);
        us.flip();

        // (1) lone lead surrogate with a one-byte output window
        try {
            smBufEncode(encoder, "UTF32LE-EN-1", us, bs, true, false);
            errln("Exception while encoding UTF32LE (1) should have been thrown.");
        } catch (Exception expected) {
            // this case is supposed to throw
        }

        //test malform surrogate (2): two lead surrogates, one-byte output window
        bs.clear();
        us.clear();
        us.put(new char[] {0xD901, 0xD902});
        bs.put((byte) 0x00);
        bs.flip();
        us.flip();

        result = encoder.encode(us, bs, true);
        if (!(result.isError() || result.isOverflow())) {
            errln("Error while encoding UTF32LE (2) should have occurred.");
        }

        //test overflow trail surrogate (3): three trail surrogates, seven-byte output window
        bs.clear();
        us.clear();
        us.put(new char[] {0xDD01, 0xDD0E, 0xDD0E});
        bs.put(new byte[7]);
        bs.flip();
        us.flip();

        result = encoder.encode(us, bs, true);
        if (!(result.isError() || result.isOverflow())) {
            errln("Error while encoding UTF32LE (3) should have occurred.");
        }

        //test malform lead surrogate (4)
        bs.clear();
        us.clear();
        us.put(new char[] {0xD90D, 0xD90E});
        bs.put((byte) 0x00);
        bs.flip();
        us.flip();

        try {
            smBufEncode(encoder, "UTF32LE-EN-4", us, bs, true, false);
            errln("Exception while encoding UTF32LE (4) should have been thrown.");
        } catch (Exception expected) {
            // this case is supposed to throw
        }

        //test overflow buffer (5): one BMP char, one-byte output window
        bs.clear();
        us.clear();
        us.put((char) 0x0061);
        bs.put((byte) 0x00);
        bs.flip();
        us.flip();

        try {
3957 smBufEncode(encoder, "UTF32LE-EN-5", us, bs, true, false); 3958 errln("Exception while encoding UTF32LE (5) should have been thrown."); 3959 } catch (Exception ex) { 3960 } 3961 3962 bs.clear(); 3963 us.clear(); 3964 3965 //test malform trail surrogate 3966 us.put((char)0xDD01); 3967 bs.put((byte)0x00); 3968 3969 bs.limit(bs.position()); 3970 bs.position(0); 3971 us.limit(us.position()); 3972 us.position(0); 3973 3974 try { 3975 smBufEncode(encoder, "UTF32LE-EN-6", us, bs, true, false); 3976 errln("Exception while encoding UTF32LE (6) should have been thrown."); 3977 } catch (Exception ex) { 3978 } 3979 } 3980 3981 //Test for charset UTF16LE to provide better code coverage 3982 @Test TestCharsetUTF16LE()3983 public void TestCharsetUTF16LE() { 3984 CoderResult result = CoderResult.UNDERFLOW; 3985 CharsetProvider provider = new CharsetProviderICU(); 3986 Charset cs = provider.charsetForName("UTF-16LE"); 3987 CharsetEncoder encoder = cs.newEncoder(); 3988 //CharsetDecoder decoder = cs.newDecoder(); 3989 3990 // Test for malform and change fromUChar32 for next call 3991 char u_pts1[] = { 3992 (char)0xD805, 3993 (char)0xDC01, (char)0xDC02, (char)0xDC03, 3994 (char)0xD901, (char)0xD902 3995 }; 3996 byte b_pts1[] = { 3997 (byte)0x00, 3998 (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00, (byte)0x00 3999 }; 4000 4001 CharBuffer us = CharBuffer.allocate(u_pts1.length); 4002 ByteBuffer bs = ByteBuffer.allocate(b_pts1.length); 4003 4004 us.put(u_pts1); 4005 bs.put(b_pts1); 4006 4007 us.limit(1); 4008 us.position(0); 4009 bs.limit(1); 4010 bs.position(0); 4011 4012 result = encoder.encode(us, bs, true); 4013 4014 if (!result.isMalformed()) { 4015 // LE should not output BOM, so this should be malformed 4016 errln("Malformed while encoding UTF-16LE (1) should have occured."); 4017 } 4018 4019 // Test for malform surrogate from previous buffer 4020 us.limit(4); 4021 us.position(1); 4022 bs.limit(7); 4023 bs.position(1); 4024 4025 result = encoder.encode(us, bs, 
true); 4026 4027 if (!result.isMalformed()) { 4028 errln("Error while encoding UTF-16LE (2) should have occured."); 4029 } 4030 4031 // Test for malform trail surrogate 4032 encoder.reset(); 4033 4034 us.limit(1); 4035 us.position(0); 4036 bs.limit(1); 4037 bs.position(0); 4038 4039 result = encoder.encode(us, bs, true); 4040 4041 us.limit(6); 4042 us.position(4); 4043 bs.limit(4); 4044 bs.position(1); 4045 4046 result = encoder.encode(us, bs, true); 4047 4048 if (!result.isMalformed()) { 4049 errln("Error while encoding UTF-16LE (3) should have occured."); 4050 } 4051 } 4052 4053 //provide better code coverage for the generic charset UTF32 4054 @Test TestCharsetUTF32()4055 public void TestCharsetUTF32() { 4056 CoderResult result = CoderResult.UNDERFLOW; 4057 CharsetProvider provider = new CharsetProviderICU(); 4058 Charset cs = provider.charsetForName("UTF-32"); 4059 CharsetDecoder decoder = cs.newDecoder(); 4060 CharsetEncoder encoder = cs.newEncoder(); 4061 4062 //start of decoding code coverage 4063 char us_array[] = { 4064 0x0000, 0x0000, 0x0000, 0x0000, 4065 }; 4066 4067 byte bs_array1[] = { 4068 (byte)0x00, (byte)0x00, (byte)0xFE, (byte)0xFF, 4069 (byte)0x00, (byte)0x00, (byte)0x04, (byte)0x43, 4070 (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00, 4071 (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00, 4072 }; 4073 4074 byte bs_array2[] = { 4075 (byte)0xFF, (byte)0xFE, (byte)0x00, (byte)0x00, 4076 (byte)0x43, (byte)0x04, (byte)0x00, (byte)0x00, 4077 }; 4078 4079 CharBuffer us = CharBuffer.allocate(us_array.length); 4080 ByteBuffer bs = ByteBuffer.allocate(bs_array1.length); 4081 4082 us.put(us_array); 4083 bs.put(bs_array1); 4084 4085 us.limit(us.position()); 4086 us.position(0); 4087 bs.limit(bs.position()); 4088 bs.position(0); 4089 4090 try { 4091 smBufDecode(decoder, "UTF32-DE-1", bs, us, true, false); 4092 errln("Malform exception while decoding UTF32 charset (1) should have been thrown."); 4093 } catch (Exception ex) { 4094 } 4095 4096 decoder = 
cs.newDecoder();

        // (2) fresh decoder, bs_array2 as input, char window [0,4)
        bs = ByteBuffer.allocate(bs_array2.length);
        bs.put(bs_array2);
        us.limit(4).position(0);
        bs.flip();

        try {
            smBufDecode(decoder, "UTF32-DE-2", bs, us, true, false);
        } catch (Exception unexpected) {
            // should recognize little endian BOM
            errln("Exception while decoding UTF32 charset (2) should not have been thrown.");
        }

        //Test malform exception
        bs.clear();
        us.clear();
        bs.put(new byte[] {0x00, (byte) 0xFE, (byte) 0xFF, 0x00, 0x00});
        us.put((char) 0);
        us.flip();
        bs.flip();

        try {
            smBufDecode(decoder, "UTF32-DE-3", bs, us, true, false);
            errln("Malform exception while decoding UTF32 charset (3) should have been thrown.");
        } catch (Exception expected) {
            // this case is supposed to throw
        }

        //Test BOM testing
        bs.clear();
        us.clear();
        bs.put(new byte[] {0x00, 0x00, (byte) 0xFF, (byte) 0xFE});
        us.put((char) 0);
        us.flip();
        bs.flip();

        try {
            smBufDecode(decoder, "UTF32-DE-4", bs, us, true, false);
        } catch (Exception unexpected) {
            // should recognize big endian BOM
            errln("Exception while decoding UTF32 charset (4) should not have been thrown.");
        }
        //end of decoding code coverage

        //start of encoding code coverage
        us = CharBuffer.allocate(0x10);
        bs = ByteBuffer.allocate(0x10);

        //test write BOM overflow error: three-byte output window
        us.put((char) 0xDC01);
        bs.put(new byte[3]);
        us.flip();
        bs.flip();

        result = encoder.encode(us, bs, true);
        // must try to output BOM first for UTF-32 (not UTF-32BE or UTF-32LE)
        if (!result.isOverflow()) {
            errln("Buffer overflow error while encoding UTF32 charset (1) should have occurred.");
        }

        //test malform surrogate and store value in fromChar32
        us.clear();
        bs.clear();
        us.put(new char[] {0xD801, 0xD802});
        bs.put(new byte[6]);
        us.flip();
        bs.flip();

        result = encoder.encode(us, bs, true);
        if (!result.isMalformed()) {
            errln("Malformed error while encoding UTF32 charset (2) should have occurred.");
        }

        //test malform surrogate
        // Nothing is written to bs here, so flip() leaves it with a limit of 0.
        us.clear();
        bs.clear();
        us.put(new char[] {0x0000, 0xD902});
        us.flip();
        bs.flip();

        result = encoder.encode(us, bs, true);
        if (!result.isOverflow()) {
            errln("Overflow error while encoding UTF32 charset (3) should have occurred.");
        }

        //test malform surrogate
        us.clear();
        bs.clear();
        encoder.reset();
        us.put((char) 0xD801);
        bs.put(new byte[5]);
        us.flip();
        bs.flip();

        result = encoder.encode(us, bs, true);
        if (!result.isMalformed()) {
            errln("Malform error while encoding UTF32 charset (4) should have occurred.");
        }

        //test overflow surrogate
        us.clear();
        bs.clear();
        us.put(new char[] {0x0000, 0xDDE1, 0xD915, 0xDDF2});
        bs.put(new byte[6]);

        us.flip();
        bs.limit(bs.position());
bs.position(0); 4232 4233 result = encoder.encode(us, bs, true); 4234 if (!result.isOverflow()) { 4235 errln("Overflow error while encoding UTF32 charset (5) should have occurred."); 4236 } 4237 4238 us.clear(); 4239 bs.clear(); 4240 4241 //test malform surrogate 4242 encoder.reset(); 4243 us.put((char)0xDDE1); 4244 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4245 4246 us.limit(us.position()); 4247 us.position(0); 4248 bs.limit(bs.position()); 4249 bs.position(0); 4250 4251 result = encoder.encode(us, bs, true); 4252 if (!result.isMalformed()) { 4253 errln("Malform error while encoding UTF32 charset (6) should have occurred."); 4254 } 4255 //end of encoding code coverage 4256 } 4257 4258 //this method provides better code coverage decoding UTF32 LE/BE 4259 @Test TestDecodeUTF32LEBE()4260 public void TestDecodeUTF32LEBE() { 4261 CoderResult result = CoderResult.UNDERFLOW; 4262 CharsetProvider provider = new CharsetProviderICU(); 4263 CharsetDecoder decoder; 4264 CharBuffer us = CharBuffer.allocate(0x10); 4265 ByteBuffer bs = ByteBuffer.allocate(0x10); 4266 4267 //decode UTF32LE 4268 decoder = provider.charsetForName("UTF-32LE").newDecoder(); 4269 //test overflow buffer 4270 bs.put((byte)0x41); bs.put((byte)0xFF); bs.put((byte)0x01); bs.put((byte)0x00); 4271 us.put((char)0x0000); 4272 4273 us.limit(us.position()); 4274 us.position(0); 4275 bs.limit(bs.position()); 4276 bs.position(0); 4277 4278 try { 4279 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4280 errln("Overflow exception while decoding UTF32LE (1) should have been thrown."); 4281 } catch (Exception ex) { 4282 } 4283 // test overflow buffer handling in CharsetDecoderICU 4284 bs.position(0); 4285 us.position(0); 4286 decoder.reset(); 4287 result = decoder.decode(bs, us, true); 4288 if (result.isOverflow()) { 4289 result = decoder.decode(bs, us, true); 4290 if (!result.isOverflow()) { 4291 errln("Overflow buffer error while decoding UTF32LE 
should have occurred."); 4292 } 4293 } else { 4294 errln("Overflow buffer error while decoding UTF32LE should have occurred."); 4295 } 4296 4297 us.clear(); 4298 bs.clear(); 4299 //test malform buffer 4300 bs.put((byte)0x02); bs.put((byte)0xD9); bs.put((byte)0x00); bs.put((byte)0x00); 4301 us.put((char)0x0000); 4302 4303 us.limit(us.position()); 4304 us.position(0); 4305 bs.limit(bs.position()); 4306 bs.position(0); 4307 4308 try { 4309 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4310 errln("Malform exception while decoding UTF32LE (2) should have been thrown."); 4311 } catch (Exception ex) { 4312 } 4313 4314 us.clear(); 4315 bs.clear(); 4316 //test malform buffer 4317 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4318 bs.put((byte)0xFF); bs.put((byte)0xDF); bs.put((byte)0x10); 4319 us.put((char)0x0000); 4320 4321 us.limit(us.position()); 4322 us.position(0); 4323 bs.limit(bs.position()); 4324 bs.position(0); 4325 4326 try { 4327 // must flush in order to exhibit malformed behavior 4328 smBufDecode(decoder, "UTF-32LE", bs, us, true, true); 4329 errln("Malform exception while decoding UTF32LE (3) should have been thrown."); 4330 } catch (Exception ex) { 4331 } 4332 4333 us.clear(); 4334 bs.clear(); 4335 //test malform buffer 4336 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4337 bs.put((byte)0x02); bs.put((byte)0xD9); bs.put((byte)0x00); bs.put((byte)0x00); 4338 us.put((char)0x0000); 4339 4340 us.limit(us.position()); 4341 us.position(0); 4342 bs.limit(bs.position()); 4343 bs.position(0); 4344 4345 try { 4346 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4347 errln("Malform exception while decoding UTF32LE (4) should have been thrown."); 4348 } catch (Exception ex) { 4349 } 4350 4351 us.clear(); 4352 bs.clear(); 4353 //test overflow buffer 4354 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x00); bs.put((byte)0x00); 4355 bs.put((byte)0xDD); bs.put((byte)0xFF); 
bs.put((byte)0x10); bs.put((byte)0x00); 4356 us.put((char)0x0000); 4357 4358 us.limit(us.position()); 4359 us.position(0); 4360 bs.limit(bs.position()); 4361 bs.position(0); 4362 4363 try { 4364 smBufDecode(decoder, "UTF-32LE", bs, us, true, false); 4365 errln("Overflow exception while decoding UTF32LE (5) should have been thrown."); 4366 } catch (Exception ex) { 4367 } 4368 //end of decode UTF32LE 4369 4370 bs.clear(); 4371 us.clear(); 4372 4373 //decode UTF32BE 4374 decoder = provider.charsetForName("UTF-32BE").newDecoder(); 4375 //test overflow buffer 4376 bs.put((byte)0x00); bs.put((byte)0x01); bs.put((byte)0xFF); bs.put((byte)0x41); 4377 us.put((char)0x0000); 4378 4379 us.limit(us.position()); 4380 us.position(0); 4381 bs.limit(bs.position()); 4382 bs.position(0); 4383 4384 try { 4385 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4386 errln("Overflow exception while decoding UTF32BE (1) should have been thrown."); 4387 } catch (Exception ex) { 4388 } 4389 4390 bs.clear(); 4391 us.clear(); 4392 //test malform buffer 4393 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xD9); bs.put((byte)0x02); 4394 us.put((char)0x0000); 4395 4396 us.limit(us.position()); 4397 us.position(0); 4398 bs.limit(bs.position()); 4399 bs.position(0); 4400 4401 try { 4402 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4403 errln("Malform exception while decoding UTF32BE (2) should have been thrown."); 4404 } catch (Exception ex) { 4405 } 4406 4407 bs.clear(); 4408 us.clear(); 4409 //test malform buffer 4410 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); 4411 bs.put((byte)0x10); bs.put((byte)0xFF); bs.put((byte)0xDF); 4412 us.put((char)0x0000); 4413 4414 us.limit(us.position()); 4415 us.position(0); 4416 bs.limit(bs.position()); 4417 bs.position(0); 4418 4419 try { 4420 // must flush to exhibit malformed behavior 4421 smBufDecode(decoder, "UTF-32BE", bs, us, true, true); 4422 errln("Malform exception while decoding UTF32BE (3) should 
have been thrown."); 4423 } catch (Exception ex) { 4424 } 4425 4426 bs.clear(); 4427 us.clear(); 4428 //test overflow buffer 4429 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); bs.put((byte)0xFF); 4430 bs.put((byte)0x00); bs.put((byte)0x10); bs.put((byte)0xFF); bs.put((byte)0xDD); 4431 us.put((char)0x0000); 4432 4433 us.limit(us.position()); 4434 us.position(0); 4435 bs.limit(bs.position()); 4436 bs.position(0); 4437 4438 try { 4439 smBufDecode(decoder, "UTF-32BE", bs, us, true, false); 4440 errln("Overflow exception while decoding UTF32BE (4) should have been thrown."); 4441 } catch (Exception ex) { 4442 } 4443 4444 bs.clear(); 4445 us.clear(); 4446 //test malform buffer 4447 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0xFE); 4448 us.put((char)0x0000); 4449 4450 us.limit(us.position()); 4451 us.position(0); 4452 bs.limit(bs.position()); 4453 bs.position(0); 4454 4455 try { 4456 // must flush to exhibit malformed behavior 4457 smBufDecode(decoder, "UTF-32BE", bs, us, true, true); 4458 errln("Malform exception while decoding UTF32BE (5) should have been thrown."); 4459 } catch (Exception ex) { 4460 } 4461 //end of decode UTF32BE 4462 } 4463 4464 //provide better code coverage for UTF8 4465 @Test TestCharsetUTF8()4466 public void TestCharsetUTF8() { 4467 CoderResult result = CoderResult.UNDERFLOW; 4468 CharsetProvider provider = new CharsetProviderICU(); 4469 CharsetDecoder decoder = provider.charsetForName("UTF-8").newDecoder(); 4470 CharsetEncoder encoder = provider.charsetForName("UTF-8").newEncoder(); 4471 4472 CharBuffer us = CharBuffer.allocate(0x10); 4473 ByteBuffer bs = ByteBuffer.allocate(0x10); 4474 ByteBuffer bs2; 4475 CharBuffer us2; 4476 int limit_us; 4477 int limit_bs; 4478 4479 //encode and decode using read only buffer 4480 encoder.reset(); 4481 decoder.reset(); 4482 us.put((char)0x0041); us.put((char)0x0081); us.put((char)0xEF65); us.put((char)0xD902); 4483 bs.put((byte)0x41); bs.put((byte)0xc2); bs.put((byte)0x81); 
bs.put((byte)0xee); bs.put((byte)0xbd); bs.put((byte)0xa5); 4484 bs.put((byte)0x00); 4485 limit_us = us.position(); 4486 limit_bs = bs.position(); 4487 4488 us.limit(limit_us); 4489 us.position(0); 4490 bs.limit(limit_bs); 4491 bs.position(0); 4492 bs2 = bs.asReadOnlyBuffer(); 4493 us2 = us.asReadOnlyBuffer(); 4494 4495 result = decoder.decode(bs2, us, true); 4496 if (!result.isUnderflow() || !equals(us, us2)) { 4497 errln("Error while decoding UTF-8 (1) should not have occured."); 4498 } 4499 4500 us2.limit(limit_us); 4501 us2.position(0); 4502 bs.limit(limit_bs); 4503 bs.position(0); 4504 4505 result = encoder.encode(us2, bs, true); 4506 if (!result.isUnderflow() || !equals(bs, bs2)) { 4507 errln("Error while encoding UTF-8 (1) should not have occured."); 4508 } 4509 4510 us.clear(); 4511 bs.clear(); 4512 4513 //test overflow buffer while encoding 4514 //readonly buffer 4515 encoder.reset(); 4516 us.put((char)0x0081); us.put((char)0xEF65); 4517 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4518 limit_us = us.position(); 4519 us2 = us.asReadOnlyBuffer(); 4520 us2.limit(limit_us); 4521 us2.position(0); 4522 bs.limit(1); 4523 bs.position(0); 4524 result = encoder.encode(us2, bs, true); 4525 if (!result.isOverflow()) { 4526 errln("Overflow Error should have occured while encoding UTF-8 (2)."); 4527 } 4528 4529 encoder.reset(); 4530 4531 us2.limit(limit_us); 4532 us2.position(1); 4533 bs.limit(1); 4534 bs.position(0); 4535 result = encoder.encode(us2, bs, true); 4536 if (!result.isOverflow()) { 4537 errln("Overflow Error should have occured while encoding UTF-8 (3)."); 4538 } 4539 4540 encoder.reset(); 4541 4542 us2.limit(limit_us); 4543 us2.position(1); 4544 bs.limit(2); 4545 bs.position(0); 4546 result = encoder.encode(us2, bs, true); 4547 if (!result.isOverflow()) { 4548 errln("Overflow Error should have occured while encoding UTF-8 (4)."); 4549 } 4550 4551 encoder.reset(); 4552 4553 us2.limit(limit_us); 4554 us2.position(0); 4555 bs.limit(2); 4556 
bs.position(0); 4557 result = encoder.encode(us2, bs, true); 4558 if (!result.isOverflow()) { 4559 errln("Overflow Error should have occured while encoding UTF-8 (5)."); 4560 } 4561 4562 //not readonly buffer 4563 encoder.reset(); 4564 4565 us.limit(limit_us); 4566 us.position(0); 4567 bs.limit(1); 4568 bs.position(0); 4569 result = encoder.encode(us, bs, true); 4570 if (!result.isOverflow()) { 4571 errln("Overflow Error should have occured while encoding UTF-8 (6)."); 4572 } 4573 4574 encoder.reset(); 4575 4576 us.limit(limit_us); 4577 us.position(0); 4578 bs.limit(3); 4579 bs.position(0); 4580 result = encoder.encode(us, bs, true); 4581 if (!result.isOverflow()) { 4582 errln("Overflow Error should have occured while encoding UTF-8 (7)."); 4583 } 4584 4585 encoder.reset(); 4586 4587 us.limit(limit_us); 4588 us.position(1); 4589 bs.limit(2); 4590 bs.position(0); 4591 result = encoder.encode(us, bs, true); 4592 if (!result.isOverflow()) { 4593 errln("Overflow Error should have occured while encoding UTF-8 (8)."); 4594 } 4595 4596 encoder.reset(); 4597 4598 us.limit(limit_us + 1); 4599 us.position(1); 4600 bs.limit(3); 4601 bs.position(0); 4602 result = encoder.encode(us, bs, true); 4603 if (!result.isOverflow()) { 4604 errln("Overflow Error should have occured while encoding UTF-8 (9)."); 4605 } 4606 4607 us.clear(); 4608 bs.clear(); 4609 4610 //test encoding 4 byte characters 4611 encoder.reset(); 4612 us.put((char)0xD902); us.put((char)0xDD02); us.put((char)0x0041); 4613 bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); bs.put((byte)0x00); 4614 limit_us = us.position(); 4615 us2 = us.asReadOnlyBuffer(); 4616 us2.limit(limit_us); 4617 us2.position(0); 4618 bs.limit(1); 4619 bs.position(0); 4620 result = encoder.encode(us2, bs, true); 4621 if (!result.isOverflow()) { 4622 errln("Overflow Error should have occured while encoding UTF-8 (10)."); 4623 } 4624 4625 encoder.reset(); 4626 4627 us2.limit(limit_us); 4628 us2.position(0); 4629 bs.limit(2); 4630 
bs.position(0); 4631 result = encoder.encode(us2, bs, true); 4632 if (!result.isOverflow()) { 4633 errln("Overflow Error should have occured while encoding UTF-8 (11)."); 4634 } 4635 4636 encoder.reset(); 4637 4638 us2.limit(limit_us); 4639 us2.position(0); 4640 bs.limit(3); 4641 bs.position(0); 4642 result = encoder.encode(us2, bs, true); 4643 if (!result.isOverflow()) { 4644 errln("Overflow Error should have occured while encoding UTF-8 (12)."); 4645 } 4646 4647 encoder.reset(); 4648 4649 us2.limit(limit_us); 4650 us2.position(0); 4651 bs.limit(4); 4652 bs.position(0); 4653 result = encoder.encode(us2, bs, true); 4654 if (!result.isOverflow()) { 4655 errln("Overflow Error should have occured while encoding UTF-8 (13)."); 4656 } 4657 4658 us.clear(); 4659 bs.clear(); 4660 4661 //decoding code coverage 4662 //test malform error 4663 decoder.reset(); 4664 bs.put((byte)0xC2); bs.put((byte)0xC2); 4665 us.put((char)0x0000); 4666 bs2 = bs.asReadOnlyBuffer(); 4667 4668 us.limit(1); 4669 us.position(0); 4670 bs2.limit(1); 4671 bs2.position(0); 4672 4673 result = decoder.decode(bs2, us, true); 4674 result = decoder.flush(us); 4675 if (!result.isMalformed()) { 4676 errln("Malform error should have occurred while decoding UTF-8 (1)."); 4677 } 4678 4679 us.limit(1); 4680 us.position(0); 4681 bs2.limit(1); 4682 bs2.position(0); 4683 4684 decoder.reset(); 4685 4686 result = decoder.decode(bs2, us, true); 4687 us.limit(1); 4688 us.position(0); 4689 bs2.limit(2); 4690 bs2.position(0); 4691 result = decoder.decode(bs2, us, true); 4692 if (!result.isMalformed()) { 4693 errln("Malform error should have occurred while decoding UTF-8 (2)."); 4694 } 4695 4696 us.clear(); 4697 bs.clear(); 4698 4699 //test overflow buffer 4700 bs.put((byte)0x01); bs.put((byte)0x41); 4701 us.put((char)0x0000); 4702 bs2 = bs.asReadOnlyBuffer(); 4703 us.limit(1); 4704 us.position(0); 4705 bs2.limit(2); 4706 bs2.position(0); 4707 4708 result = decoder.decode(bs2, us, true); 4709 if (!result.isOverflow()) { 
4710 errln("Overflow error should have occurred while decoding UTF-8 (3)."); 4711 } 4712 4713 us.clear(); 4714 bs.clear(); 4715 4716 //test malform string 4717 decoder.reset(); 4718 bs.put((byte)0xF5); bs.put((byte)0xB4); bs.put((byte)0x8A); bs.put((byte)0x8C); 4719 us.put((char)0x0000); 4720 bs2 = bs.asReadOnlyBuffer(); 4721 us.limit(1); 4722 us.position(0); 4723 bs2.limit(4); 4724 bs2.position(0); 4725 4726 result = decoder.decode(bs2, us, true); 4727 if (!result.isMalformed()) { 4728 errln("Malform error should have occurred while decoding UTF-8 (4)."); 4729 } 4730 4731 bs.clear(); 4732 4733 //test overflow 4734 decoder.reset(); 4735 bs.put((byte)0xF3); bs.put((byte)0xB4); bs.put((byte)0x8A); bs.put((byte)0x8C); 4736 bs2 = bs.asReadOnlyBuffer(); 4737 us.limit(1); 4738 us.position(0); 4739 bs2.limit(4); 4740 bs2.position(0); 4741 4742 result = decoder.decode(bs2, us, true); 4743 if (!result.isOverflow()) { 4744 errln("Overflow error should have occurred while decoding UTF-8 (5)."); 4745 } 4746 4747 //test overflow 4748 decoder.reset(); 4749 us.limit(2); 4750 us.position(0); 4751 bs2.limit(5); 4752 bs2.position(0); 4753 4754 result = decoder.decode(bs2, us, true); 4755 if (!result.isOverflow()) { 4756 errln("Overflow error should have occurred while decoding UTF-8 (5)."); 4757 } 4758 4759 //test overflow 4760 decoder.reset(); 4761 us.limit(1); 4762 us.position(0); 4763 bs.limit(5); 4764 bs.position(0); 4765 4766 result = decoder.decode(bs, us, true); 4767 if (!result.isOverflow()) { 4768 errln("Overflow error should have occurred while decoding UTF-8 (6)."); 4769 } 4770 4771 bs.clear(); 4772 4773 //test overflow 4774 decoder.reset(); 4775 bs.put((byte)0x41); bs.put((byte)0x42); 4776 us.limit(1); 4777 us.position(0); 4778 bs.limit(2); 4779 bs.position(0); 4780 4781 result = decoder.decode(bs, us, true); 4782 if (!result.isOverflow()) { 4783 errln("Overflow error should have occurred while decoding UTF-8 (7)."); 4784 } 4785 4786 } 4787 4788 //provide better code 
coverage for Charset UTF16 4789 @Test TestCharsetUTF16()4790 public void TestCharsetUTF16() { 4791 CoderResult result = CoderResult.UNDERFLOW; 4792 CharsetProvider provider = new CharsetProviderICU(); 4793 CharsetDecoder decoder = provider.charsetForName("UTF-16").newDecoder(); 4794 CharsetEncoder encoder = provider.charsetForName("UTF-16").newEncoder(); 4795 4796 CharBuffer us = CharBuffer.allocate(0x10); 4797 ByteBuffer bs = ByteBuffer.allocate(0x10); 4798 4799 //test flush buffer and malform string 4800 bs.put((byte)0xFF); 4801 us.put((char)0x0000); 4802 4803 us.limit(us.position()); 4804 us.position(0); 4805 bs.limit(bs.position()); 4806 bs.position(0); 4807 4808 result = decoder.decode(bs, us, true); 4809 result = decoder.flush(us); 4810 if (!result.isMalformed()) { 4811 errln("Malform error while decoding UTF-16 should have occurred."); 4812 } 4813 4814 us.clear(); 4815 bs.clear(); 4816 4817 us.put((char)0xD902); us.put((char)0xDD01); us.put((char)0x0041); 4818 4819 us.limit(1); 4820 us.position(0); 4821 bs.limit(4); 4822 bs.position(0); 4823 4824 result = encoder.encode(us, bs, true); 4825 us.limit(3); 4826 us.position(0); 4827 bs.limit(3); 4828 bs.position(0); 4829 result = encoder.encode(us, bs, true); 4830 if (!result.isOverflow()) { 4831 errln("Overflow buffer while encoding UTF-16 should have occurred."); 4832 } 4833 4834 us.clear(); 4835 bs.clear(); 4836 4837 //test overflow buffer 4838 decoder.reset(); 4839 decoder = provider.charsetForName("UTF-16BE").newDecoder(); 4840 4841 bs.put((byte)0xFF); bs.put((byte)0xFE); bs.put((byte)0x41); 4842 4843 us.limit(0); 4844 us.position(0); 4845 bs.limit(3); 4846 bs.position(0); 4847 4848 result = decoder.decode(bs, us, true); 4849 if (!result.isOverflow()) { 4850 errln("Overflow buffer while decoding UTF-16 should have occurred."); 4851 } 4852 } 4853 4854 //provide better code coverage for Charset ISO-2022-KR 4855 @Test TestCharsetISO2022KR()4856 public void TestCharsetISO2022KR() { 4857 CoderResult result = 
CoderResult.UNDERFLOW; 4858 CharsetProvider provider = new CharsetProviderICU(); 4859 CharsetDecoder decoder = provider.charsetForName("ISO-2022-KR").newDecoder(); 4860 4861 byte bytearray[] = { 4862 (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x43, (byte)0x41, (byte)0x42, 4863 }; 4864 char chararray[] = { 4865 (char)0x0041 4866 }; 4867 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4868 CharBuffer cb = CharBuffer.wrap(chararray); 4869 4870 result = decoder.decode(bb, cb, true); 4871 4872 if (!result.isOverflow()) { 4873 errln("Overflow buffer while decoding ISO-2022-KR should have occurred."); 4874 } 4875 } 4876 4877 //provide better code coverage for Charset ISO-2022-JP 4878 @Test TestCharsetISO2022JP()4879 public void TestCharsetISO2022JP() { 4880 CoderResult result = CoderResult.UNDERFLOW; 4881 CharsetProvider provider = new CharsetProviderICU(); 4882 CharsetDecoder decoder = provider.charsetForName("ISO-2022-JP-2").newDecoder(); 4883 4884 byte bytearray[] = { 4885 (byte)0x1b, (byte)0x24, (byte)0x28, (byte)0x44, (byte)0x0A, (byte)0x41, 4886 }; 4887 char chararray[] = { 4888 (char)0x000A 4889 }; 4890 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4891 CharBuffer cb = CharBuffer.wrap(chararray); 4892 4893 result = decoder.decode(bb, cb, true); 4894 4895 if (!result.isOverflow()) { 4896 errln("Overflow buffer while decoding ISO-2022-KR should have occurred."); 4897 } 4898 } 4899 4900 //provide better code coverage for Charset ASCII 4901 @Test TestCharsetASCII()4902 public void TestCharsetASCII() { 4903 CoderResult result = CoderResult.UNDERFLOW; 4904 CharsetProvider provider = new CharsetProviderICU(); 4905 CharsetDecoder decoder = provider.charsetForName("US-ASCII").newDecoder(); 4906 4907 byte bytearray[] = { 4908 (byte)0x41 4909 }; 4910 char chararray[] = { 4911 (char)0x0041 4912 }; 4913 4914 ByteBuffer bb = ByteBuffer.wrap(bytearray); 4915 CharBuffer cb = CharBuffer.wrap(chararray); 4916 4917 result = decoder.decode(bb, cb, true); 4918 result = decoder.flush(cb); 4919 
4920 if (result.isError()) { 4921 errln("Error occurred while decoding US-ASCII."); 4922 } 4923 } 4924 4925 // provide better code coverage for Charset Callbacks 4926 /* Different aspects of callbacks are being tested including using different context available */ 4927 @Test TestCharsetCallbacks()4928 public void TestCharsetCallbacks() { 4929 CoderResult result = CoderResult.UNDERFLOW; 4930 CharsetProvider provider = new CharsetProviderICU(); 4931 CharsetEncoder encoder = provider.charsetForName("iso-2022-jp").newEncoder(); 4932 CharsetDecoder decoder = provider.charsetForName("iso-2022-jp").newDecoder(); 4933 4934 String context3[] = { 4935 "i", 4936 "J" 4937 }; 4938 4939 // Testing encoder escape callback 4940 String context1[] = { 4941 "J", 4942 "C", 4943 "D", 4944 null 4945 }; 4946 char chararray[] = { 4947 (char)0xd122 4948 }; 4949 ByteBuffer bb = ByteBuffer.allocate(20); 4950 CharBuffer cb = CharBuffer.wrap(chararray); 4951 4952 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.OVERFLOW, CharsetCallback.FROM_U_CALLBACK_ESCAPE, null); // This callback is not valid. 4953 for (int i = 0; i < context1.length; i++) { 4954 encoder.reset(); 4955 cb.position(0); 4956 bb.position(0); 4957 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_ESCAPE, context1[i]); // This callback is valid. 
4958 4959 result = encoder.encode(cb, bb, true); 4960 if (result.isError()) { 4961 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 4962 } 4963 } 4964 4965 // Testing encoder skip callback 4966 for (int i = 0; i < context3.length; i++) { 4967 encoder.reset(); 4968 cb.position(0); 4969 bb.position(0); 4970 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_SKIP, context3[i]); 4971 4972 result = encoder.encode(cb, bb, true); 4973 if (result.isError() && i == 0) { 4974 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 4975 } 4976 } 4977 4978 // Testing encoder sub callback 4979 for (int i = 0; i < context3.length; i++) { 4980 encoder.reset(); 4981 cb.position(0); 4982 bb.position(0); 4983 ((CharsetEncoderICU)encoder).setFromUCallback(CoderResult.unmappableForLength(1), CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE, context3[i]); 4984 4985 result = encoder.encode(cb, bb, true); 4986 if (result.isError() && i == 0) { 4987 errln("Error occurred while testing of callbacks for ISO-2022-JP encoder."); 4988 } 4989 } 4990 4991 // Testing decoder escape callback 4992 String context2[] = { 4993 "X", 4994 "C", 4995 "D", 4996 null 4997 }; 4998 byte bytearray[] = { 4999 (byte)0x1b, (byte)0x2e, (byte)0x43 5000 }; 5001 bb = ByteBuffer.wrap(bytearray); 5002 cb = CharBuffer.allocate(20); 5003 5004 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_ESCAPE, null); // This callback is not valid. 5005 for (int i = 0; i < context2.length; i++) { 5006 decoder.reset(); 5007 cb.position(0); 5008 bb.position(0); 5009 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), CharsetCallback.TO_U_CALLBACK_ESCAPE, context2[i]); // This callback is valid. 
5010 5011 result = decoder.decode(bb, cb, true); 5012 if (result.isError()) { 5013 errln("Error occurred while testing of callbacks for ISO-2022-JP decoder."); 5014 } 5015 } 5016 5017 // Testing decoder skip callback 5018 for (int i = 0; i < context3.length; i++) { 5019 decoder.reset(); 5020 cb.position(0); 5021 bb.position(0); 5022 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.malformedForLength(1), CharsetCallback.TO_U_CALLBACK_SKIP, context3[i]); 5023 result = decoder.decode(bb, cb, true); 5024 if (!result.isError()) { 5025 errln("Error occurred while testing of callbacks for ISO-2022-JP decoder should have occurred."); 5026 } 5027 } 5028 } 5029 5030 // Testing invalid input exceptions 5031 @Test TestInvalidInput()5032 public void TestInvalidInput() { 5033 CharsetProvider provider = new CharsetProviderICU(); 5034 Charset charset = provider.charsetForName("iso-2022-jp"); 5035 CharsetEncoder encoder = charset.newEncoder(); 5036 CharsetDecoder decoder = charset.newDecoder(); 5037 5038 try { 5039 encoder.encode(CharBuffer.allocate(10), null, true); 5040 errln("Illegal argument exception should have been thrown due to null target."); 5041 } catch (Exception ex) { 5042 } 5043 5044 try { 5045 decoder.decode(ByteBuffer.allocate(10), null, true); 5046 errln("Illegal argument exception should have been thrown due to null target."); 5047 } catch (Exception ex) { 5048 } 5049 } 5050 5051 // Test java canonical names 5052 @Test TestGetICUJavaCanonicalNames()5053 public void TestGetICUJavaCanonicalNames() { 5054 // Ambiguous charset name. 
5055 String javaCName = CharsetProviderICU.getJavaCanonicalName("windows-1250"); 5056 String icuCName = CharsetProviderICU.getICUCanonicalName("Windows-1250"); 5057 if (javaCName == null || icuCName == null) { 5058 errln("Unable to get Java or ICU canonical name from ambiguous alias"); 5059 } 5060 5061 } 5062 5063 // Port over from ICU4C for test conversion tables (mbcs version 5.x) 5064 // Provide better code coverage in CharsetMBCS, CharsetDecoderICU, and CharsetEncoderICU. 5065 @Test TestCharsetTestData()5066 public void TestCharsetTestData() { 5067 CoderResult result = CoderResult.UNDERFLOW; 5068 String charsetName = "test4"; 5069 CharsetProvider provider = new CharsetProviderICU(); 5070 Charset charset = ((CharsetProviderICU)provider).charsetForName(charsetName, "com/ibm/icu/dev/data/testdata", 5071 this.getClass().getClassLoader()); 5072 CharsetEncoder encoder = charset.newEncoder(); 5073 CharsetDecoder decoder = charset.newDecoder(); 5074 5075 byte bytearray[] = { 5076 0x01, 0x02, 0x03, 0x0a, 5077 0x01, 0x02, 0x03, 0x0b, 5078 0x01, 0x02, 0x03, 0x0d, 5079 }; 5080 5081 // set the callback for overflow errors 5082 ((CharsetDecoderICU)decoder).setToUCallback(CoderResult.OVERFLOW, CharsetCallback.TO_U_CALLBACK_STOP, null); 5083 5084 ByteBuffer bb = ByteBuffer.wrap(bytearray); 5085 CharBuffer cb = CharBuffer.allocate(10); 5086 5087 bb.limit(4); 5088 cb.limit(1); // Overflow should occur and is expected 5089 result = decoder.decode(bb, cb, false); 5090 if (result.isError()) { 5091 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5092 } 5093 5094 bb.limit(8); 5095 result = decoder.decode(bb, cb, false); 5096 if (result.isError()) { 5097 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5098 } 5099 5100 bb.limit(12); 5101 result = decoder.decode(bb, cb, true); 5102 if (result.isError()) { 5103 errln("Error occurred while decoding: " + charsetName + " with error: " + result); 5104 } 5105 5106 char 
chararray[] = { 5107 0xDBC4,0xDE34,0xD900,0xDC05,/* \U00101234\U00050005 */ 5108 0xD940, /* first half of \U00060006 or \U00060007 */ 5109 0xDC07/* second half of \U00060007 */ 5110 }; 5111 5112 cb = CharBuffer.wrap(chararray); 5113 bb = ByteBuffer.allocate(10); 5114 5115 bb.limit(2); 5116 cb.limit(4); 5117 result = encoder.encode(cb, bb, false); 5118 if (result.isError()) { 5119 errln("Error occurred while encoding: " + charsetName + " with error: " + result); 5120 } 5121 cb.limit(5); 5122 result = encoder.encode(cb, bb, false); 5123 if (result.isError()) { 5124 errln("Error occurred while encoding: " + charsetName + " with error: " + result); 5125 } 5126 cb.limit(6); 5127 result = encoder.encode(cb, bb, true); 5128 if (!result.isError()) { 5129 errln("Error should have occurred while encoding: " + charsetName); 5130 } 5131 } 5132 5133 /* Round trip test of SCSU converter*/ 5134 @Test TestSCSUConverter()5135 public void TestSCSUConverter(){ 5136 byte allFeaturesSCSU[]={ 5137 0x41,(byte) 0xdf, 0x12,(byte) 0x81, 0x03, 0x5f, 0x10, (byte)0xdf, 0x1b, 0x03, 5138 (byte)0xdf, 0x1c,(byte) 0x88,(byte) 0x80, 0x0b, (byte)0xbf,(byte) 0xff,(byte) 0xff, 0x0d, 0x0a, 5139 0x41, 0x10, (byte)0xdf, 0x12, (byte)0x81, 0x03, 0x5f, 0x10, (byte)0xdf, 0x13, 5140 (byte)0xdf, 0x14,(byte) 0x80, 0x15, (byte)0xff 5141 }; 5142 5143 char allFeaturesUTF16[]={ 5144 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 5145 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 5146 0x01df, 0xf000, 0xdbff, 0xdfff 5147 }; 5148 5149 5150 char germanUTF16[]={ 5151 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 5152 }; 5153 5154 byte germanSCSU[]={ 5155 (byte)0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65,(byte) 0xdf, 0x74 5156 }; 5157 5158 char russianUTF16[]={ 5159 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 5160 }; 5161 5162 byte russianSCSU[]={ 5163 0x12, (byte)0x9c,(byte)0xbe,(byte) 0xc1, (byte)0xba, (byte)0xb2, (byte)0xb0 5164 }; 5165 5166 char 
japaneseUTF16[]={ 5167 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 5168 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 5169 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 5170 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 5171 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 5172 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 5173 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 5174 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 5175 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 5176 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 5177 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 5178 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 5179 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 5180 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 5181 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 5182 }; 5183 5184 // SCSUEncoder produces a slightly longer result (179B vs. 
178B) because of one different choice: 5185 //it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient 5186 byte japaneseSCSU[]={ 5187 0x08, 0x00, 0x1b, 0x4c,(byte) 0xea, 0x16, (byte)0xca, (byte)0xd3,(byte) 0x94, 0x0f, 0x53, (byte)0xef, 0x61, 0x1b, (byte)0xe5,(byte) 0x84, 5188 (byte)0xc4, 0x0f, (byte)0x53,(byte) 0xef, 0x61, 0x1b, (byte)0xe5, (byte)0x84, (byte)0xc4, 0x16, (byte)0xca, (byte)0xd3, (byte)0x94, 0x08, 0x02, 0x0f, 5189 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, (byte)0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41,(byte) 0x88, 0x4c, 5190 (byte) 0xe5,(byte) 0x97, (byte)0x9f, 0x08, 0x0c, 0x16,(byte) 0xca,(byte) 0xd3, (byte)0x94, 0x15, (byte)0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 5191 (byte) 0x8c, (byte)0xb4, (byte)0xa3,(byte) 0x9f,(byte) 0xca, (byte)0x99, (byte)0xcb,(byte) 0x8b, (byte)0xc2,(byte) 0x97,(byte) 0xcc,(byte) 0xaa,(byte) 0x84, 0x08, 0x02, 0x0e, 5192 0x7c, 0x73, (byte)0xe2, 0x16, (byte)0xa3,(byte) 0xb7, (byte)0xcb, (byte)0x93, (byte)0xd3,(byte) 0xb4,(byte) 0xc5, (byte)0xdc, (byte)0x9f, 0x0e, 0x79, 0x3e, 5193 0x06, (byte)0xae, (byte)0xb1, (byte)0x9d,(byte) 0x93, (byte)0xd3, 0x08, 0x0c, (byte)0xbe,(byte) 0xa3, (byte)0x8f, 0x08,(byte) 0x88,(byte) 0xbe,(byte) 0xa3,(byte) 0x8d, 5194 (byte)0xd3,(byte) 0xa8, (byte)0xa3, (byte)0x97,(byte) 0xc5, 0x17,(byte) 0x89, 0x08, 0x0d, 0x15,(byte) 0xd2, 0x08, 0x01, (byte)0x93, (byte)0xc8,(byte) 0xaa, 5195 (byte)0x8f, 0x0e, 0x61, 0x1b, (byte)0x99,(byte) 0xcb, 0x0e, 0x4e, (byte)0xba, (byte)0x9f, (byte)0xa1,(byte) 0xae,(byte) 0x93, (byte)0xa8,(byte) 0xa0, 0x08, 5196 0x02, 0x08, 0x0c, (byte)0xe2, 0x16, (byte)0xa3, (byte)0xb7, (byte)0xcb, 0x0f, 0x4f,(byte) 0xe1,(byte) 0x80, 0x05,(byte) 0xec, 0x60, (byte)0x8d, 5197 (byte)0xea, 0x06,(byte) 0xd3,(byte) 0xe6, 0x0f,(byte) 0x8a, 0x00, 0x30, 0x44, 0x65,(byte) 0xb9, (byte)0xe4, (byte)0xfe,(byte) 0xe7,(byte) 0xc2, 0x06, 5198 (byte)0xcb, (byte)0x82 5199 }; 5200 5201 CharsetProviderICU cs = new CharsetProviderICU(); 5202 CharsetICU charset = 
(CharsetICU)cs.charsetForName("scsu"); 5203 CharsetDecoder decode = charset.newDecoder(); 5204 CharsetEncoder encode = charset.newEncoder(); 5205 5206 //String[] codePoints = {"allFeatures", "german","russian","japanese"}; 5207 byte[][] fromUnicode={allFeaturesSCSU,germanSCSU,russianSCSU,japaneseSCSU}; 5208 char[][] toUnicode = {allFeaturesUTF16, germanUTF16,russianUTF16,japaneseUTF16}; 5209 5210 for(int i=0;i<4;i++){ 5211 ByteBuffer decoderBuffer = ByteBuffer.wrap(fromUnicode[i]); 5212 CharBuffer encoderBuffer = CharBuffer.wrap(toUnicode[i]); 5213 5214 try{ 5215 // Decoding 5216 CharBuffer decoderResult = decode.decode(decoderBuffer); 5217 encoderBuffer.position(0); 5218 if(!decoderResult.equals(encoderBuffer)){ 5219 errln("Error occured while decoding "+ charset.name()); 5220 } 5221 // Encoding 5222 ByteBuffer encoderResult = encode.encode(encoderBuffer); 5223 // RoundTrip Test 5224 ByteBuffer roundTrip = encoderResult; 5225 CharBuffer roundTripResult = decode.decode(roundTrip); 5226 encoderBuffer.position(0); 5227 if(!roundTripResult.equals(encoderBuffer)){ 5228 errln("Error occured while encoding "+ charset.name()); 5229 } 5230 // Test overflow for code coverage reasons 5231 if (i == 0) { 5232 ByteBuffer test = encoderResult; 5233 test.position(0); 5234 CharBuffer smallBuffer = CharBuffer.allocate(11); 5235 decode.reset(); 5236 CoderResult status = decode.decode(test, smallBuffer, true); 5237 if (status != CoderResult.OVERFLOW) { 5238 errln("Overflow buffer error should have been thrown."); 5239 } 5240 } 5241 }catch(Exception e){ 5242 errln("Exception while converting SCSU thrown: " + e); 5243 } 5244 } 5245 5246 /* Provide better code coverage */ 5247 /* testing illegal codepoints */ 5248 CoderResult illegalResult = CoderResult.UNDERFLOW; 5249 CharBuffer illegalDecoderTrgt = CharBuffer.allocate(10); 5250 5251 byte[] illegalDecoderSrc1 = { (byte)0x41, (byte)0xdf, (byte)0x0c }; 5252 decode.reset(); 5253 illegalResult = 
decode.decode(ByteBuffer.wrap(illegalDecoderSrc1), illegalDecoderTrgt, true); 5254 if (illegalResult == CoderResult.OVERFLOW || illegalResult == CoderResult.UNDERFLOW) { 5255 errln("Malformed error should have been returned for decoder " + charset.name()); 5256 } 5257 /* code coverage test from nucnvtst.c in ICU4C */ 5258 CoderResult ccResult = CoderResult.UNDERFLOW; 5259 int CCBufSize = 120 * 10; 5260 ByteBuffer trgt = ByteBuffer.allocate(CCBufSize); 5261 CharBuffer test = CharBuffer.allocate(CCBufSize); 5262 String [] ccSrc = { 5263 "\ud800\udc00", /* smallest surrogate*/ 5264 "\ud8ff\udcff", 5265 "\udBff\udFff", /* largest surrogate pair*/ 5266 "\ud834\udc00", 5267 //"\U0010FFFF", 5268 "Hello \u9292 \u9192 World!", 5269 "Hell\u0429o \u9292 \u9192 W\u00e4rld!", 5270 "Hell\u0429o \u9292 \u9292W\u00e4rld!", 5271 5272 "\u0648\u06c8", /* catch missing reset*/ 5273 "\u0648\u06c8", 5274 5275 "\u4444\uE001", /* lowest quotable*/ 5276 "\u4444\uf2FF", /* highest quotable*/ 5277 "\u4444\uf188\u4444", 5278 "\u4444\uf188\uf288", 5279 "\u4444\uf188abc\u0429\uf288", 5280 "\u9292\u2222", 5281 "Hell\u0429\u04230o \u9292 \u9292W\u00e4\u0192rld!", 5282 "Hell\u0429o \u9292 \u9292W\u00e4rld!", 5283 "Hello World!123456", 5284 "Hello W\u0081\u011f\u0082!", /* Latin 1 run*/ 5285 5286 "abc\u0301\u0302", /* uses SQn for u301 u302*/ 5287 "abc\u4411d", /* uses SQU*/ 5288 "abc\u4411\u4412d",/* uses SCU*/ 5289 "abc\u0401\u0402\u047f\u00a5\u0405", /* uses SQn for ua5*/ 5290 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", /* SJIS like data*/ 5291 "\u9292\u2222", 5292 "\u9191\u9191\u3041\u9191\u3041\u3041\u3000", 5293 "\u9999\u3051\u300c\u9999\u9999\u3060\u9999\u3065\u3065\u3065\u300c", 5294 "\u3000\u266a\u30ea\u30f3\u30b4\u53ef\u611b\u3044\u3084\u53ef\u611b\u3044\u3084\u30ea\u30f3\u30b4\u3002", 5295 5296 "", /* empty input*/ 5297 "\u0000", /* smallest BMP character*/ 5298 "\uFFFF", /* largest BMP character*/ 5299 5300 /* regression tests*/ 5301 
"\u6441\ub413\ua733\uf8fe\ueedb\u587f\u195f\u4899\uf23d\u49fd\u0aac\u5792\ufc22\ufc3c\ufc46\u00aa", 5302 /*"\u00df\u01df\uf000\udbff\udfff\u000d\n\u0041\u00df\u0401\u015f\u00df\u01df\uf000\udbff\udfff",*/ 5303 "\u30f9\u8321\u05e5\u181c\ud72b\u2019\u99c9\u2f2f\uc10c\u82e1\u2c4d\u1ebc\u6013\u66dc\ubbde\u94a5\u4726\u74af\u3083\u55b9\u000c", 5304 "\u0041\u00df\u0401\u015f", 5305 "\u9066\u2123abc", 5306 //"\ud266\u43d7\ue386\uc9c0\u4a6b\u9222\u901f\u7410\ua63f\u539b\u9596\u482e\u9d47\ucfe4\u7b71\uc280\uf26a\u982f\u862a\u4edd\uf513\ufda6\u869d\u2ee0\ua216\u3ff6\u3c70\u89c0\u9576\ud5ec\ubfda\u6cca\u5bb3\ubcea\u554c\u914e\ufa4a\uede3\u2990\ud2f5\u2729\u5141\u0f26\uccd8\u5413\ud196\ubbe2\u51b9\u9b48\u0dc8\u2195\u21a2\u21e9\u00e4\u9d92\u0bc0\u06c5", 5307 "\uf95b\u2458\u2468\u0e20\uf51b\ue36e\ubfc1\u0080\u02dd\uf1b5\u0cf3\u6059\u7489", 5308 }; 5309 for (int i = 0; i < ccSrc.length; i++) { 5310 CharBuffer ubuf = CharBuffer.wrap(ccSrc[i]); 5311 encode.reset(); 5312 decode.reset(); 5313 trgt.clear(); 5314 test.clear(); 5315 ccResult = encode.encode(ubuf, trgt, true); 5316 if (ccResult.isError()) { 5317 errln("Error while encoding " + charset.name() + " in test for code coverage[" + i + "]."); 5318 } else { 5319 trgt.limit(trgt.position()); 5320 trgt.position(0); 5321 ccResult = decode.decode(trgt, test, true); 5322 if (ccResult.isError()) { 5323 errln("Error while decoding " + charset.name() + " in test for code coverage[" + i + "]."); 5324 } else { 5325 ubuf.position(0); 5326 test.limit(test.position()); 5327 test.position(0); 5328 if (!equals(test, ubuf)) { 5329 errln("Roundtrip failed for " + charset.name() + " in test for code coverage[" + i + "]."); 5330 } 5331 } 5332 } 5333 } 5334 5335 /* Monkey test */ 5336 { 5337 char[] monkeyIn = { 5338 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 5339 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 5340 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 
0x000D, 0x000A, 5341 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 5342 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 5343 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 5344 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 5345 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 5346 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 5347 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 5348 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 5349 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 5350 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 5351 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 5352 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 5353 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 5354 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 5355 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 5356 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 5357 /* test non-BMP code points */ 5358 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 5359 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 5360 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 5361 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 5362 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 5363 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 5364 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 
0xDEC8, 5365 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 5366 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 5367 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 5368 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 5369 5370 5371 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 5372 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 5373 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 5374 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 5375 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 5376 }; 5377 encode.reset(); 5378 decode.reset(); 5379 CharBuffer monkeyCB = CharBuffer.wrap(monkeyIn); 5380 try { 5381 ByteBuffer monkeyBB = encode.encode(monkeyCB); 5382 /* CharBuffer monkeyEndResult =*/ decode.decode(monkeyBB); 5383 5384 } catch (Exception ex) { 5385 errln("Exception thrown while encoding/decoding monkey test in SCSU: " + ex); 5386 } 5387 } 5388 // Test malformed 5389 { 5390 char[] malformedSequence = { 5391 0xD899, 0xDC7F, 0xDC88, 0xDC88, 0xD888, 0xDDF9 5392 }; 5393 encode.reset(); 5394 CharBuffer malformedSrc = CharBuffer.wrap(malformedSequence); 5395 5396 try { 5397 encode.encode(malformedSrc); 5398 errln("Malformed error should have thrown an exception."); 5399 } catch (Exception ex) { 5400 } 5401 } 5402 // Test overflow buffer 5403 { 5404 ByteBuffer overflowTest = ByteBuffer.wrap(allFeaturesSCSU); 5405 int sizes[] = { 8, 2, 11 }; 5406 for (int i = 0; i < sizes.length; i++) { 5407 try { 5408 decode.reset(); 5409 overflowTest.position(0); 5410 smBufDecode(decode, "SCSU overflow test", overflowTest, CharBuffer.allocate(sizes[i]), true, false); 5411 errln("Buffer overflow exception should have been thrown."); 5412 } catch (BufferOverflowException ex) { 5413 } catch (Exception ex) { 5414 errln("Buffer overflow exception 
should have been thrown."); 5415 } 5416 } 5417 5418 } 5419 } 5420 5421 /* Test for BOCU1 converter*/ 5422 @Test TestBOCU1Converter()5423 public void TestBOCU1Converter(){ 5424 char expected[]={ 5425 0xFEFF, 0x0061, 0x0062, 0x0020, // 0 5426 0x0063, 0x0061, 0x000D, 0x000A, 5427 5428 0x0020, 0x0000, 0x00DF, 0x00E6, // 8 5429 0x0930, 0x0020, 0x0918, 0x0909, 5430 5431 0x3086, 0x304D, 0x0020, 0x3053, // 16 5432 0x4000, 0x4E00, 0x7777, 0x0020, 5433 5434 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, // 24 5435 0x0020, 0xD7A3, 0xDC00, 0xD800, 5436 5437 0xD800, 0xDC00, 0xD845, 0xDDDD, // 32 5438 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, 5439 5440 0xDFFF, 0x0001, 0x0E40, 0x0020, // 40 5441 0x0009 5442 }; 5443 5444 byte sampleText[]={ // from cintltst/bocu1tst.c/TestBOCU1 text 1 5445 (byte) 0xFB, 5446 (byte) 0xEE, 5447 0x28, // from source offset 0 5448 0x24, 0x1E, 0x52, (byte) 0xB2, 0x20, 5449 (byte) 0xB3, 5450 (byte) 0xB1, 5451 0x0D, 5452 0x0A, 5453 5454 0x20, // from 8 5455 0x00, (byte) 0xD0, 0x6C, (byte) 0xB6, (byte) 0xD8, (byte) 0xA5, 5456 0x20, 0x68, 5457 0x59, 5458 5459 (byte) 0xF9, 5460 0x28, // from 16 5461 0x6D, 0x20, 0x73, (byte) 0xE0, 0x2D, (byte) 0xDE, 0x43, 5462 (byte) 0xD0, 0x33, 0x20, 5463 5464 (byte) 0xFA, 5465 (byte) 0x83, // from 24 5466 0x25, 0x01, (byte) 0xFB, 0x16, (byte) 0x87, 0x4B, 0x16, 0x20, 5467 (byte) 0xE6, (byte) 0xBD, (byte) 0xEB, 0x5B, 0x4B, (byte) 0xCC, 5468 5469 (byte) 0xF9, 5470 (byte) 0xA2, // from 32 5471 (byte) 0xFC, 0x10, 0x3E, (byte) 0xFE, 0x16, 0x3A, (byte) 0x8C, 5472 0x20, (byte) 0xFC, 0x03, (byte) 0xAC, 5473 5474 0x01, /// from 41 5475 (byte) 0xDE, (byte) 0x83, 0x20, 0x09 5476 }; 5477 5478 CharsetProviderICU cs = new CharsetProviderICU(); 5479 CharsetICU charset = (CharsetICU)cs.charsetForName("BOCU-1"); 5480 CharsetDecoder decode = charset.newDecoder(); 5481 CharsetEncoder encode = charset.newEncoder(); 5482 5483 ByteBuffer decoderBuffer = ByteBuffer.wrap(sampleText); 5484 CharBuffer encoderBuffer = CharBuffer.wrap(expected); 5485 try{ 5486 // Decoding 5487 
CharBuffer decoderResult = decode.decode(decoderBuffer); 5488 5489 encoderBuffer.position(0); 5490 if(!decoderResult.equals(encoderBuffer)){ 5491 errln("Error occured while decoding "+ charset.name()); 5492 } 5493 // Encoding 5494 ByteBuffer encoderResult = encode.encode(encoderBuffer); 5495 // RoundTrip Test 5496 ByteBuffer roundTrip = encoderResult; 5497 CharBuffer roundTripResult = decode.decode(roundTrip); 5498 5499 encoderBuffer.position(0); 5500 if(!roundTripResult.equals(encoderBuffer)){ 5501 errln("Error occured while encoding "+ charset.name()); 5502 } 5503 }catch(Exception e){ 5504 errln("Exception while converting BOCU-1 thrown: " + e); 5505 } 5506 } 5507 5508 /* Test that ICU4C and ICU4J get the same ICU canonical name when given the same alias. */ 5509 @Test TestICUCanonicalNameConsistency()5510 public void TestICUCanonicalNameConsistency() { 5511 String[] alias = { 5512 "KSC_5601" 5513 }; 5514 String[] expected = { 5515 "windows-949-2000" 5516 }; 5517 5518 for (int i = 0; i < alias.length; i++) { 5519 String name = CharsetProviderICU.getICUCanonicalName(alias[i]); 5520 if (!name.equals(expected[i])) { 5521 errln("The ICU canonical name in ICU4J does not match that in ICU4C. 
Result: " + name + "Expected: " + expected[i]); 5522 } 5523 } 5524 } 5525 5526 /* Increase code coverage for CharsetICU and CharsetProviderICU*/ 5527 @Test TestCharsetICUCodeCoverage()5528 public void TestCharsetICUCodeCoverage() { 5529 CharsetProviderICU provider = new CharsetProviderICU(); 5530 5531 if (provider.charsetForName("UTF16", null) != null) { 5532 errln("charsetForName should have returned a null"); 5533 } 5534 5535 if (CharsetProviderICU.getJavaCanonicalName(null) != null) { 5536 errln("getJavaCanonicalName should have returned a null when null is given to it."); 5537 } 5538 5539 try { 5540 Charset testCharset = CharsetICU.forNameICU("bogus"); 5541 errln("UnsupportedCharsetException should be thrown for charset \"bogus\" - but got charset " + testCharset.name()); 5542 } catch (UnsupportedCharsetException ex) { 5543 logln("UnsupportedCharsetException was thrown for CharsetICU.forNameICU(\"bogus\")"); 5544 } 5545 5546 Charset charset = provider.charsetForName("UTF16"); 5547 5548 try { 5549 ((CharsetICU)charset).getUnicodeSet(null, 0); 5550 } catch (IllegalArgumentException ex) { 5551 return; 5552 } 5553 errln("IllegalArgumentException should have been thrown."); 5554 } 5555 5556 @Test TestCharsetLMBCS()5557 public void TestCharsetLMBCS() { 5558 String []lmbcsNames = { 5559 "LMBCS-1", 5560 "LMBCS-2", 5561 "LMBCS-3", 5562 "LMBCS-4", 5563 "LMBCS-5", 5564 "LMBCS-6", 5565 "LMBCS-8", 5566 "LMBCS-11", 5567 "LMBCS-16", 5568 "LMBCS-17", 5569 "LMBCS-18", 5570 "LMBCS-19" 5571 }; 5572 5573 char[] src = { 5574 0x0192, 0x0041, 0x0061, 0x00D0, 0x00F6, 0x0100, 0x0174, 0x02E4, 0x03F5, 0x03FB, 5575 0x05D3, 0x05D4, 0x05EA, 0x0684, 0x0685, 0x1801, 0x11B3, 0x11E8, 0x1F9A, 0x2EB4, 5576 0x3157, 0x3336, 0x3304, 0xD881, 0xDC88 5577 }; 5578 CharBuffer cbInput = CharBuffer.wrap(src); 5579 5580 CharsetProviderICU provider = new CharsetProviderICU(); 5581 5582 for (int i = 0; i < lmbcsNames.length; i++) { 5583 Charset charset = provider.charsetForName(lmbcsNames[i]); 5584 if 
(charset == null) { 5585 errln("Unable to create LMBCS charset: " + lmbcsNames[i]); 5586 return; 5587 } 5588 CharsetEncoder encoder = charset.newEncoder(); 5589 CharsetDecoder decoder = charset.newDecoder(); 5590 5591 try { 5592 cbInput.position(0); 5593 ByteBuffer bbTmp = encoder.encode(cbInput); 5594 CharBuffer cbOutput = decoder.decode(bbTmp); 5595 5596 if (!equals(cbInput, cbOutput)) { 5597 errln("Roundtrip test failed for charset: " + lmbcsNames[i]); 5598 } 5599 } catch (Exception ex) { 5600 if (i >= 8) { 5601 /* Expected exceptions */ 5602 continue; 5603 } 5604 errln("Exception thrown: " + ex + " while using charset: " + lmbcsNames[i]); 5605 } 5606 5607 } 5608 5609 // Test malformed 5610 CoderResult malformedResult = CoderResult.UNDERFLOW; 5611 byte[] malformedBytes = { 5612 (byte)0x61, (byte)0x01, (byte)0x29, (byte)0x81, (byte)0xa0, (byte)0x0f 5613 }; 5614 ByteBuffer malformedSrc = ByteBuffer.wrap(malformedBytes); 5615 CharBuffer malformedTrgt = CharBuffer.allocate(10); 5616 int[] malformedLimits = { 5617 2, 6 5618 }; 5619 CharsetDecoder malformedDecoderTest = provider.charsetForName("LMBCS-1").newDecoder(); 5620 for (int n = 0; n < malformedLimits.length; n++) { 5621 malformedDecoderTest.reset(); 5622 5623 malformedSrc.position(0); 5624 malformedSrc.limit(malformedLimits[n]); 5625 5626 malformedTrgt.clear(); 5627 5628 malformedResult = malformedDecoderTest.decode(malformedSrc,malformedTrgt, true); 5629 if (!malformedResult.isMalformed()) { 5630 errln("Malformed error should have resulted."); 5631 } 5632 } 5633 } 5634 5635 /* 5636 * This is a port of ICU4C TestAmbiguousConverter in cintltst. 5637 * Since there is no concept of ambiguous converters in ICU4J 5638 * this test is merely for code coverage reasons. 
5639 */ 5640 @Test TestAmbiguousConverter()5641 public void TestAmbiguousConverter() { 5642 byte [] inBytes = { 5643 0x61, 0x5b, 0x5c 5644 }; 5645 ByteBuffer src = ByteBuffer.wrap(inBytes); 5646 CharBuffer trgt = CharBuffer.allocate(20); 5647 5648 CoderResult result = CoderResult.UNDERFLOW; 5649 CharsetProviderICU provider = new CharsetProviderICU(); 5650 String[] names = CharsetProviderICU.getAllNames(); 5651 5652 for (int i = 0; i < names.length; i++) { 5653 Charset charset = provider.charsetForName(names[i]); 5654 if (charset == null) { 5655 /* We don't care about any failures because not all converters are available. */ 5656 continue; 5657 } 5658 CharsetDecoder decoder = charset.newDecoder(); 5659 5660 src.position(0); 5661 trgt.clear(); 5662 5663 result = decoder.decode(src, trgt, true); 5664 if (result.isError()) { 5665 /* We don't care about any failures. */ 5666 continue; 5667 } 5668 } 5669 } 5670 5671 @Test TestIsFixedWidth()5672 public void TestIsFixedWidth(){ 5673 String[] fixedWidth = { 5674 "US-ASCII", 5675 "UTF32", 5676 "ibm-5478_P100-1995" 5677 }; 5678 5679 String[] notFixedWidth = { 5680 "GB18030", 5681 "UTF8", 5682 "windows-949-2000", 5683 "UTF16" 5684 }; 5685 CharsetProvider provider = new CharsetProviderICU(); 5686 Charset charset; 5687 5688 for (int i = 0; i < fixedWidth.length; i++) { 5689 charset = provider.charsetForName(fixedWidth[i]); 5690 5691 if (!((CharsetICU)charset).isFixedWidth()) { 5692 errln(fixedWidth[i] + " is a fixedWidth charset but returned false."); 5693 } 5694 } 5695 5696 for (int i = 0; i < notFixedWidth.length; i++) { 5697 charset = provider.charsetForName(notFixedWidth[i]); 5698 5699 if (((CharsetICU)charset).isFixedWidth()) { 5700 errln(notFixedWidth[i] + " is NOT a fixedWidth charset but returned true."); 5701 } 5702 } 5703 } 5704 5705 @Test TestBytesLengthForString()5706 public void TestBytesLengthForString() { 5707 CharsetProviderICU provider = new CharsetProviderICU(); 5708 String[] charsets = { 5709 
"windows-949-2000", 5710 "ibm-1047_P100-1995,swaplfnl", 5711 "ibm-930_P120-1999", 5712 "ISCII,version=0", 5713 "ISO_2022,locale=ko,version=0" 5714 }; 5715 5716 int[] expected = { 5717 40, 5718 20, 5719 80, /* changed from 60 to 80 to reflect the updates by #9205 */ 5720 80, 5721 160 5722 }; 5723 5724 int stringLength = 10; 5725 int length; 5726 int maxCharSize; 5727 5728 for (int i = 0; i < charsets.length; i++) { 5729 maxCharSize = (int)provider.charsetForName(charsets[i]).newEncoder().maxBytesPerChar(); 5730 length = CharsetEncoderICU.getMaxBytesForString(stringLength, maxCharSize); 5731 5732 if (length != expected[i]) { 5733 errln("For charset " + charsets[i] + " with string length " + stringLength + ", expected max byte length is " + expected[i] + " but got " + length); 5734 } 5735 } 5736 } 5737 5738 /* 5739 * When converting slices of a larger CharBuffer, Charset88591 and CharsetASCII does not handle the buffer correctly when 5740 * an unmappable character occurs. 5741 * Ticket #8729 5742 */ 5743 @Test TestCharsetASCII8859BufferHandling()5744 public void TestCharsetASCII8859BufferHandling() { 5745 String firstLine = "C077693790=|MEMO=|00=|022=|Blanche st and the driveway grate was fault and rotated under my car=|\r\n"; 5746 String secondLine = "C077693790=|MEMO=|00=|023=|puncturing the fuel tank. 
I spoke to the store operator (Ram Reddi –=|\r\n"; 5747 5748 String charsetNames[] = { 5749 "ASCII", 5750 "ISO-8859-1" 5751 }; 5752 5753 CoderResult result = CoderResult.UNDERFLOW; 5754 5755 CharsetEncoder encoder; 5756 5757 ByteBuffer outBuffer = ByteBuffer.allocate(500); 5758 CharBuffer charBuffer = CharBuffer.allocate(firstLine.length() + secondLine.length()); 5759 charBuffer.put(firstLine); 5760 charBuffer.put(secondLine); 5761 charBuffer.flip(); 5762 5763 for (int i = 0; i < charsetNames.length; i++) { 5764 encoder = CharsetICU.forNameICU(charsetNames[i]).newEncoder(); 5765 5766 charBuffer.position(firstLine.length()); 5767 CharBuffer charBufferSlice = charBuffer.slice(); 5768 charBufferSlice.limit(secondLine.length() - 2); 5769 5770 5771 try { 5772 result = encoder.encode(charBufferSlice, outBuffer, false); 5773 if (!result.isUnmappable()) { 5774 errln("Result of encoding " + charsetNames[i] + " should be: \"Unmappable\". Instead got: " + result); 5775 } 5776 } catch (IllegalArgumentException ex) { 5777 errln("IllegalArgumentException should not have been thrown when encoding: " + charsetNames[i]); 5778 } 5779 } 5780 } 5781 5782 /* 5783 * When converting with the String method getBytes(), buffer overflow exception is thrown because 5784 * of the way ICU4J is calculating the max bytes per char. This should be changed only on the ICU4J 5785 * side to match what the Java method is expecting. The ICU4C size will be left unchanged. 
5786 * Ticket #9205 5787 */ 5788 @Test TestBufferOverflowErrorUsingJavagetBytes()5789 public void TestBufferOverflowErrorUsingJavagetBytes() { 5790 String charsetName = "ibm-5035"; 5791 String testCase = "\u7d42"; 5792 5793 try { 5794 testCase.getBytes(charsetName); 5795 } catch (Exception ex) { 5796 errln("Error calling getBytes(): " + ex); 5797 } 5798 5799 } 5800 5801 @Test TestDefaultIgnorableCallback()5802 public void TestDefaultIgnorableCallback() { 5803 String cnv_name = "euc-jp-2007"; 5804 String pattern_ignorable = "[:Default_Ignorable_Code_Point:]"; 5805 String pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]"; 5806 UnicodeSet set_ignorable = new UnicodeSet(pattern_ignorable); 5807 UnicodeSet set_not_ignorable = new UnicodeSet(pattern_not_ignorable); 5808 CharsetEncoder encoder = CharsetICU.forNameICU(cnv_name).newEncoder(); 5809 5810 // set callback for the converter 5811 encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 5812 encoder.onMalformedInput(CodingErrorAction.REPLACE); 5813 5814 // test ignorable code points are ignored 5815 int size = set_ignorable.size(); 5816 for (int i = 0; i < size; i++) { 5817 encoder.reset(); 5818 try { 5819 if(encoder.encode(CharBuffer.wrap(Character.toChars(set_ignorable.charAt(i)))).limit() > 0) { 5820 errln("Callback should have ignore default ignorable: U+" + Integer.toHexString(set_ignorable.charAt(i))); 5821 } 5822 } catch (Exception ex) { 5823 errln("Error received converting +" + Integer.toHexString(set_ignorable.charAt(i))); 5824 } 5825 } 5826 5827 // test non-ignorable code points are not ignored 5828 size = set_not_ignorable.size(); 5829 for (int i = 0; i < size; i++) { 5830 encoder.reset(); 5831 try { 5832 if(encoder.encode(CharBuffer.wrap(Character.toChars(set_not_ignorable.charAt(i)))).limit() == 0) { 5833 errln("Callback should not have ignored: U+" + Integer.toHexString(set_not_ignorable.charAt(i))); 5834 } 5835 } catch (Exception ex) { 5836 errln("Error received converting U+" + 
Integer.toHexString(set_not_ignorable.charAt(i))); 5837 } 5838 } 5839 } 5840 } 5841