1 /** 2 ******************************************************************************* 3 * Copyright (C) 1996-2006, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 * 7 * 8 ******************************************************************************* 9 */ 10 /* 11 * (C) Copyright IBM Corp. 2000 - All Rights Reserved 12 * A JNI wrapper to ICU native converter Interface 13 * @author: Ram Viswanadha 14 */ 15 16 #define LOG_TAG "NativeConverter" 17 18 #include <stdlib.h> 19 #include <string.h> 20 21 #include <memory> 22 #include <vector> 23 24 #include <android/log.h> 25 #include <nativehelper/JNIHelp.h> 26 #include <nativehelper/ScopedLocalRef.h> 27 #include <nativehelper/ScopedPrimitiveArray.h> 28 #include <nativehelper/ScopedStringChars.h> 29 #include <nativehelper/ScopedUtfChars.h> 30 #include <nativehelper/jni_macros.h> 31 #include <nativehelper/toStringArray.h> 32 33 #include "IcuUtilities.h" 34 #include "JniConstants.h" 35 #include "JniException.h" 36 #include "unicode/ucnv.h" 37 #include "unicode/ucnv_cb.h" 38 #include "unicode/uniset.h" 39 #include "unicode/ustring.h" 40 #include "unicode/utypes.h" 41 42 #define NativeConverter_REPORT 0 43 #define NativeConverter_IGNORE 1 44 #define NativeConverter_REPLACE 2 45 46 #define MAX_REPLACEMENT_LENGTH 32 // equivalent to UCNV_ERROR_BUFFER_LENGTH 47 48 struct DecoderCallbackContext { 49 UChar replacementChars[MAX_REPLACEMENT_LENGTH]; 50 size_t replacementCharCount; 51 UConverterToUCallback onUnmappableInput; 52 UConverterToUCallback onMalformedInput; 53 }; 54 55 struct EncoderCallbackContext { 56 char replacementBytes[MAX_REPLACEMENT_LENGTH]; 57 size_t replacementByteCount; 58 UConverterFromUCallback onUnmappableInput; 59 UConverterFromUCallback onMalformedInput; 60 }; 61 toUConverter(jlong address)62 static UConverter* toUConverter(jlong address) { 63 return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address)); 64 } 65 collectStandardNames(JNIEnv * env,const char * canonicalName,const char * standard,std::vector<std::string> & result)66 static bool collectStandardNames(JNIEnv* env, const char* canonicalName, const char* standard, 67 std::vector<std::string>& result) { 68 UErrorCode status = U_ZERO_ERROR; 69 icu::UStringEnumeration e(ucnv_openStandardNames(canonicalName, standard, &status)); 70 if (maybeThrowIcuException(env, "ucnv_openStandardNames", status)) { 71 return false; 72 } 73 74 int32_t count = e.count(status); 75 if (maybeThrowIcuException(env, "StringEnumeration::count", status)) { 76 return false; 77 } 78 79 for (int32_t i = 0; i < count; ++i) { 80 const icu::UnicodeString* string = e.snext(status); 81 if (maybeThrowIcuException(env, "StringEnumeration::snext", status)) { 82 return false; 83 } 84 std::string s; 85 string->toUTF8String(s); 86 if (s.find_first_of("+,") == std::string::npos) { 87 result.push_back(s); 88 } 89 } 90 91 return true; 92 } 93 getICUCanonicalName(const char * name)94 static const char* getICUCanonicalName(const char* name) { 95 UErrorCode error = U_ZERO_ERROR; 96 const char* canonicalName = NULL; 97 if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) { 98 return canonicalName; 99 } else if ((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) { 100 return canonicalName; 101 } else if ((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) { 102 return canonicalName; 103 } else if ((canonicalName = ucnv_getAlias(name, 0, &error)) != NULL) { 104 // We have some aliases in the form x-blah .. match those first. 105 return canonicalName; 106 } else if (strstr(name, "x-") == name) { 107 // Check if the converter can be opened with the name given. 108 error = U_ZERO_ERROR; 109 icu::LocalUConverterPointer cnv(ucnv_open(name + 2, &error)); 110 if (U_SUCCESS(error)) { 111 return name + 2; 112 } 113 } 114 return NULL; 115 } 116 117 // If a charset listed in the IANA Charset Registry is supported by an implementation 118 // of the Java platform then its canonical name must be the name listed in the registry. 119 // Many charsets are given more than one name in the registry, in which case the registry 120 // identifies one of the names as MIME-preferred. If a charset has more than one registry 121 // name then its canonical name must be the MIME-preferred name and the other names in 122 // the registry must be valid aliases. If a supported charset is not listed in the IANA 123 // registry then its canonical name must begin with one of the strings "X-" or "x-". getJavaCanonicalName(JNIEnv * env,const char * icuCanonicalName)124 static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) { 125 UErrorCode status = U_ZERO_ERROR; 126 127 // Check to see if this is a well-known MIME or IANA name. 128 const char* cName = NULL; 129 if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) { 130 return env->NewStringUTF(cName); 131 } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) { 132 return env->NewStringUTF(cName); 133 } 134 135 // Check to see if an alias already exists with "x-" prefix, if yes then 136 // make that the canonical name. 137 int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status); 138 for (int i = 0; i < aliasCount; ++i) { 139 const char* name = ucnv_getAlias(icuCanonicalName, i, &status); 140 if (name != NULL && name[0] == 'x' && name[1] == '-') { 141 return env->NewStringUTF(name); 142 } 143 } 144 145 // As a last resort, prepend "x-" to any alias and make that the canonical name. 146 status = U_ZERO_ERROR; 147 const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status); 148 if (name == NULL && strchr(icuCanonicalName, ',') != NULL) { 149 name = ucnv_getAlias(icuCanonicalName, 1, &status); 150 } 151 // If there is no UTR22 canonical name then just return the original name. 152 if (name == NULL) { 153 name = icuCanonicalName; 154 } 155 std::unique_ptr<char[]> result(new char[2 + strlen(name) + 1]); 156 strcpy(&result[0], "x-"); 157 strcat(&result[0], name); 158 return env->NewStringUTF(&result[0]); 159 } 160 161 // Returns a canonical ICU converter name which may have a version number appended to it, based on 162 // the normal canonical name. This is used to determine the actual native converter to use (the 163 // normal unversioned name is used to determine the aliases and the Java name). getVersionedIcuCanonicalName(char const * icuCanonicalName)164 static char const * getVersionedIcuCanonicalName(char const * icuCanonicalName) { 165 if (strcmp(icuCanonicalName, "UTF-16") == 0) { 166 // The ICU UTF-16 converter encodes strings as platform-endian bytes with a BOM. The 167 // UTF-16,version=2 one encodes as big-endian with a BOM, as what the Charset javadoc requires. 168 return "UTF-16,version=2"; 169 } else { 170 return icuCanonicalName; 171 } 172 } 173 NativeConverter_openConverter(JNIEnv * env,jclass,jstring converterName)174 static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) { 175 ScopedUtfChars converterNameChars(env, converterName); 176 if (converterNameChars.c_str() == NULL) { 177 // Extra debugging check that we do have an exception if the we could not 178 // create a string. See b/62612946. 179 if (env->ExceptionCheck()) { 180 return 0; 181 } 182 maybeThrowIcuException(env, "openConverter", U_ILLEGAL_ARGUMENT_ERROR); 183 return 0; 184 } 185 UErrorCode status = U_ZERO_ERROR; 186 UConverter* cnv = ucnv_open(converterNameChars.c_str(), &status); 187 maybeThrowIcuException(env, "ucnv_open", status); 188 if (env->ExceptionCheck()) { 189 return 0; 190 } 191 if (cnv == NULL) { 192 // Extra debugging exception in case cnv is null but ICU did not report 193 // an error. See b/62612946. 194 maybeThrowIcuException(env, "openConverter", U_ILLEGAL_ARGUMENT_ERROR); 195 return 0; 196 } 197 return reinterpret_cast<uintptr_t>(cnv); 198 } 199 NativeConverter_closeConverter(JNIEnv *,jclass,jlong address)200 static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) { 201 ucnv_close(toUConverter(address)); 202 } 203 shouldCodecThrow(jboolean flush,UErrorCode error)204 static bool shouldCodecThrow(jboolean flush, UErrorCode error) { 205 if (flush) { 206 return (error != U_BUFFER_OVERFLOW_ERROR && error != U_TRUNCATED_CHAR_FOUND); 207 } else { 208 return (error != U_BUFFER_OVERFLOW_ERROR && error != U_INVALID_CHAR_FOUND && error != U_ILLEGAL_CHAR_FOUND); 209 } 210 } 211 NativeConverter_encode(JNIEnv * env,jclass,jlong address,jcharArray source,jint sourceEnd,jbyteArray target,jint targetEnd,jintArray data,jboolean flush)212 static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address, 213 jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd, 214 jintArray data, jboolean flush) { 215 216 UConverter* cnv = toUConverter(address); 217 if (cnv == NULL) { 218 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); 219 return U_ILLEGAL_ARGUMENT_ERROR; 220 } 221 ScopedCharArrayRO uSource(env, source); 222 if (uSource.get() == NULL) { 223 maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR); 224 return U_ILLEGAL_ARGUMENT_ERROR; 225 } 226 ScopedByteArrayRW uTarget(env, target); 227 if (uTarget.get() == NULL) { 228 maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR); 229 return U_ILLEGAL_ARGUMENT_ERROR; 230 } 231 ScopedIntArrayRW myData(env, data); 232 if (myData.get() == NULL) { 233 maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR); 234 return U_ILLEGAL_ARGUMENT_ERROR; 235 } 236 237 // Do the conversion. 238 jint* sourceOffset = &myData[0]; 239 jint* targetOffset = &myData[1]; 240 const jchar* mySource = uSource.get() + *sourceOffset; 241 const UChar* mySourceLimit= reinterpret_cast<const UChar*>(uSource.get()) + sourceEnd; 242 char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset); 243 const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd); 244 UErrorCode errorCode = U_ZERO_ERROR; 245 ucnv_fromUnicode(cnv, &cTarget, cTargetLimit, reinterpret_cast<const UChar**>(&mySource), mySourceLimit, NULL, (UBool) flush, &errorCode); 246 *sourceOffset = (mySource - uSource.get()) - *sourceOffset; 247 *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get()); 248 249 // If there was an error, count the problematic characters. 250 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND || 251 errorCode == U_TRUNCATED_CHAR_FOUND) { 252 int8_t invalidUCharCount = 32; 253 UChar invalidUChars[32]; 254 UErrorCode minorErrorCode = U_ZERO_ERROR; 255 ucnv_getInvalidUChars(cnv, invalidUChars, &invalidUCharCount, &minorErrorCode); 256 if (U_SUCCESS(minorErrorCode)) { 257 myData[2] = invalidUCharCount; 258 } 259 } 260 261 // Managed code handles some cases; throw all other errors. 262 if (shouldCodecThrow(flush, errorCode)) { 263 maybeThrowIcuException(env, "ucnv_fromUnicode", errorCode); 264 } 265 return errorCode; 266 } 267 NativeConverter_decode(JNIEnv * env,jclass,jlong address,jbyteArray source,jint sourceEnd,jcharArray target,jint targetEnd,jintArray data,jboolean flush)268 static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address, 269 jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd, 270 jintArray data, jboolean flush) { 271 272 UConverter* cnv = toUConverter(address); 273 if (cnv == NULL) { 274 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); 275 return U_ILLEGAL_ARGUMENT_ERROR; 276 } 277 ScopedByteArrayRO uSource(env, source); 278 if (uSource.get() == NULL) { 279 maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR); 280 return U_ILLEGAL_ARGUMENT_ERROR; 281 } 282 ScopedCharArrayRW uTarget(env, target); 283 if (uTarget.get() == NULL) { 284 maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR); 285 return U_ILLEGAL_ARGUMENT_ERROR; 286 } 287 ScopedIntArrayRW myData(env, data); 288 if (myData.get() == NULL) { 289 maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR); 290 return U_ILLEGAL_ARGUMENT_ERROR; 291 } 292 293 // Do the conversion. 294 jint* sourceOffset = &myData[0]; 295 jint* targetOffset = &myData[1]; 296 const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset); 297 const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd); 298 UChar* cTarget = reinterpret_cast<UChar*>(uTarget.get()) + *targetOffset; 299 const UChar* cTargetLimit = reinterpret_cast<UChar*>(uTarget.get()) + targetEnd; 300 UErrorCode errorCode = U_ZERO_ERROR; 301 ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode); 302 *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset; 303 *targetOffset = cTarget - reinterpret_cast<UChar*>(uTarget.get()) - *targetOffset; 304 305 // If there was an error, count the problematic bytes. 306 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND || 307 errorCode == U_TRUNCATED_CHAR_FOUND) { 308 int8_t invalidByteCount = 32; 309 char invalidBytes[32] = {'\0'}; 310 UErrorCode minorErrorCode = U_ZERO_ERROR; 311 ucnv_getInvalidChars(cnv, invalidBytes, &invalidByteCount, &minorErrorCode); 312 if (U_SUCCESS(minorErrorCode)) { 313 myData[2] = invalidByteCount; 314 } 315 } 316 317 // Managed code handles some cases; throw all other errors. 318 if (shouldCodecThrow(flush, errorCode)) { 319 maybeThrowIcuException(env, "ucnv_toUnicode", errorCode); 320 } 321 return errorCode; 322 } 323 NativeConverter_resetByteToChar(JNIEnv *,jclass,jlong address)324 static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) { 325 UConverter* cnv = toUConverter(address); 326 if (cnv) { 327 ucnv_resetToUnicode(cnv); 328 } 329 } 330 NativeConverter_resetCharToByte(JNIEnv *,jclass,jlong address)331 static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) { 332 UConverter* cnv = toUConverter(address); 333 if (cnv) { 334 ucnv_resetFromUnicode(cnv); 335 } 336 } 337 NativeConverter_getMaxBytesPerChar(JNIEnv *,jclass,jlong address)338 static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) { 339 UConverter* cnv = toUConverter(address); 340 return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1; 341 } 342 NativeConverter_getMinBytesPerChar(JNIEnv *,jclass,jlong address)343 static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) { 344 UConverter* cnv = toUConverter(address); 345 return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1; 346 } 347 NativeConverter_getAveBytesPerChar(JNIEnv *,jclass,jlong address)348 static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) { 349 UConverter* cnv = toUConverter(address); 350 return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1; 351 } 352 NativeConverter_getAvailableCharsetNames(JNIEnv * env,jclass)353 static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) { 354 int32_t num = ucnv_countAvailable(); 355 jobjectArray result = env->NewObjectArray(num, JniConstants::GetStringClass(env), NULL); 356 if (result == NULL) { 357 return NULL; 358 } 359 for (int i = 0; i < num; ++i) { 360 const char* name = ucnv_getAvailableName(i); 361 ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name)); 362 if (javaCanonicalName.get() == NULL) { 363 return NULL; 364 } 365 env->SetObjectArrayElement(result, i, javaCanonicalName.get()); 366 if (env->ExceptionCheck()) { 367 return NULL; 368 } 369 } 370 return result; 371 } 372 CHARSET_ENCODER_CALLBACK(const void * rawContext,UConverterFromUnicodeArgs * args,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * status)373 static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args, 374 const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason, 375 UErrorCode* status) { 376 if (!rawContext) { 377 return; 378 } 379 const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext); 380 switch(reason) { 381 case UCNV_UNASSIGNED: 382 ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status); 383 return; 384 case UCNV_ILLEGAL: 385 case UCNV_IRREGULAR: 386 ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status); 387 return; 388 case UCNV_CLOSE: 389 delete ctx; 390 return; 391 default: 392 *status = U_ILLEGAL_ARGUMENT_ERROR; 393 return; 394 } 395 } 396 encoderReplaceCallback(const void * rawContext,UConverterFromUnicodeArgs * fromArgs,const UChar *,int32_t,UChar32,UConverterCallbackReason,UErrorCode * err)397 static void encoderReplaceCallback(const void* rawContext, 398 UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32, 399 UConverterCallbackReason, UErrorCode * err) { 400 if (rawContext == NULL) { 401 return; 402 } 403 const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext); 404 *err = U_ZERO_ERROR; 405 ucnv_cbFromUWriteBytes(fromArgs, context->replacementBytes, context->replacementByteCount, 0, err); 406 } 407 getFromUCallback(int32_t mode)408 static UConverterFromUCallback getFromUCallback(int32_t mode) { 409 switch(mode) { 410 case NativeConverter_IGNORE: return UCNV_FROM_U_CALLBACK_SKIP; 411 case NativeConverter_REPLACE: return encoderReplaceCallback; 412 case NativeConverter_REPORT: return UCNV_FROM_U_CALLBACK_STOP; 413 } 414 abort(); 415 } 416 NativeConverter_setCallbackEncode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jbyteArray javaReplacement)417 static void NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address, 418 jint onMalformedInput, jint onUnmappableInput, jbyteArray javaReplacement) { 419 UConverter* cnv = toUConverter(address); 420 if (cnv == NULL) { 421 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR); 422 return; 423 } 424 425 UConverterFromUCallback oldCallback = NULL; 426 const void* oldCallbackContext = NULL; 427 ucnv_getFromUCallBack(cnv, &oldCallback, const_cast<const void**>(&oldCallbackContext)); 428 429 EncoderCallbackContext* callbackContext = const_cast<EncoderCallbackContext*>( 430 reinterpret_cast<const EncoderCallbackContext*>(oldCallbackContext)); 431 // Hold the reference to any new callbackContext we create in a unique_ptr 432 // so that the default behavior is to collect it automatically if we exit 433 // early. 434 std::unique_ptr<EncoderCallbackContext> callbackContextDeleter; 435 if (callbackContext == NULL) { 436 callbackContext = new EncoderCallbackContext; 437 callbackContextDeleter.reset(callbackContext); 438 } 439 440 callbackContext->onMalformedInput = getFromUCallback(onMalformedInput); 441 callbackContext->onUnmappableInput = getFromUCallback(onUnmappableInput); 442 443 ScopedByteArrayRO replacementBytes(env, javaReplacement); 444 if (replacementBytes.get() == NULL 445 || replacementBytes.size() > sizeof(callbackContext->replacementBytes)) { 446 maybeThrowIcuException(env, "replacementBytes", U_ILLEGAL_ARGUMENT_ERROR); 447 return; 448 } 449 memcpy(callbackContext->replacementBytes, replacementBytes.get(), replacementBytes.size()); 450 callbackContext->replacementByteCount = replacementBytes.size(); 451 452 UErrorCode errorCode = U_ZERO_ERROR; 453 ucnv_setFromUCallBack(cnv, CHARSET_ENCODER_CALLBACK, callbackContext, NULL, NULL, &errorCode); 454 // Iff callbackContextDeleter holds a reference to a callbackContext we can 455 // prevent it being automatically deleted here as responsibility for deletion 456 // has passed to the code that closes the NativeConverter. 457 callbackContextDeleter.release(); 458 maybeThrowIcuException(env, "ucnv_setFromUCallBack", errorCode); 459 } 460 decoderIgnoreCallback(const void *,UConverterToUnicodeArgs *,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)461 static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) { 462 // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is 463 // never true for us. 464 *err = U_ZERO_ERROR; 465 } 466 decoderReplaceCallback(const void * rawContext,UConverterToUnicodeArgs * toArgs,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)467 static void decoderReplaceCallback(const void* rawContext, 468 UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason, 469 UErrorCode* err) { 470 if (!rawContext) { 471 return; 472 } 473 const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext); 474 *err = U_ZERO_ERROR; 475 ucnv_cbToUWriteUChars(toArgs,context->replacementChars, context->replacementCharCount, 0, err); 476 } 477 getToUCallback(int32_t mode)478 static UConverterToUCallback getToUCallback(int32_t mode) { 479 switch (mode) { 480 case NativeConverter_IGNORE: return decoderIgnoreCallback; 481 case NativeConverter_REPLACE: return decoderReplaceCallback; 482 case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP; 483 } 484 abort(); 485 } 486 CHARSET_DECODER_CALLBACK(const void * rawContext,UConverterToUnicodeArgs * args,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * status)487 static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args, 488 const char* codeUnits, int32_t length, 489 UConverterCallbackReason reason, UErrorCode* status) { 490 if (!rawContext) { 491 return; 492 } 493 const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext); 494 switch(reason) { 495 case UCNV_UNASSIGNED: 496 ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status); 497 return; 498 case UCNV_ILLEGAL: 499 case UCNV_IRREGULAR: 500 ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status); 501 return; 502 case UCNV_CLOSE: 503 delete ctx; 504 return; 505 default: 506 *status = U_ILLEGAL_ARGUMENT_ERROR; 507 return; 508 } 509 } 510 NativeConverter_setCallbackDecode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jstring javaReplacement)511 static void NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address, 512 jint onMalformedInput, jint onUnmappableInput, jstring javaReplacement) { 513 UConverter* cnv = toUConverter(address); 514 if (cnv == NULL) { 515 maybeThrowIcuException(env, "toConverter", U_ILLEGAL_ARGUMENT_ERROR); 516 return; 517 } 518 519 UConverterToUCallback oldCallback; 520 const void* oldCallbackContext; 521 ucnv_getToUCallBack(cnv, &oldCallback, &oldCallbackContext); 522 523 DecoderCallbackContext* callbackContext = const_cast<DecoderCallbackContext*>( 524 reinterpret_cast<const DecoderCallbackContext*>(oldCallbackContext)); 525 // Hold the reference to any new callbackContext we create in a unique_ptr 526 // so that the default behavior is to collect it automatically if we exit 527 // early. 528 std::unique_ptr<DecoderCallbackContext> callbackContextDeleter; 529 if (callbackContext == NULL) { 530 callbackContext = new DecoderCallbackContext; 531 callbackContextDeleter.reset(callbackContext); 532 } 533 534 callbackContext->onMalformedInput = getToUCallback(onMalformedInput); 535 callbackContext->onUnmappableInput = getToUCallback(onUnmappableInput); 536 537 ScopedStringChars replacement(env, javaReplacement); 538 if (replacement.get() == NULL 539 || replacement.size() > sizeof(callbackContext->replacementChars) / sizeof(UChar)) { 540 maybeThrowIcuException(env, "replacement", U_ILLEGAL_ARGUMENT_ERROR); 541 return; 542 } 543 u_strncpy(callbackContext->replacementChars, reinterpret_cast<const UChar*>(replacement.get()), replacement.size()); 544 callbackContext->replacementCharCount = replacement.size(); 545 546 UErrorCode errorCode = U_ZERO_ERROR; 547 ucnv_setToUCallBack(cnv, CHARSET_DECODER_CALLBACK, callbackContext, NULL, NULL, &errorCode); 548 // Iff callbackContextDeleter holds a reference to a callbackContext we can 549 // prevent it being automatically deleted here as responsibility for deletion 550 // has passed to the code that closes the NativeConverter. 551 callbackContextDeleter.release(); 552 maybeThrowIcuException(env, "ucnv_setToUCallBack", errorCode); 553 } 554 NativeConverter_getAveCharsPerByte(JNIEnv * env,jclass,jlong handle)555 static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) { 556 return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle)); 557 } 558 NativeConverter_getSubstitutionBytes(JNIEnv * env,jclass,jlong address)559 static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) { 560 UConverter* cnv = toUConverter(address); 561 if (cnv == NULL) { 562 return NULL; 563 } 564 UErrorCode status = U_ZERO_ERROR; 565 char replacementBytes[MAX_REPLACEMENT_LENGTH]; 566 int8_t len = sizeof(replacementBytes); 567 ucnv_getSubstChars(cnv, replacementBytes, &len, &status); 568 if (!U_SUCCESS(status)) { 569 return env->NewByteArray(0); 570 } 571 jbyteArray result = env->NewByteArray(len); 572 if (result == NULL) { 573 return NULL; 574 } 575 env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(replacementBytes)); 576 return result; 577 } 578 NativeConverter_contains(JNIEnv * env,jclass,jstring name1,jstring name2)579 static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) { 580 ScopedUtfChars name1Chars(env, name1); 581 if (name1Chars.c_str() == NULL) { 582 return JNI_FALSE; 583 } 584 ScopedUtfChars name2Chars(env, name2); 585 if (name2Chars.c_str() == NULL) { 586 return JNI_FALSE; 587 } 588 589 UErrorCode errorCode = U_ZERO_ERROR; 590 icu::LocalUConverterPointer converter1(ucnv_open(name1Chars.c_str(), &errorCode)); 591 icu::UnicodeSet set1; 592 ucnv_getUnicodeSet(&*converter1, set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode); 593 594 icu::LocalUConverterPointer converter2(ucnv_open(name2Chars.c_str(), &errorCode)); 595 icu::UnicodeSet set2; 596 ucnv_getUnicodeSet(&*converter2, set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode); 597 598 return U_SUCCESS(errorCode) && set1.containsAll(set2); 599 } 600 NativeConverter_charsetForName(JNIEnv * env,jclass,jstring charsetName)601 static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) { 602 ScopedUtfChars charsetNameChars(env, charsetName); 603 if (charsetNameChars.c_str() == NULL) { 604 return NULL; 605 } 606 607 // Get ICU's canonical name for this charset. 608 const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str()); 609 if (icuCanonicalName == NULL) { 610 return NULL; 611 } 612 613 // Get Java's canonical name for this charset. 614 jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName); 615 if (env->ExceptionCheck()) { 616 return NULL; 617 } 618 619 // Check that this charset is supported. 620 { 621 // ICU doesn't offer any "isSupported", so we just open and immediately close. 622 UErrorCode error = U_ZERO_ERROR; 623 icu::LocalUConverterPointer cnv(ucnv_open(icuCanonicalName, &error)); 624 if (!U_SUCCESS(error)) { 625 return NULL; 626 } 627 } 628 629 // Get the aliases for this charset. 630 std::vector<std::string> aliases; 631 if (!collectStandardNames(env, icuCanonicalName, "IANA", aliases)) { 632 return NULL; 633 } 634 if (!collectStandardNames(env, icuCanonicalName, "MIME", aliases)) { 635 return NULL; 636 } 637 if (!collectStandardNames(env, icuCanonicalName, "JAVA", aliases)) { 638 return NULL; 639 } 640 if (!collectStandardNames(env, icuCanonicalName, "WINDOWS", aliases)) { 641 return NULL; 642 } 643 jobjectArray javaAliases = toStringArray(env, aliases); 644 if (env->ExceptionCheck()) { 645 return NULL; 646 } 647 648 // Construct the CharsetICU object. 649 static jmethodID charsetConstructor = env->GetMethodID(JniConstants::GetCharsetICUClass(env), "<init>", 650 "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V"); 651 if (env->ExceptionCheck()) { 652 return NULL; 653 } 654 655 char const * versionedIcuCanonicalName = getVersionedIcuCanonicalName(icuCanonicalName); 656 jstring versionedIcuCanonicalNameStr = env->NewStringUTF(versionedIcuCanonicalName); 657 if (env->ExceptionCheck()) { 658 return NULL; 659 } 660 661 return env->NewObject(JniConstants::GetCharsetICUClass(env), charsetConstructor, 662 javaCanonicalName, versionedIcuCanonicalNameStr, javaAliases); 663 } 664 FreeNativeConverter(void * converter)665 static void FreeNativeConverter(void *converter) { 666 ucnv_close(reinterpret_cast<UConverter*>(converter)); 667 } 668 NativeConverter_getNativeFinalizer(JNIEnv *,jclass)669 static jlong NativeConverter_getNativeFinalizer(JNIEnv*, jclass) { 670 return reinterpret_cast<jlong>(&FreeNativeConverter); 671 } 672 NativeConverter_getNativeSize(JNIEnv *,jclass)673 static jlong NativeConverter_getNativeSize(JNIEnv*, jclass) { 674 // TODO: Improve estimate. 675 return 200; 676 } 677 678 static JNINativeMethod gMethods[] = { 679 NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"), 680 NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"), 681 NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"), 682 NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"), 683 NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"), 684 NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"), 685 NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"), 686 NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"), 687 NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"), 688 NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"), 689 NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"), 690 NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"), 691 NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"), 692 NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"), 693 NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JIILjava/lang/String;)V"), 694 NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)V"), 695 NATIVE_METHOD(NativeConverter, getNativeFinalizer, "()J"), 696 NATIVE_METHOD(NativeConverter, getNativeSize, "()J") 697 }; register_libcore_icu_NativeConverter(JNIEnv * env)698 void register_libcore_icu_NativeConverter(JNIEnv* env) { 699 jniRegisterNativeMethods(env, "libcore/icu/NativeConverter", gMethods, NELEM(gMethods)); 700 } 701