1 /**
2 *******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 *
7 *
8 *******************************************************************************
9 */
10 /*
11 * (C) Copyright IBM Corp. 2000 - All Rights Reserved
12 * A JNI wrapper to ICU native converter Interface
13 * @author: Ram Viswanadha
14 */
15
16 #define LOG_TAG "NativeConverter"
17
18 #include <stdlib.h>
19 #include <string.h>
20
21 #include <memory>
22 #include <vector>
23
24 #include <android/log.h>
25 #include <nativehelper/JNIHelp.h>
26 #include <nativehelper/ScopedLocalRef.h>
27 #include <nativehelper/ScopedPrimitiveArray.h>
28 #include <nativehelper/ScopedStringChars.h>
29 #include <nativehelper/ScopedUtfChars.h>
30 #include <nativehelper/jni_macros.h>
31 #include <nativehelper/toStringArray.h>
32
33 #include "IcuUtilities.h"
34 #include "JniConstants.h"
35 #include "JniException.h"
36 #include "unicode/ucnv.h"
37 #include "unicode/ucnv_cb.h"
38 #include "unicode/uniset.h"
39 #include "unicode/ustring.h"
40 #include "unicode/utypes.h"
41
// Action codes accepted by setCallbackEncode/setCallbackDecode; they are mapped
// onto ICU callbacks by getFromUCallback() and getToUCallback().
constexpr int NativeConverter_REPORT = 0;
constexpr int NativeConverter_IGNORE = 1;
constexpr int NativeConverter_REPLACE = 2;

// Capacity of the replacement buffers held in the callback contexts.
constexpr int MAX_REPLACEMENT_LENGTH = 32; // equivalent to UCNV_ERROR_BUFFER_LENGTH
47
48 struct DecoderCallbackContext {
49 UChar replacementChars[MAX_REPLACEMENT_LENGTH];
50 size_t replacementCharCount;
51 UConverterToUCallback onUnmappableInput;
52 UConverterToUCallback onMalformedInput;
53 };
54
55 struct EncoderCallbackContext {
56 char replacementBytes[MAX_REPLACEMENT_LENGTH];
57 size_t replacementByteCount;
58 UConverterFromUCallback onUnmappableInput;
59 UConverterFromUCallback onMalformedInput;
60 };
61
toUConverter(jlong address)62 static UConverter* toUConverter(jlong address) {
63 return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address));
64 }
65
collectStandardNames(JNIEnv * env,const char * canonicalName,const char * standard,std::vector<std::string> & result)66 static bool collectStandardNames(JNIEnv* env, const char* canonicalName, const char* standard,
67 std::vector<std::string>& result) {
68 UErrorCode status = U_ZERO_ERROR;
69 icu::UStringEnumeration e(ucnv_openStandardNames(canonicalName, standard, &status));
70 if (maybeThrowIcuException(env, "ucnv_openStandardNames", status)) {
71 return false;
72 }
73
74 int32_t count = e.count(status);
75 if (maybeThrowIcuException(env, "StringEnumeration::count", status)) {
76 return false;
77 }
78
79 for (int32_t i = 0; i < count; ++i) {
80 const icu::UnicodeString* string = e.snext(status);
81 if (maybeThrowIcuException(env, "StringEnumeration::snext", status)) {
82 return false;
83 }
84 std::string s;
85 string->toUTF8String(s);
86 if (s.find_first_of("+,") == std::string::npos) {
87 result.push_back(s);
88 }
89 }
90
91 return true;
92 }
93
getICUCanonicalName(const char * name)94 static const char* getICUCanonicalName(const char* name) {
95 UErrorCode error = U_ZERO_ERROR;
96 const char* canonicalName = NULL;
97 if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) {
98 return canonicalName;
99 } else if ((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) {
100 return canonicalName;
101 } else if ((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) {
102 return canonicalName;
103 } else if ((canonicalName = ucnv_getAlias(name, 0, &error)) != NULL) {
104 // We have some aliases in the form x-blah .. match those first.
105 return canonicalName;
106 } else if (strstr(name, "x-") == name) {
107 // Check if the converter can be opened with the name given.
108 error = U_ZERO_ERROR;
109 icu::LocalUConverterPointer cnv(ucnv_open(name + 2, &error));
110 if (U_SUCCESS(error)) {
111 return name + 2;
112 }
113 }
114 return NULL;
115 }
116
117 // If a charset listed in the IANA Charset Registry is supported by an implementation
118 // of the Java platform then its canonical name must be the name listed in the registry.
119 // Many charsets are given more than one name in the registry, in which case the registry
120 // identifies one of the names as MIME-preferred. If a charset has more than one registry
121 // name then its canonical name must be the MIME-preferred name and the other names in
122 // the registry must be valid aliases. If a supported charset is not listed in the IANA
123 // registry then its canonical name must begin with one of the strings "X-" or "x-".
getJavaCanonicalName(JNIEnv * env,const char * icuCanonicalName)124 static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) {
125 UErrorCode status = U_ZERO_ERROR;
126
127 // Check to see if this is a well-known MIME or IANA name.
128 const char* cName = NULL;
129 if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) {
130 return env->NewStringUTF(cName);
131 } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) {
132 return env->NewStringUTF(cName);
133 }
134
135 // Check to see if an alias already exists with "x-" prefix, if yes then
136 // make that the canonical name.
137 int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status);
138 for (int i = 0; i < aliasCount; ++i) {
139 const char* name = ucnv_getAlias(icuCanonicalName, i, &status);
140 if (name != NULL && name[0] == 'x' && name[1] == '-') {
141 return env->NewStringUTF(name);
142 }
143 }
144
145 // As a last resort, prepend "x-" to any alias and make that the canonical name.
146 status = U_ZERO_ERROR;
147 const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status);
148 if (name == NULL && strchr(icuCanonicalName, ',') != NULL) {
149 name = ucnv_getAlias(icuCanonicalName, 1, &status);
150 }
151 // If there is no UTR22 canonical name then just return the original name.
152 if (name == NULL) {
153 name = icuCanonicalName;
154 }
155 std::unique_ptr<char[]> result(new char[2 + strlen(name) + 1]);
156 strcpy(&result[0], "x-");
157 strcat(&result[0], name);
158 return env->NewStringUTF(&result[0]);
159 }
160
// Maps a plain ICU canonical converter name onto the name that should really be
// opened, which may carry a ",version=N" suffix. The unversioned name is still
// what is used to look up the aliases and the Java name.
//
// Only "UTF-16" is rewritten: ICU's plain UTF-16 converter writes
// platform-endian bytes with a BOM, whereas "UTF-16,version=2" writes
// big-endian with a BOM, which is what the Charset javadoc requires. Every
// other name is returned unchanged (the same pointer that was passed in).
static char const * getVersionedIcuCanonicalName(char const * icuCanonicalName) {
    const bool isPlainUtf16 = (strcmp(icuCanonicalName, "UTF-16") == 0);
    return isPlainUtf16 ? "UTF-16,version=2" : icuCanonicalName;
}
173
NativeConverter_openConverter(JNIEnv * env,jclass,jstring converterName)174 static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) {
175 ScopedUtfChars converterNameChars(env, converterName);
176 if (converterNameChars.c_str() == NULL) {
177 // Extra debugging check that we do have an exception if the we could not
178 // create a string. See b/62612946.
179 if (env->ExceptionCheck()) {
180 return 0;
181 }
182 maybeThrowIcuException(env, "openConverter", U_ILLEGAL_ARGUMENT_ERROR);
183 return 0;
184 }
185 UErrorCode status = U_ZERO_ERROR;
186 UConverter* cnv = ucnv_open(converterNameChars.c_str(), &status);
187 maybeThrowIcuException(env, "ucnv_open", status);
188 if (env->ExceptionCheck()) {
189 return 0;
190 }
191 if (cnv == NULL) {
192 // Extra debugging exception in case cnv is null but ICU did not report
193 // an error. See b/62612946.
194 maybeThrowIcuException(env, "openConverter", U_ILLEGAL_ARGUMENT_ERROR);
195 return 0;
196 }
197 return reinterpret_cast<uintptr_t>(cnv);
198 }
199
NativeConverter_closeConverter(JNIEnv *,jclass,jlong address)200 static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) {
201 ucnv_close(toUConverter(address));
202 }
203
shouldCodecThrow(jboolean flush,UErrorCode error)204 static bool shouldCodecThrow(jboolean flush, UErrorCode error) {
205 if (flush) {
206 return (error != U_BUFFER_OVERFLOW_ERROR && error != U_TRUNCATED_CHAR_FOUND);
207 } else {
208 return (error != U_BUFFER_OVERFLOW_ERROR && error != U_INVALID_CHAR_FOUND && error != U_ILLEGAL_CHAR_FOUND);
209 }
210 }
211
NativeConverter_encode(JNIEnv * env,jclass,jlong address,jcharArray source,jint sourceEnd,jbyteArray target,jint targetEnd,jintArray data,jboolean flush)212 static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address,
213 jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd,
214 jintArray data, jboolean flush) {
215
216 UConverter* cnv = toUConverter(address);
217 if (cnv == NULL) {
218 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
219 return U_ILLEGAL_ARGUMENT_ERROR;
220 }
221 ScopedCharArrayRO uSource(env, source);
222 if (uSource.get() == NULL) {
223 maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR);
224 return U_ILLEGAL_ARGUMENT_ERROR;
225 }
226 ScopedByteArrayRW uTarget(env, target);
227 if (uTarget.get() == NULL) {
228 maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR);
229 return U_ILLEGAL_ARGUMENT_ERROR;
230 }
231 ScopedIntArrayRW myData(env, data);
232 if (myData.get() == NULL) {
233 maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR);
234 return U_ILLEGAL_ARGUMENT_ERROR;
235 }
236
237 // Do the conversion.
238 jint* sourceOffset = &myData[0];
239 jint* targetOffset = &myData[1];
240 const jchar* mySource = uSource.get() + *sourceOffset;
241 const UChar* mySourceLimit= reinterpret_cast<const UChar*>(uSource.get()) + sourceEnd;
242 char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset);
243 const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd);
244 UErrorCode errorCode = U_ZERO_ERROR;
245 ucnv_fromUnicode(cnv, &cTarget, cTargetLimit, reinterpret_cast<const UChar**>(&mySource), mySourceLimit, NULL, (UBool) flush, &errorCode);
246 *sourceOffset = (mySource - uSource.get()) - *sourceOffset;
247 *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get());
248
249 // If there was an error, count the problematic characters.
250 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND ||
251 errorCode == U_TRUNCATED_CHAR_FOUND) {
252 int8_t invalidUCharCount = 32;
253 UChar invalidUChars[32];
254 UErrorCode minorErrorCode = U_ZERO_ERROR;
255 ucnv_getInvalidUChars(cnv, invalidUChars, &invalidUCharCount, &minorErrorCode);
256 if (U_SUCCESS(minorErrorCode)) {
257 myData[2] = invalidUCharCount;
258 }
259 }
260
261 // Managed code handles some cases; throw all other errors.
262 if (shouldCodecThrow(flush, errorCode)) {
263 maybeThrowIcuException(env, "ucnv_fromUnicode", errorCode);
264 }
265 return errorCode;
266 }
267
NativeConverter_decode(JNIEnv * env,jclass,jlong address,jbyteArray source,jint sourceEnd,jcharArray target,jint targetEnd,jintArray data,jboolean flush)268 static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address,
269 jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd,
270 jintArray data, jboolean flush) {
271
272 UConverter* cnv = toUConverter(address);
273 if (cnv == NULL) {
274 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
275 return U_ILLEGAL_ARGUMENT_ERROR;
276 }
277 ScopedByteArrayRO uSource(env, source);
278 if (uSource.get() == NULL) {
279 maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR);
280 return U_ILLEGAL_ARGUMENT_ERROR;
281 }
282 ScopedCharArrayRW uTarget(env, target);
283 if (uTarget.get() == NULL) {
284 maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR);
285 return U_ILLEGAL_ARGUMENT_ERROR;
286 }
287 ScopedIntArrayRW myData(env, data);
288 if (myData.get() == NULL) {
289 maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR);
290 return U_ILLEGAL_ARGUMENT_ERROR;
291 }
292
293 // Do the conversion.
294 jint* sourceOffset = &myData[0];
295 jint* targetOffset = &myData[1];
296 const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset);
297 const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd);
298 UChar* cTarget = reinterpret_cast<UChar*>(uTarget.get()) + *targetOffset;
299 const UChar* cTargetLimit = reinterpret_cast<UChar*>(uTarget.get()) + targetEnd;
300 UErrorCode errorCode = U_ZERO_ERROR;
301 ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode);
302 *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset;
303 *targetOffset = cTarget - reinterpret_cast<UChar*>(uTarget.get()) - *targetOffset;
304
305 // If there was an error, count the problematic bytes.
306 if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND ||
307 errorCode == U_TRUNCATED_CHAR_FOUND) {
308 int8_t invalidByteCount = 32;
309 char invalidBytes[32] = {'\0'};
310 UErrorCode minorErrorCode = U_ZERO_ERROR;
311 ucnv_getInvalidChars(cnv, invalidBytes, &invalidByteCount, &minorErrorCode);
312 if (U_SUCCESS(minorErrorCode)) {
313 myData[2] = invalidByteCount;
314 }
315 }
316
317 // Managed code handles some cases; throw all other errors.
318 if (shouldCodecThrow(flush, errorCode)) {
319 maybeThrowIcuException(env, "ucnv_toUnicode", errorCode);
320 }
321 return errorCode;
322 }
323
NativeConverter_resetByteToChar(JNIEnv *,jclass,jlong address)324 static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) {
325 UConverter* cnv = toUConverter(address);
326 if (cnv) {
327 ucnv_resetToUnicode(cnv);
328 }
329 }
330
NativeConverter_resetCharToByte(JNIEnv *,jclass,jlong address)331 static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) {
332 UConverter* cnv = toUConverter(address);
333 if (cnv) {
334 ucnv_resetFromUnicode(cnv);
335 }
336 }
337
NativeConverter_getMaxBytesPerChar(JNIEnv *,jclass,jlong address)338 static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) {
339 UConverter* cnv = toUConverter(address);
340 return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1;
341 }
342
NativeConverter_getMinBytesPerChar(JNIEnv *,jclass,jlong address)343 static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) {
344 UConverter* cnv = toUConverter(address);
345 return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1;
346 }
347
NativeConverter_getAveBytesPerChar(JNIEnv *,jclass,jlong address)348 static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) {
349 UConverter* cnv = toUConverter(address);
350 return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1;
351 }
352
NativeConverter_getAvailableCharsetNames(JNIEnv * env,jclass)353 static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) {
354 int32_t num = ucnv_countAvailable();
355 jobjectArray result = env->NewObjectArray(num, JniConstants::GetStringClass(env), NULL);
356 if (result == NULL) {
357 return NULL;
358 }
359 for (int i = 0; i < num; ++i) {
360 const char* name = ucnv_getAvailableName(i);
361 ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name));
362 if (javaCanonicalName.get() == NULL) {
363 return NULL;
364 }
365 env->SetObjectArrayElement(result, i, javaCanonicalName.get());
366 if (env->ExceptionCheck()) {
367 return NULL;
368 }
369 }
370 return result;
371 }
372
CHARSET_ENCODER_CALLBACK(const void * rawContext,UConverterFromUnicodeArgs * args,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * status)373 static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args,
374 const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason,
375 UErrorCode* status) {
376 if (!rawContext) {
377 return;
378 }
379 const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
380 switch(reason) {
381 case UCNV_UNASSIGNED:
382 ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status);
383 return;
384 case UCNV_ILLEGAL:
385 case UCNV_IRREGULAR:
386 ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status);
387 return;
388 case UCNV_CLOSE:
389 delete ctx;
390 return;
391 default:
392 *status = U_ILLEGAL_ARGUMENT_ERROR;
393 return;
394 }
395 }
396
encoderReplaceCallback(const void * rawContext,UConverterFromUnicodeArgs * fromArgs,const UChar *,int32_t,UChar32,UConverterCallbackReason,UErrorCode * err)397 static void encoderReplaceCallback(const void* rawContext,
398 UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32,
399 UConverterCallbackReason, UErrorCode * err) {
400 if (rawContext == NULL) {
401 return;
402 }
403 const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
404 *err = U_ZERO_ERROR;
405 ucnv_cbFromUWriteBytes(fromArgs, context->replacementBytes, context->replacementByteCount, 0, err);
406 }
407
getFromUCallback(int32_t mode)408 static UConverterFromUCallback getFromUCallback(int32_t mode) {
409 switch(mode) {
410 case NativeConverter_IGNORE: return UCNV_FROM_U_CALLBACK_SKIP;
411 case NativeConverter_REPLACE: return encoderReplaceCallback;
412 case NativeConverter_REPORT: return UCNV_FROM_U_CALLBACK_STOP;
413 }
414 abort();
415 }
416
NativeConverter_setCallbackEncode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jbyteArray javaReplacement)417 static void NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address,
418 jint onMalformedInput, jint onUnmappableInput, jbyteArray javaReplacement) {
419 UConverter* cnv = toUConverter(address);
420 if (cnv == NULL) {
421 maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
422 return;
423 }
424
425 UConverterFromUCallback oldCallback = NULL;
426 const void* oldCallbackContext = NULL;
427 ucnv_getFromUCallBack(cnv, &oldCallback, const_cast<const void**>(&oldCallbackContext));
428
429 EncoderCallbackContext* callbackContext = const_cast<EncoderCallbackContext*>(
430 reinterpret_cast<const EncoderCallbackContext*>(oldCallbackContext));
431 // Hold the reference to any new callbackContext we create in a unique_ptr
432 // so that the default behavior is to collect it automatically if we exit
433 // early.
434 std::unique_ptr<EncoderCallbackContext> callbackContextDeleter;
435 if (callbackContext == NULL) {
436 callbackContext = new EncoderCallbackContext;
437 callbackContextDeleter.reset(callbackContext);
438 }
439
440 callbackContext->onMalformedInput = getFromUCallback(onMalformedInput);
441 callbackContext->onUnmappableInput = getFromUCallback(onUnmappableInput);
442
443 ScopedByteArrayRO replacementBytes(env, javaReplacement);
444 if (replacementBytes.get() == NULL
445 || replacementBytes.size() > sizeof(callbackContext->replacementBytes)) {
446 maybeThrowIcuException(env, "replacementBytes", U_ILLEGAL_ARGUMENT_ERROR);
447 return;
448 }
449 memcpy(callbackContext->replacementBytes, replacementBytes.get(), replacementBytes.size());
450 callbackContext->replacementByteCount = replacementBytes.size();
451
452 UErrorCode errorCode = U_ZERO_ERROR;
453 ucnv_setFromUCallBack(cnv, CHARSET_ENCODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
454 // Iff callbackContextDeleter holds a reference to a callbackContext we can
455 // prevent it being automatically deleted here as responsibility for deletion
456 // has passed to the code that closes the NativeConverter.
457 callbackContextDeleter.release();
458 maybeThrowIcuException(env, "ucnv_setFromUCallBack", errorCode);
459 }
460
decoderIgnoreCallback(const void *,UConverterToUnicodeArgs *,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)461 static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) {
462 // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is
463 // never true for us.
464 *err = U_ZERO_ERROR;
465 }
466
decoderReplaceCallback(const void * rawContext,UConverterToUnicodeArgs * toArgs,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)467 static void decoderReplaceCallback(const void* rawContext,
468 UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason,
469 UErrorCode* err) {
470 if (!rawContext) {
471 return;
472 }
473 const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
474 *err = U_ZERO_ERROR;
475 ucnv_cbToUWriteUChars(toArgs,context->replacementChars, context->replacementCharCount, 0, err);
476 }
477
getToUCallback(int32_t mode)478 static UConverterToUCallback getToUCallback(int32_t mode) {
479 switch (mode) {
480 case NativeConverter_IGNORE: return decoderIgnoreCallback;
481 case NativeConverter_REPLACE: return decoderReplaceCallback;
482 case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP;
483 }
484 abort();
485 }
486
CHARSET_DECODER_CALLBACK(const void * rawContext,UConverterToUnicodeArgs * args,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * status)487 static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args,
488 const char* codeUnits, int32_t length,
489 UConverterCallbackReason reason, UErrorCode* status) {
490 if (!rawContext) {
491 return;
492 }
493 const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
494 switch(reason) {
495 case UCNV_UNASSIGNED:
496 ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status);
497 return;
498 case UCNV_ILLEGAL:
499 case UCNV_IRREGULAR:
500 ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status);
501 return;
502 case UCNV_CLOSE:
503 delete ctx;
504 return;
505 default:
506 *status = U_ILLEGAL_ARGUMENT_ERROR;
507 return;
508 }
509 }
510
NativeConverter_setCallbackDecode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jstring javaReplacement)511 static void NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address,
512 jint onMalformedInput, jint onUnmappableInput, jstring javaReplacement) {
513 UConverter* cnv = toUConverter(address);
514 if (cnv == NULL) {
515 maybeThrowIcuException(env, "toConverter", U_ILLEGAL_ARGUMENT_ERROR);
516 return;
517 }
518
519 UConverterToUCallback oldCallback;
520 const void* oldCallbackContext;
521 ucnv_getToUCallBack(cnv, &oldCallback, &oldCallbackContext);
522
523 DecoderCallbackContext* callbackContext = const_cast<DecoderCallbackContext*>(
524 reinterpret_cast<const DecoderCallbackContext*>(oldCallbackContext));
525 // Hold the reference to any new callbackContext we create in a unique_ptr
526 // so that the default behavior is to collect it automatically if we exit
527 // early.
528 std::unique_ptr<DecoderCallbackContext> callbackContextDeleter;
529 if (callbackContext == NULL) {
530 callbackContext = new DecoderCallbackContext;
531 callbackContextDeleter.reset(callbackContext);
532 }
533
534 callbackContext->onMalformedInput = getToUCallback(onMalformedInput);
535 callbackContext->onUnmappableInput = getToUCallback(onUnmappableInput);
536
537 ScopedStringChars replacement(env, javaReplacement);
538 if (replacement.get() == NULL
539 || replacement.size() > sizeof(callbackContext->replacementChars) / sizeof(UChar)) {
540 maybeThrowIcuException(env, "replacement", U_ILLEGAL_ARGUMENT_ERROR);
541 return;
542 }
543 u_strncpy(callbackContext->replacementChars, reinterpret_cast<const UChar*>(replacement.get()), replacement.size());
544 callbackContext->replacementCharCount = replacement.size();
545
546 UErrorCode errorCode = U_ZERO_ERROR;
547 ucnv_setToUCallBack(cnv, CHARSET_DECODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
548 // Iff callbackContextDeleter holds a reference to a callbackContext we can
549 // prevent it being automatically deleted here as responsibility for deletion
550 // has passed to the code that closes the NativeConverter.
551 callbackContextDeleter.release();
552 maybeThrowIcuException(env, "ucnv_setToUCallBack", errorCode);
553 }
554
NativeConverter_getAveCharsPerByte(JNIEnv * env,jclass,jlong handle)555 static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) {
556 return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle));
557 }
558
NativeConverter_getSubstitutionBytes(JNIEnv * env,jclass,jlong address)559 static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) {
560 UConverter* cnv = toUConverter(address);
561 if (cnv == NULL) {
562 return NULL;
563 }
564 UErrorCode status = U_ZERO_ERROR;
565 char replacementBytes[MAX_REPLACEMENT_LENGTH];
566 int8_t len = sizeof(replacementBytes);
567 ucnv_getSubstChars(cnv, replacementBytes, &len, &status);
568 if (!U_SUCCESS(status)) {
569 return env->NewByteArray(0);
570 }
571 jbyteArray result = env->NewByteArray(len);
572 if (result == NULL) {
573 return NULL;
574 }
575 env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(replacementBytes));
576 return result;
577 }
578
NativeConverter_contains(JNIEnv * env,jclass,jstring name1,jstring name2)579 static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) {
580 ScopedUtfChars name1Chars(env, name1);
581 if (name1Chars.c_str() == NULL) {
582 return JNI_FALSE;
583 }
584 ScopedUtfChars name2Chars(env, name2);
585 if (name2Chars.c_str() == NULL) {
586 return JNI_FALSE;
587 }
588
589 UErrorCode errorCode = U_ZERO_ERROR;
590 icu::LocalUConverterPointer converter1(ucnv_open(name1Chars.c_str(), &errorCode));
591 icu::UnicodeSet set1;
592 ucnv_getUnicodeSet(&*converter1, set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
593
594 icu::LocalUConverterPointer converter2(ucnv_open(name2Chars.c_str(), &errorCode));
595 icu::UnicodeSet set2;
596 ucnv_getUnicodeSet(&*converter2, set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
597
598 return U_SUCCESS(errorCode) && set1.containsAll(set2);
599 }
600
NativeConverter_charsetForName(JNIEnv * env,jclass,jstring charsetName)601 static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) {
602 ScopedUtfChars charsetNameChars(env, charsetName);
603 if (charsetNameChars.c_str() == NULL) {
604 return NULL;
605 }
606
607 // Get ICU's canonical name for this charset.
608 const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str());
609 if (icuCanonicalName == NULL) {
610 return NULL;
611 }
612
613 // Get Java's canonical name for this charset.
614 jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName);
615 if (env->ExceptionCheck()) {
616 return NULL;
617 }
618
619 // Check that this charset is supported.
620 {
621 // ICU doesn't offer any "isSupported", so we just open and immediately close.
622 UErrorCode error = U_ZERO_ERROR;
623 icu::LocalUConverterPointer cnv(ucnv_open(icuCanonicalName, &error));
624 if (!U_SUCCESS(error)) {
625 return NULL;
626 }
627 }
628
629 // Get the aliases for this charset.
630 std::vector<std::string> aliases;
631 if (!collectStandardNames(env, icuCanonicalName, "IANA", aliases)) {
632 return NULL;
633 }
634 if (!collectStandardNames(env, icuCanonicalName, "MIME", aliases)) {
635 return NULL;
636 }
637 if (!collectStandardNames(env, icuCanonicalName, "JAVA", aliases)) {
638 return NULL;
639 }
640 if (!collectStandardNames(env, icuCanonicalName, "WINDOWS", aliases)) {
641 return NULL;
642 }
643 jobjectArray javaAliases = toStringArray(env, aliases);
644 if (env->ExceptionCheck()) {
645 return NULL;
646 }
647
648 // Construct the CharsetICU object.
649 static jmethodID charsetConstructor = env->GetMethodID(JniConstants::GetCharsetICUClass(env), "<init>",
650 "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V");
651 if (env->ExceptionCheck()) {
652 return NULL;
653 }
654
655 char const * versionedIcuCanonicalName = getVersionedIcuCanonicalName(icuCanonicalName);
656 jstring versionedIcuCanonicalNameStr = env->NewStringUTF(versionedIcuCanonicalName);
657 if (env->ExceptionCheck()) {
658 return NULL;
659 }
660
661 return env->NewObject(JniConstants::GetCharsetICUClass(env), charsetConstructor,
662 javaCanonicalName, versionedIcuCanonicalNameStr, javaAliases);
663 }
664
FreeNativeConverter(void * converter)665 static void FreeNativeConverter(void *converter) {
666 ucnv_close(reinterpret_cast<UConverter*>(converter));
667 }
668
NativeConverter_getNativeFinalizer(JNIEnv *,jclass)669 static jlong NativeConverter_getNativeFinalizer(JNIEnv*, jclass) {
670 return reinterpret_cast<jlong>(&FreeNativeConverter);
671 }
672
NativeConverter_getNativeSize(JNIEnv *,jclass)673 static jlong NativeConverter_getNativeSize(JNIEnv*, jclass) {
674 // TODO: Improve estimate.
675 return 200;
676 }
677
678 static JNINativeMethod gMethods[] = {
679 NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"),
680 NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"),
681 NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"),
682 NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"),
683 NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"),
684 NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"),
685 NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"),
686 NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"),
687 NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"),
688 NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"),
689 NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"),
690 NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"),
691 NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"),
692 NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"),
693 NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JIILjava/lang/String;)V"),
694 NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)V"),
695 NATIVE_METHOD(NativeConverter, getNativeFinalizer, "()J"),
696 NATIVE_METHOD(NativeConverter, getNativeSize, "()J")
697 };
register_libcore_icu_NativeConverter(JNIEnv * env)698 void register_libcore_icu_NativeConverter(JNIEnv* env) {
699 jniRegisterNativeMethods(env, "libcore/icu/NativeConverter", gMethods, NELEM(gMethods));
700 }
701