1 /**
2 *******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and    *
4 * others. All Rights Reserved.                                                *
5 *******************************************************************************
6 *
7 *
8 *******************************************************************************
9 */
10 /*
11  * (C) Copyright IBM Corp. 2000 - All Rights Reserved
12  *  A JNI wrapper to ICU native converter Interface
13  * @author: Ram Viswanadha
14  */
15 
16 #define LOG_TAG "NativeConverter"
17 
18 #include <stdlib.h>
19 #include <string.h>
20 
21 #include <memory>
22 #include <vector>
23 
24 #include <android/log.h>
25 #include <nativehelper/JNIHelp.h>
26 #include <nativehelper/ScopedLocalRef.h>
27 #include <nativehelper/ScopedPrimitiveArray.h>
28 #include <nativehelper/ScopedStringChars.h>
29 #include <nativehelper/ScopedUtfChars.h>
30 #include <nativehelper/jni_macros.h>
31 #include <nativehelper/toStringArray.h>
32 
33 #include "IcuUtilities.h"
34 #include "JniConstants.h"
35 #include "JniException.h"
36 #include "unicode/ucnv.h"
37 #include "unicode/ucnv_cb.h"
38 #include "unicode/uniset.h"
39 #include "unicode/ustring.h"
40 #include "unicode/utypes.h"
41 
// Error-action codes received as the onMalformedInput / onUnmappableInput
// arguments of the setCallback* functions; getFromUCallback() and
// getToUCallback() map them to ICU callbacks.
#define NativeConverter_REPORT 0
#define NativeConverter_IGNORE 1
#define NativeConverter_REPLACE 2

// Element capacity of the replacement buffers in the callback contexts and of
// the scratch buffer in NativeConverter_getSubstitutionBytes.
#define MAX_REPLACEMENT_LENGTH 32 // equivalent to UCNV_ERROR_BUFFER_LENGTH
47 
// Per-converter state handed to CHARSET_DECODER_CALLBACK as its context.
// Allocated in NativeConverter_setCallbackDecode and deleted by the callback
// when it is invoked with UCNV_CLOSE.
struct DecoderCallbackContext {
    // Replacement text written by decoderReplaceCallback.
    UChar replacementChars[MAX_REPLACEMENT_LENGTH];
    // Number of UChars of replacementChars that are in use.
    size_t replacementCharCount;
    // Handler dispatched for UCNV_UNASSIGNED.
    UConverterToUCallback onUnmappableInput;
    // Handler dispatched for UCNV_ILLEGAL and UCNV_IRREGULAR.
    UConverterToUCallback onMalformedInput;
};
54 
// Per-converter state handed to CHARSET_ENCODER_CALLBACK as its context.
// Allocated in NativeConverter_setCallbackEncode and deleted by the callback
// when it is invoked with UCNV_CLOSE.
struct EncoderCallbackContext {
    // Replacement bytes written by encoderReplaceCallback.
    char replacementBytes[MAX_REPLACEMENT_LENGTH];
    // Number of bytes of replacementBytes that are in use.
    size_t replacementByteCount;
    // Handler dispatched for UCNV_UNASSIGNED.
    UConverterFromUCallback onUnmappableInput;
    // Handler dispatched for UCNV_ILLEGAL and UCNV_IRREGULAR.
    UConverterFromUCallback onMalformedInput;
};
61 
toUConverter(jlong address)62 static UConverter* toUConverter(jlong address) {
63     return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address));
64 }
65 
collectStandardNames(JNIEnv * env,const char * canonicalName,const char * standard,std::vector<std::string> & result)66 static bool collectStandardNames(JNIEnv* env, const char* canonicalName, const char* standard,
67                                  std::vector<std::string>& result) {
68   UErrorCode status = U_ZERO_ERROR;
69   icu::UStringEnumeration e(ucnv_openStandardNames(canonicalName, standard, &status));
70   if (maybeThrowIcuException(env, "ucnv_openStandardNames", status)) {
71     return false;
72   }
73 
74   int32_t count = e.count(status);
75   if (maybeThrowIcuException(env, "StringEnumeration::count", status)) {
76     return false;
77   }
78 
79   for (int32_t i = 0; i < count; ++i) {
80     const icu::UnicodeString* string = e.snext(status);
81     if (maybeThrowIcuException(env, "StringEnumeration::snext", status)) {
82       return false;
83     }
84     std::string s;
85     string->toUTF8String(s);
86     if (s.find_first_of("+,") == std::string::npos) {
87       result.push_back(s);
88     }
89   }
90 
91   return true;
92 }
93 
getICUCanonicalName(const char * name)94 static const char* getICUCanonicalName(const char* name) {
95   UErrorCode error = U_ZERO_ERROR;
96   const char* canonicalName = NULL;
97   if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) {
98     return canonicalName;
99   } else if ((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) {
100     return canonicalName;
101   } else if ((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) {
102     return canonicalName;
103   } else if ((canonicalName = ucnv_getAlias(name, 0, &error)) != NULL) {
104     // We have some aliases in the form x-blah .. match those first.
105     return canonicalName;
106   } else if (strstr(name, "x-") == name) {
107     // Check if the converter can be opened with the name given.
108     error = U_ZERO_ERROR;
109     icu::LocalUConverterPointer cnv(ucnv_open(name + 2, &error));
110     if (U_SUCCESS(error)) {
111       return name + 2;
112     }
113   }
114   return NULL;
115 }
116 
117 // If a charset listed in the IANA Charset Registry is supported by an implementation
118 // of the Java platform then its canonical name must be the name listed in the registry.
119 // Many charsets are given more than one name in the registry, in which case the registry
120 // identifies one of the names as MIME-preferred. If a charset has more than one registry
121 // name then its canonical name must be the MIME-preferred name and the other names in
122 // the registry must be valid aliases. If a supported charset is not listed in the IANA
123 // registry then its canonical name must begin with one of the strings "X-" or "x-".
getJavaCanonicalName(JNIEnv * env,const char * icuCanonicalName)124 static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) {
125   UErrorCode status = U_ZERO_ERROR;
126 
127   // Check to see if this is a well-known MIME or IANA name.
128   const char* cName = NULL;
129   if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) {
130     return env->NewStringUTF(cName);
131   } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) {
132     return env->NewStringUTF(cName);
133   }
134 
135   // Check to see if an alias already exists with "x-" prefix, if yes then
136   // make that the canonical name.
137   int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status);
138   for (int i = 0; i < aliasCount; ++i) {
139     const char* name = ucnv_getAlias(icuCanonicalName, i, &status);
140     if (name != NULL && name[0] == 'x' && name[1] == '-') {
141       return env->NewStringUTF(name);
142     }
143   }
144 
145   // As a last resort, prepend "x-" to any alias and make that the canonical name.
146   status = U_ZERO_ERROR;
147   const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status);
148   if (name == NULL && strchr(icuCanonicalName, ',') != NULL) {
149     name = ucnv_getAlias(icuCanonicalName, 1, &status);
150   }
151   // If there is no UTR22 canonical name then just return the original name.
152   if (name == NULL) {
153     name = icuCanonicalName;
154   }
155   std::unique_ptr<char[]> result(new char[2 + strlen(name) + 1]);
156   strcpy(&result[0], "x-");
157   strcat(&result[0], name);
158   return env->NewStringUTF(&result[0]);
159 }
160 
// Picks the ICU converter name that should actually be opened for a given
// canonical name. The result may carry a ",version=N" suffix; the plain
// canonical name is still what the aliases and the Java name are derived from.
//
// Only "UTF-16" is remapped: ICU's UTF-16 converter encodes platform-endian
// bytes with a BOM, whereas "UTF-16,version=2" encodes big-endian with a BOM,
// which is what the Charset javadoc requires. Every other name is returned
// unchanged (the same pointer that was passed in).
static char const * getVersionedIcuCanonicalName(char const * icuCanonicalName) {
  const bool isPlainUtf16 = (strcmp(icuCanonicalName, "UTF-16") == 0);
  return isPlainUtf16 ? "UTF-16,version=2" : icuCanonicalName;
}
173 
NativeConverter_openConverter(JNIEnv * env,jclass,jstring converterName)174 static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) {
175     ScopedUtfChars converterNameChars(env, converterName);
176     if (converterNameChars.c_str() == NULL) {
177         // Extra debugging check that we do have an exception if the we could not
178         // create a string. See b/62612946.
179         if (env->ExceptionCheck()) {
180             return 0;
181         }
182         maybeThrowIcuException(env, "openConverter", U_ILLEGAL_ARGUMENT_ERROR);
183         return 0;
184     }
185     UErrorCode status = U_ZERO_ERROR;
186     UConverter* cnv = ucnv_open(converterNameChars.c_str(), &status);
187     maybeThrowIcuException(env, "ucnv_open", status);
188     if (env->ExceptionCheck()) {
189         return 0;
190     }
191     if (cnv == NULL) {
192         // Extra debugging exception in case cnv is null but ICU did not report
193         // an error. See b/62612946.
194         maybeThrowIcuException(env, "openConverter", U_ILLEGAL_ARGUMENT_ERROR);
195         return 0;
196     }
197     return reinterpret_cast<uintptr_t>(cnv);
198 }
199 
NativeConverter_closeConverter(JNIEnv *,jclass,jlong address)200 static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) {
201     ucnv_close(toUConverter(address));
202 }
203 
shouldCodecThrow(jboolean flush,UErrorCode error)204 static bool shouldCodecThrow(jboolean flush, UErrorCode error) {
205     if (flush) {
206         return (error != U_BUFFER_OVERFLOW_ERROR && error != U_TRUNCATED_CHAR_FOUND);
207     } else {
208         return (error != U_BUFFER_OVERFLOW_ERROR && error != U_INVALID_CHAR_FOUND && error != U_ILLEGAL_CHAR_FOUND);
209     }
210 }
211 
NativeConverter_encode(JNIEnv * env,jclass,jlong address,jcharArray source,jint sourceEnd,jbyteArray target,jint targetEnd,jintArray data,jboolean flush)212 static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address,
213         jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd,
214         jintArray data, jboolean flush) {
215 
216     UConverter* cnv = toUConverter(address);
217     if (cnv == NULL) {
218         maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
219         return U_ILLEGAL_ARGUMENT_ERROR;
220     }
221     ScopedCharArrayRO uSource(env, source);
222     if (uSource.get() == NULL) {
223         maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR);
224         return U_ILLEGAL_ARGUMENT_ERROR;
225     }
226     ScopedByteArrayRW uTarget(env, target);
227     if (uTarget.get() == NULL) {
228         maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR);
229         return U_ILLEGAL_ARGUMENT_ERROR;
230     }
231     ScopedIntArrayRW myData(env, data);
232     if (myData.get() == NULL) {
233         maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR);
234         return U_ILLEGAL_ARGUMENT_ERROR;
235     }
236 
237     // Do the conversion.
238     jint* sourceOffset = &myData[0];
239     jint* targetOffset = &myData[1];
240     const jchar* mySource = uSource.get() + *sourceOffset;
241     const UChar* mySourceLimit= reinterpret_cast<const UChar*>(uSource.get()) + sourceEnd;
242     char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset);
243     const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd);
244     UErrorCode errorCode = U_ZERO_ERROR;
245     ucnv_fromUnicode(cnv, &cTarget, cTargetLimit, reinterpret_cast<const UChar**>(&mySource), mySourceLimit, NULL, (UBool) flush, &errorCode);
246     *sourceOffset = (mySource - uSource.get()) - *sourceOffset;
247     *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get());
248 
249     // If there was an error, count the problematic characters.
250     if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND ||
251         errorCode == U_TRUNCATED_CHAR_FOUND) {
252         int8_t invalidUCharCount = 32;
253         UChar invalidUChars[32];
254         UErrorCode minorErrorCode = U_ZERO_ERROR;
255         ucnv_getInvalidUChars(cnv, invalidUChars, &invalidUCharCount, &minorErrorCode);
256         if (U_SUCCESS(minorErrorCode)) {
257             myData[2] = invalidUCharCount;
258         }
259     }
260 
261     // Managed code handles some cases; throw all other errors.
262     if (shouldCodecThrow(flush, errorCode)) {
263         maybeThrowIcuException(env, "ucnv_fromUnicode", errorCode);
264     }
265     return errorCode;
266 }
267 
NativeConverter_decode(JNIEnv * env,jclass,jlong address,jbyteArray source,jint sourceEnd,jcharArray target,jint targetEnd,jintArray data,jboolean flush)268 static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address,
269         jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd,
270         jintArray data, jboolean flush) {
271 
272     UConverter* cnv = toUConverter(address);
273     if (cnv == NULL) {
274         maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
275         return U_ILLEGAL_ARGUMENT_ERROR;
276     }
277     ScopedByteArrayRO uSource(env, source);
278     if (uSource.get() == NULL) {
279         maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR);
280         return U_ILLEGAL_ARGUMENT_ERROR;
281     }
282     ScopedCharArrayRW uTarget(env, target);
283     if (uTarget.get() == NULL) {
284         maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR);
285         return U_ILLEGAL_ARGUMENT_ERROR;
286     }
287     ScopedIntArrayRW myData(env, data);
288     if (myData.get() == NULL) {
289         maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR);
290         return U_ILLEGAL_ARGUMENT_ERROR;
291     }
292 
293     // Do the conversion.
294     jint* sourceOffset = &myData[0];
295     jint* targetOffset = &myData[1];
296     const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset);
297     const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd);
298     UChar* cTarget = reinterpret_cast<UChar*>(uTarget.get()) + *targetOffset;
299     const UChar* cTargetLimit = reinterpret_cast<UChar*>(uTarget.get()) + targetEnd;
300     UErrorCode errorCode = U_ZERO_ERROR;
301     ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode);
302     *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset;
303     *targetOffset = cTarget - reinterpret_cast<UChar*>(uTarget.get()) - *targetOffset;
304 
305     // If there was an error, count the problematic bytes.
306     if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND ||
307         errorCode == U_TRUNCATED_CHAR_FOUND) {
308         int8_t invalidByteCount = 32;
309         char invalidBytes[32] = {'\0'};
310         UErrorCode minorErrorCode = U_ZERO_ERROR;
311         ucnv_getInvalidChars(cnv, invalidBytes, &invalidByteCount, &minorErrorCode);
312         if (U_SUCCESS(minorErrorCode)) {
313             myData[2] = invalidByteCount;
314         }
315     }
316 
317     // Managed code handles some cases; throw all other errors.
318     if (shouldCodecThrow(flush, errorCode)) {
319         maybeThrowIcuException(env, "ucnv_toUnicode", errorCode);
320     }
321     return errorCode;
322 }
323 
NativeConverter_resetByteToChar(JNIEnv *,jclass,jlong address)324 static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) {
325     UConverter* cnv = toUConverter(address);
326     if (cnv) {
327         ucnv_resetToUnicode(cnv);
328     }
329 }
330 
NativeConverter_resetCharToByte(JNIEnv *,jclass,jlong address)331 static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) {
332     UConverter* cnv = toUConverter(address);
333     if (cnv) {
334         ucnv_resetFromUnicode(cnv);
335     }
336 }
337 
NativeConverter_getMaxBytesPerChar(JNIEnv *,jclass,jlong address)338 static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) {
339     UConverter* cnv = toUConverter(address);
340     return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1;
341 }
342 
NativeConverter_getMinBytesPerChar(JNIEnv *,jclass,jlong address)343 static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) {
344     UConverter* cnv = toUConverter(address);
345     return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1;
346 }
347 
NativeConverter_getAveBytesPerChar(JNIEnv *,jclass,jlong address)348 static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) {
349     UConverter* cnv = toUConverter(address);
350     return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1;
351 }
352 
NativeConverter_getAvailableCharsetNames(JNIEnv * env,jclass)353 static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) {
354     int32_t num = ucnv_countAvailable();
355     jobjectArray result = env->NewObjectArray(num, JniConstants::GetStringClass(env), NULL);
356     if (result == NULL) {
357         return NULL;
358     }
359     for (int i = 0; i < num; ++i) {
360         const char* name = ucnv_getAvailableName(i);
361         ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name));
362         if (javaCanonicalName.get() == NULL) {
363             return NULL;
364         }
365         env->SetObjectArrayElement(result, i, javaCanonicalName.get());
366         if (env->ExceptionCheck()) {
367             return NULL;
368         }
369     }
370     return result;
371 }
372 
CHARSET_ENCODER_CALLBACK(const void * rawContext,UConverterFromUnicodeArgs * args,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * status)373 static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args,
374         const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason,
375         UErrorCode* status) {
376     if (!rawContext) {
377         return;
378     }
379     const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
380     switch(reason) {
381     case UCNV_UNASSIGNED:
382         ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status);
383         return;
384     case UCNV_ILLEGAL:
385     case UCNV_IRREGULAR:
386         ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status);
387         return;
388     case UCNV_CLOSE:
389         delete ctx;
390         return;
391     default:
392         *status = U_ILLEGAL_ARGUMENT_ERROR;
393         return;
394     }
395 }
396 
encoderReplaceCallback(const void * rawContext,UConverterFromUnicodeArgs * fromArgs,const UChar *,int32_t,UChar32,UConverterCallbackReason,UErrorCode * err)397 static void encoderReplaceCallback(const void* rawContext,
398         UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32,
399         UConverterCallbackReason, UErrorCode * err) {
400     if (rawContext == NULL) {
401         return;
402     }
403     const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
404     *err = U_ZERO_ERROR;
405     ucnv_cbFromUWriteBytes(fromArgs, context->replacementBytes, context->replacementByteCount, 0, err);
406 }
407 
getFromUCallback(int32_t mode)408 static UConverterFromUCallback getFromUCallback(int32_t mode) {
409     switch(mode) {
410     case NativeConverter_IGNORE: return UCNV_FROM_U_CALLBACK_SKIP;
411     case NativeConverter_REPLACE: return encoderReplaceCallback;
412     case NativeConverter_REPORT: return UCNV_FROM_U_CALLBACK_STOP;
413     }
414     abort();
415 }
416 
NativeConverter_setCallbackEncode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jbyteArray javaReplacement)417 static void NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address,
418         jint onMalformedInput, jint onUnmappableInput, jbyteArray javaReplacement) {
419     UConverter* cnv = toUConverter(address);
420     if (cnv == NULL) {
421         maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
422         return;
423     }
424 
425     UConverterFromUCallback oldCallback = NULL;
426     const void* oldCallbackContext = NULL;
427     ucnv_getFromUCallBack(cnv, &oldCallback, const_cast<const void**>(&oldCallbackContext));
428 
429     EncoderCallbackContext* callbackContext = const_cast<EncoderCallbackContext*>(
430             reinterpret_cast<const EncoderCallbackContext*>(oldCallbackContext));
431     // Hold the reference to any new callbackContext we create in a unique_ptr
432     // so that the default behavior is to collect it automatically if we exit
433     // early.
434     std::unique_ptr<EncoderCallbackContext> callbackContextDeleter;
435     if (callbackContext == NULL) {
436         callbackContext = new EncoderCallbackContext;
437         callbackContextDeleter.reset(callbackContext);
438     }
439 
440     callbackContext->onMalformedInput = getFromUCallback(onMalformedInput);
441     callbackContext->onUnmappableInput = getFromUCallback(onUnmappableInput);
442 
443     ScopedByteArrayRO replacementBytes(env, javaReplacement);
444     if (replacementBytes.get() == NULL
445             || replacementBytes.size() > sizeof(callbackContext->replacementBytes)) {
446         maybeThrowIcuException(env, "replacementBytes", U_ILLEGAL_ARGUMENT_ERROR);
447         return;
448     }
449     memcpy(callbackContext->replacementBytes, replacementBytes.get(), replacementBytes.size());
450     callbackContext->replacementByteCount = replacementBytes.size();
451 
452     UErrorCode errorCode = U_ZERO_ERROR;
453     ucnv_setFromUCallBack(cnv, CHARSET_ENCODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
454     // Iff callbackContextDeleter holds a reference to a callbackContext we can
455     // prevent it being automatically deleted here as responsibility for deletion
456     // has passed to the code that closes the NativeConverter.
457     callbackContextDeleter.release();
458     maybeThrowIcuException(env, "ucnv_setFromUCallBack", errorCode);
459 }
460 
// Decoder handler for the IGNORE action (see getToUCallback): writes no output
// and resets *err to U_ZERO_ERROR. All other arguments are unused.
static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) {
    // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is
    // never true for us.
    // NOTE(review): this handler is on the to-Unicode side, so the comment above
    // presumably means UCNV_TO_U_CALLBACK_SKIP -- confirm.
    *err = U_ZERO_ERROR;
}
466 
decoderReplaceCallback(const void * rawContext,UConverterToUnicodeArgs * toArgs,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)467 static void decoderReplaceCallback(const void* rawContext,
468         UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason,
469         UErrorCode* err) {
470     if (!rawContext) {
471         return;
472     }
473     const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
474     *err = U_ZERO_ERROR;
475     ucnv_cbToUWriteUChars(toArgs,context->replacementChars, context->replacementCharCount, 0, err);
476 }
477 
getToUCallback(int32_t mode)478 static UConverterToUCallback getToUCallback(int32_t mode) {
479     switch (mode) {
480     case NativeConverter_IGNORE: return decoderIgnoreCallback;
481     case NativeConverter_REPLACE: return decoderReplaceCallback;
482     case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP;
483     }
484     abort();
485 }
486 
CHARSET_DECODER_CALLBACK(const void * rawContext,UConverterToUnicodeArgs * args,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * status)487 static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args,
488         const char* codeUnits, int32_t length,
489         UConverterCallbackReason reason, UErrorCode* status) {
490     if (!rawContext) {
491         return;
492     }
493     const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
494     switch(reason) {
495     case UCNV_UNASSIGNED:
496         ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status);
497         return;
498     case UCNV_ILLEGAL:
499     case UCNV_IRREGULAR:
500         ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status);
501         return;
502     case UCNV_CLOSE:
503         delete ctx;
504         return;
505     default:
506         *status = U_ILLEGAL_ARGUMENT_ERROR;
507         return;
508     }
509 }
510 
NativeConverter_setCallbackDecode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jstring javaReplacement)511 static void NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address,
512         jint onMalformedInput, jint onUnmappableInput, jstring javaReplacement) {
513     UConverter* cnv = toUConverter(address);
514     if (cnv == NULL) {
515         maybeThrowIcuException(env, "toConverter", U_ILLEGAL_ARGUMENT_ERROR);
516         return;
517     }
518 
519     UConverterToUCallback oldCallback;
520     const void* oldCallbackContext;
521     ucnv_getToUCallBack(cnv, &oldCallback, &oldCallbackContext);
522 
523     DecoderCallbackContext* callbackContext = const_cast<DecoderCallbackContext*>(
524             reinterpret_cast<const DecoderCallbackContext*>(oldCallbackContext));
525     // Hold the reference to any new callbackContext we create in a unique_ptr
526     // so that the default behavior is to collect it automatically if we exit
527     // early.
528     std::unique_ptr<DecoderCallbackContext> callbackContextDeleter;
529     if (callbackContext == NULL) {
530         callbackContext = new DecoderCallbackContext;
531         callbackContextDeleter.reset(callbackContext);
532     }
533 
534     callbackContext->onMalformedInput = getToUCallback(onMalformedInput);
535     callbackContext->onUnmappableInput = getToUCallback(onUnmappableInput);
536 
537     ScopedStringChars replacement(env, javaReplacement);
538     if (replacement.get() == NULL
539                 || replacement.size() > sizeof(callbackContext->replacementChars) / sizeof(UChar)) {
540         maybeThrowIcuException(env, "replacement", U_ILLEGAL_ARGUMENT_ERROR);
541         return;
542     }
543     u_strncpy(callbackContext->replacementChars, reinterpret_cast<const UChar*>(replacement.get()), replacement.size());
544     callbackContext->replacementCharCount = replacement.size();
545 
546     UErrorCode errorCode = U_ZERO_ERROR;
547     ucnv_setToUCallBack(cnv, CHARSET_DECODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
548     // Iff callbackContextDeleter holds a reference to a callbackContext we can
549     // prevent it being automatically deleted here as responsibility for deletion
550     // has passed to the code that closes the NativeConverter.
551     callbackContextDeleter.release();
552     maybeThrowIcuException(env, "ucnv_setToUCallBack", errorCode);
553 }
554 
NativeConverter_getAveCharsPerByte(JNIEnv * env,jclass,jlong handle)555 static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) {
556     return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle));
557 }
558 
NativeConverter_getSubstitutionBytes(JNIEnv * env,jclass,jlong address)559 static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) {
560     UConverter* cnv = toUConverter(address);
561     if (cnv == NULL) {
562         return NULL;
563     }
564     UErrorCode status = U_ZERO_ERROR;
565     char replacementBytes[MAX_REPLACEMENT_LENGTH];
566     int8_t len = sizeof(replacementBytes);
567     ucnv_getSubstChars(cnv, replacementBytes, &len, &status);
568     if (!U_SUCCESS(status)) {
569         return env->NewByteArray(0);
570     }
571     jbyteArray result = env->NewByteArray(len);
572     if (result == NULL) {
573         return NULL;
574     }
575     env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(replacementBytes));
576     return result;
577 }
578 
NativeConverter_contains(JNIEnv * env,jclass,jstring name1,jstring name2)579 static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) {
580     ScopedUtfChars name1Chars(env, name1);
581     if (name1Chars.c_str() == NULL) {
582         return JNI_FALSE;
583     }
584     ScopedUtfChars name2Chars(env, name2);
585     if (name2Chars.c_str() == NULL) {
586         return JNI_FALSE;
587     }
588 
589     UErrorCode errorCode = U_ZERO_ERROR;
590     icu::LocalUConverterPointer converter1(ucnv_open(name1Chars.c_str(), &errorCode));
591     icu::UnicodeSet set1;
592     ucnv_getUnicodeSet(&*converter1, set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
593 
594     icu::LocalUConverterPointer converter2(ucnv_open(name2Chars.c_str(), &errorCode));
595     icu::UnicodeSet set2;
596     ucnv_getUnicodeSet(&*converter2, set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
597 
598     return U_SUCCESS(errorCode) && set1.containsAll(set2);
599 }
600 
NativeConverter_charsetForName(JNIEnv * env,jclass,jstring charsetName)601 static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) {
602     ScopedUtfChars charsetNameChars(env, charsetName);
603     if (charsetNameChars.c_str() == NULL) {
604         return NULL;
605     }
606 
607     // Get ICU's canonical name for this charset.
608     const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str());
609     if (icuCanonicalName == NULL) {
610         return NULL;
611     }
612 
613     // Get Java's canonical name for this charset.
614     jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName);
615     if (env->ExceptionCheck()) {
616         return NULL;
617     }
618 
619     // Check that this charset is supported.
620     {
621         // ICU doesn't offer any "isSupported", so we just open and immediately close.
622         UErrorCode error = U_ZERO_ERROR;
623         icu::LocalUConverterPointer cnv(ucnv_open(icuCanonicalName, &error));
624         if (!U_SUCCESS(error)) {
625             return NULL;
626         }
627     }
628 
629     // Get the aliases for this charset.
630     std::vector<std::string> aliases;
631     if (!collectStandardNames(env, icuCanonicalName, "IANA", aliases)) {
632         return NULL;
633     }
634     if (!collectStandardNames(env, icuCanonicalName, "MIME", aliases)) {
635         return NULL;
636     }
637     if (!collectStandardNames(env, icuCanonicalName, "JAVA", aliases)) {
638         return NULL;
639     }
640     if (!collectStandardNames(env, icuCanonicalName, "WINDOWS", aliases)) {
641         return NULL;
642     }
643     jobjectArray javaAliases = toStringArray(env, aliases);
644     if (env->ExceptionCheck()) {
645         return NULL;
646     }
647 
648     // Construct the CharsetICU object.
649     static jmethodID charsetConstructor = env->GetMethodID(JniConstants::GetCharsetICUClass(env), "<init>",
650             "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V");
651     if (env->ExceptionCheck()) {
652         return NULL;
653     }
654 
655     char const * versionedIcuCanonicalName = getVersionedIcuCanonicalName(icuCanonicalName);
656     jstring versionedIcuCanonicalNameStr = env->NewStringUTF(versionedIcuCanonicalName);
657     if (env->ExceptionCheck()) {
658         return NULL;
659     }
660 
661     return env->NewObject(JniConstants::GetCharsetICUClass(env), charsetConstructor,
662             javaCanonicalName, versionedIcuCanonicalNameStr, javaAliases);
663 }
664 
FreeNativeConverter(void * converter)665 static void FreeNativeConverter(void *converter) {
666     ucnv_close(reinterpret_cast<UConverter*>(converter));
667 }
668 
NativeConverter_getNativeFinalizer(JNIEnv *,jclass)669 static jlong NativeConverter_getNativeFinalizer(JNIEnv*, jclass) {
670     return reinterpret_cast<jlong>(&FreeNativeConverter);
671 }
672 
NativeConverter_getNativeSize(JNIEnv *,jclass)673 static jlong NativeConverter_getNativeSize(JNIEnv*, jclass) {
674     // TODO: Improve estimate.
675     return 200;
676 }
677 
// JNI registration table: one entry per native method, giving the method name
// and its JNI type signature. Each entry refers to the NativeConverter_<name>
// function defined above. Registered by register_libcore_icu_NativeConverter.
static JNINativeMethod gMethods[] = {
    NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"),
    NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"),
    NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"),
    NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"),
    NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"),
    NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"),
    NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"),
    NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"),
    NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"),
    NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"),
    NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"),
    NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"),
    NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"),
    NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"),
    NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JIILjava/lang/String;)V"),
    NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)V"),
    NATIVE_METHOD(NativeConverter, getNativeFinalizer, "()J"),
    NATIVE_METHOD(NativeConverter, getNativeSize, "()J")
};
register_libcore_icu_NativeConverter(JNIEnv * env)698 void register_libcore_icu_NativeConverter(JNIEnv* env) {
699     jniRegisterNativeMethods(env, "libcore/icu/NativeConverter", gMethods, NELEM(gMethods));
700 }
701