• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /**
2  *******************************************************************************
3  * Copyright (C) 1996-2006, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  *
7  *
8  *******************************************************************************
9  */
10  /*
11   * (C) Copyright IBM Corp. 2000 - All Rights Reserved
12   *  A JNI wrapper to ICU native converter Interface
13   * @author: Ram Viswanadha
14   */
15  
16  #define LOG_TAG "NativeConverter"
17  
18  #include <stdlib.h>
19  #include <string.h>
20  
21  #include <memory>
22  #include <vector>
23  
24  #include <android/log.h>
25  #include <nativehelper/JNIHelp.h>
26  #include <nativehelper/ScopedLocalRef.h>
27  #include <nativehelper/ScopedPrimitiveArray.h>
28  #include <nativehelper/ScopedStringChars.h>
29  #include <nativehelper/ScopedUtfChars.h>
30  #include <nativehelper/jni_macros.h>
31  #include <nativehelper/toStringArray.h>
32  
33  #include "IcuUtilities.h"
34  #include "JniConstants.h"
35  #include "JniException.h"
36  #include "unicode/ucnv.h"
37  #include "unicode/ucnv_cb.h"
38  #include "unicode/uniset.h"
39  #include "unicode/ustring.h"
40  #include "unicode/utypes.h"
41  
// Coding-error action codes passed in from managed code; getFromUCallback() and
// getToUCallback() map each one to an ICU callback.
#define NativeConverter_REPORT 0
#define NativeConverter_IGNORE 1
#define NativeConverter_REPLACE 2

// Capacity, in elements, of the replacement buffers held by the callback contexts.
#define MAX_REPLACEMENT_LENGTH 32 // equivalent to UCNV_ERROR_BUFFER_LENGTH
47  
48  struct DecoderCallbackContext {
49      UChar replacementChars[MAX_REPLACEMENT_LENGTH];
50      size_t replacementCharCount;
51      UConverterToUCallback onUnmappableInput;
52      UConverterToUCallback onMalformedInput;
53  };
54  
55  struct EncoderCallbackContext {
56      char replacementBytes[MAX_REPLACEMENT_LENGTH];
57      size_t replacementByteCount;
58      UConverterFromUCallback onUnmappableInput;
59      UConverterFromUCallback onMalformedInput;
60  };
61  
toUConverter(jlong address)62  static UConverter* toUConverter(jlong address) {
63      return reinterpret_cast<UConverter*>(static_cast<uintptr_t>(address));
64  }
65  
collectStandardNames(JNIEnv * env,const char * canonicalName,const char * standard,std::vector<std::string> & result)66  static bool collectStandardNames(JNIEnv* env, const char* canonicalName, const char* standard,
67                                   std::vector<std::string>& result) {
68    UErrorCode status = U_ZERO_ERROR;
69    icu::UStringEnumeration e(ucnv_openStandardNames(canonicalName, standard, &status));
70    if (maybeThrowIcuException(env, "ucnv_openStandardNames", status)) {
71      return false;
72    }
73  
74    int32_t count = e.count(status);
75    if (maybeThrowIcuException(env, "StringEnumeration::count", status)) {
76      return false;
77    }
78  
79    for (int32_t i = 0; i < count; ++i) {
80      const icu::UnicodeString* string = e.snext(status);
81      if (maybeThrowIcuException(env, "StringEnumeration::snext", status)) {
82        return false;
83      }
84      std::string s;
85      string->toUTF8String(s);
86      if (s.find_first_of("+,") == std::string::npos) {
87        result.push_back(s);
88      }
89    }
90  
91    return true;
92  }
93  
getICUCanonicalName(const char * name)94  static const char* getICUCanonicalName(const char* name) {
95    UErrorCode error = U_ZERO_ERROR;
96    const char* canonicalName = NULL;
97    if ((canonicalName = ucnv_getCanonicalName(name, "MIME", &error)) != NULL) {
98      return canonicalName;
99    } else if ((canonicalName = ucnv_getCanonicalName(name, "IANA", &error)) != NULL) {
100      return canonicalName;
101    } else if ((canonicalName = ucnv_getCanonicalName(name, "", &error)) != NULL) {
102      return canonicalName;
103    } else if ((canonicalName = ucnv_getAlias(name, 0, &error)) != NULL) {
104      // We have some aliases in the form x-blah .. match those first.
105      return canonicalName;
106    } else if (strstr(name, "x-") == name) {
107      // Check if the converter can be opened with the name given.
108      error = U_ZERO_ERROR;
109      icu::LocalUConverterPointer cnv(ucnv_open(name + 2, &error));
110      if (U_SUCCESS(error)) {
111        return name + 2;
112      }
113    }
114    return NULL;
115  }
116  
117  // If a charset listed in the IANA Charset Registry is supported by an implementation
118  // of the Java platform then its canonical name must be the name listed in the registry.
119  // Many charsets are given more than one name in the registry, in which case the registry
120  // identifies one of the names as MIME-preferred. If a charset has more than one registry
121  // name then its canonical name must be the MIME-preferred name and the other names in
122  // the registry must be valid aliases. If a supported charset is not listed in the IANA
123  // registry then its canonical name must begin with one of the strings "X-" or "x-".
getJavaCanonicalName(JNIEnv * env,const char * icuCanonicalName)124  static jstring getJavaCanonicalName(JNIEnv* env, const char* icuCanonicalName) {
125    UErrorCode status = U_ZERO_ERROR;
126  
127    // Check to see if this is a well-known MIME or IANA name.
128    const char* cName = NULL;
129    if ((cName = ucnv_getStandardName(icuCanonicalName, "MIME", &status)) != NULL) {
130      return env->NewStringUTF(cName);
131    } else if ((cName = ucnv_getStandardName(icuCanonicalName, "IANA", &status)) != NULL) {
132      return env->NewStringUTF(cName);
133    }
134  
135    // Check to see if an alias already exists with "x-" prefix, if yes then
136    // make that the canonical name.
137    int32_t aliasCount = ucnv_countAliases(icuCanonicalName, &status);
138    for (int i = 0; i < aliasCount; ++i) {
139      const char* name = ucnv_getAlias(icuCanonicalName, i, &status);
140      if (name != NULL && name[0] == 'x' && name[1] == '-') {
141        return env->NewStringUTF(name);
142      }
143    }
144  
145    // As a last resort, prepend "x-" to any alias and make that the canonical name.
146    status = U_ZERO_ERROR;
147    const char* name = ucnv_getStandardName(icuCanonicalName, "UTR22", &status);
148    if (name == NULL && strchr(icuCanonicalName, ',') != NULL) {
149      name = ucnv_getAlias(icuCanonicalName, 1, &status);
150    }
151    // If there is no UTR22 canonical name then just return the original name.
152    if (name == NULL) {
153      name = icuCanonicalName;
154    }
155    std::unique_ptr<char[]> result(new char[2 + strlen(name) + 1]);
156    strcpy(&result[0], "x-");
157    strcat(&result[0], name);
158    return env->NewStringUTF(&result[0]);
159  }
160  
// Maps a canonical ICU converter name to the name of the native converter that should actually
// be opened, which may carry a version suffix. The plain, unversioned name is still what is used
// to work out the aliases and the Java name.
//
// Only "UTF-16" is rewritten: ICU's UTF-16 converter encodes strings as platform-endian bytes
// with a BOM, while "UTF-16,version=2" encodes as big-endian with a BOM, which is what the
// Charset javadoc requires. Any other name is returned unchanged (the same pointer).
static char const * getVersionedIcuCanonicalName(char const * icuCanonicalName) {
  const bool isPlainUtf16 = (strcmp(icuCanonicalName, "UTF-16") == 0);
  return isPlainUtf16 ? "UTF-16,version=2" : icuCanonicalName;
}
173  
NativeConverter_openConverter(JNIEnv * env,jclass,jstring converterName)174  static jlong NativeConverter_openConverter(JNIEnv* env, jclass, jstring converterName) {
175      ScopedUtfChars converterNameChars(env, converterName);
176      if (converterNameChars.c_str() == NULL) {
177          // Extra debugging check that we do have an exception if the we could not
178          // create a string. See b/62612946.
179          if (env->ExceptionCheck()) {
180              return 0;
181          }
182          maybeThrowIcuException(env, "openConverter", U_ILLEGAL_ARGUMENT_ERROR);
183          return 0;
184      }
185      UErrorCode status = U_ZERO_ERROR;
186      UConverter* cnv = ucnv_open(converterNameChars.c_str(), &status);
187      maybeThrowIcuException(env, "ucnv_open", status);
188      if (env->ExceptionCheck()) {
189          return 0;
190      }
191      if (cnv == NULL) {
192          // Extra debugging exception in case cnv is null but ICU did not report
193          // an error. See b/62612946.
194          maybeThrowIcuException(env, "openConverter", U_ILLEGAL_ARGUMENT_ERROR);
195          return 0;
196      }
197      return reinterpret_cast<uintptr_t>(cnv);
198  }
199  
NativeConverter_closeConverter(JNIEnv *,jclass,jlong address)200  static void NativeConverter_closeConverter(JNIEnv*, jclass, jlong address) {
201      ucnv_close(toUConverter(address));
202  }
203  
shouldCodecThrow(jboolean flush,UErrorCode error)204  static bool shouldCodecThrow(jboolean flush, UErrorCode error) {
205      if (flush) {
206          return (error != U_BUFFER_OVERFLOW_ERROR && error != U_TRUNCATED_CHAR_FOUND);
207      } else {
208          return (error != U_BUFFER_OVERFLOW_ERROR && error != U_INVALID_CHAR_FOUND && error != U_ILLEGAL_CHAR_FOUND);
209      }
210  }
211  
NativeConverter_encode(JNIEnv * env,jclass,jlong address,jcharArray source,jint sourceEnd,jbyteArray target,jint targetEnd,jintArray data,jboolean flush)212  static jint NativeConverter_encode(JNIEnv* env, jclass, jlong address,
213          jcharArray source, jint sourceEnd, jbyteArray target, jint targetEnd,
214          jintArray data, jboolean flush) {
215  
216      UConverter* cnv = toUConverter(address);
217      if (cnv == NULL) {
218          maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
219          return U_ILLEGAL_ARGUMENT_ERROR;
220      }
221      ScopedCharArrayRO uSource(env, source);
222      if (uSource.get() == NULL) {
223          maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR);
224          return U_ILLEGAL_ARGUMENT_ERROR;
225      }
226      ScopedByteArrayRW uTarget(env, target);
227      if (uTarget.get() == NULL) {
228          maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR);
229          return U_ILLEGAL_ARGUMENT_ERROR;
230      }
231      ScopedIntArrayRW myData(env, data);
232      if (myData.get() == NULL) {
233          maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR);
234          return U_ILLEGAL_ARGUMENT_ERROR;
235      }
236  
237      // Do the conversion.
238      jint* sourceOffset = &myData[0];
239      jint* targetOffset = &myData[1];
240      const jchar* mySource = uSource.get() + *sourceOffset;
241      const UChar* mySourceLimit= reinterpret_cast<const UChar*>(uSource.get()) + sourceEnd;
242      char* cTarget = reinterpret_cast<char*>(uTarget.get() + *targetOffset);
243      const char* cTargetLimit = reinterpret_cast<const char*>(uTarget.get() + targetEnd);
244      UErrorCode errorCode = U_ZERO_ERROR;
245      ucnv_fromUnicode(cnv, &cTarget, cTargetLimit, reinterpret_cast<const UChar**>(&mySource), mySourceLimit, NULL, (UBool) flush, &errorCode);
246      *sourceOffset = (mySource - uSource.get()) - *sourceOffset;
247      *targetOffset = (reinterpret_cast<jbyte*>(cTarget) - uTarget.get());
248  
249      // If there was an error, count the problematic characters.
250      if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND ||
251          errorCode == U_TRUNCATED_CHAR_FOUND) {
252          int8_t invalidUCharCount = 32;
253          UChar invalidUChars[32];
254          UErrorCode minorErrorCode = U_ZERO_ERROR;
255          ucnv_getInvalidUChars(cnv, invalidUChars, &invalidUCharCount, &minorErrorCode);
256          if (U_SUCCESS(minorErrorCode)) {
257              myData[2] = invalidUCharCount;
258          }
259      }
260  
261      // Managed code handles some cases; throw all other errors.
262      if (shouldCodecThrow(flush, errorCode)) {
263          maybeThrowIcuException(env, "ucnv_fromUnicode", errorCode);
264      }
265      return errorCode;
266  }
267  
NativeConverter_decode(JNIEnv * env,jclass,jlong address,jbyteArray source,jint sourceEnd,jcharArray target,jint targetEnd,jintArray data,jboolean flush)268  static jint NativeConverter_decode(JNIEnv* env, jclass, jlong address,
269          jbyteArray source, jint sourceEnd, jcharArray target, jint targetEnd,
270          jintArray data, jboolean flush) {
271  
272      UConverter* cnv = toUConverter(address);
273      if (cnv == NULL) {
274          maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
275          return U_ILLEGAL_ARGUMENT_ERROR;
276      }
277      ScopedByteArrayRO uSource(env, source);
278      if (uSource.get() == NULL) {
279          maybeThrowIcuException(env, "uSource", U_ILLEGAL_ARGUMENT_ERROR);
280          return U_ILLEGAL_ARGUMENT_ERROR;
281      }
282      ScopedCharArrayRW uTarget(env, target);
283      if (uTarget.get() == NULL) {
284          maybeThrowIcuException(env, "uTarget", U_ILLEGAL_ARGUMENT_ERROR);
285          return U_ILLEGAL_ARGUMENT_ERROR;
286      }
287      ScopedIntArrayRW myData(env, data);
288      if (myData.get() == NULL) {
289          maybeThrowIcuException(env, "myData", U_ILLEGAL_ARGUMENT_ERROR);
290          return U_ILLEGAL_ARGUMENT_ERROR;
291      }
292  
293      // Do the conversion.
294      jint* sourceOffset = &myData[0];
295      jint* targetOffset = &myData[1];
296      const char* mySource = reinterpret_cast<const char*>(uSource.get() + *sourceOffset);
297      const char* mySourceLimit = reinterpret_cast<const char*>(uSource.get() + sourceEnd);
298      UChar* cTarget = reinterpret_cast<UChar*>(uTarget.get()) + *targetOffset;
299      const UChar* cTargetLimit = reinterpret_cast<UChar*>(uTarget.get()) + targetEnd;
300      UErrorCode errorCode = U_ZERO_ERROR;
301      ucnv_toUnicode(cnv, &cTarget, cTargetLimit, &mySource, mySourceLimit, NULL, flush, &errorCode);
302      *sourceOffset = mySource - reinterpret_cast<const char*>(uSource.get()) - *sourceOffset;
303      *targetOffset = cTarget - reinterpret_cast<UChar*>(uTarget.get()) - *targetOffset;
304  
305      // If there was an error, count the problematic bytes.
306      if (errorCode == U_ILLEGAL_CHAR_FOUND || errorCode == U_INVALID_CHAR_FOUND ||
307          errorCode == U_TRUNCATED_CHAR_FOUND) {
308          int8_t invalidByteCount = 32;
309          char invalidBytes[32] = {'\0'};
310          UErrorCode minorErrorCode = U_ZERO_ERROR;
311          ucnv_getInvalidChars(cnv, invalidBytes, &invalidByteCount, &minorErrorCode);
312          if (U_SUCCESS(minorErrorCode)) {
313              myData[2] = invalidByteCount;
314          }
315      }
316  
317      // Managed code handles some cases; throw all other errors.
318      if (shouldCodecThrow(flush, errorCode)) {
319          maybeThrowIcuException(env, "ucnv_toUnicode", errorCode);
320      }
321      return errorCode;
322  }
323  
NativeConverter_resetByteToChar(JNIEnv *,jclass,jlong address)324  static void NativeConverter_resetByteToChar(JNIEnv*, jclass, jlong address) {
325      UConverter* cnv = toUConverter(address);
326      if (cnv) {
327          ucnv_resetToUnicode(cnv);
328      }
329  }
330  
NativeConverter_resetCharToByte(JNIEnv *,jclass,jlong address)331  static void NativeConverter_resetCharToByte(JNIEnv*, jclass, jlong address) {
332      UConverter* cnv = toUConverter(address);
333      if (cnv) {
334          ucnv_resetFromUnicode(cnv);
335      }
336  }
337  
NativeConverter_getMaxBytesPerChar(JNIEnv *,jclass,jlong address)338  static jint NativeConverter_getMaxBytesPerChar(JNIEnv*, jclass, jlong address) {
339      UConverter* cnv = toUConverter(address);
340      return (cnv != NULL) ? ucnv_getMaxCharSize(cnv) : -1;
341  }
342  
NativeConverter_getMinBytesPerChar(JNIEnv *,jclass,jlong address)343  static jint NativeConverter_getMinBytesPerChar(JNIEnv*, jclass, jlong address) {
344      UConverter* cnv = toUConverter(address);
345      return (cnv != NULL) ? ucnv_getMinCharSize(cnv) : -1;
346  }
347  
NativeConverter_getAveBytesPerChar(JNIEnv *,jclass,jlong address)348  static jfloat NativeConverter_getAveBytesPerChar(JNIEnv*, jclass, jlong address) {
349      UConverter* cnv = toUConverter(address);
350      return (cnv != NULL) ? ((ucnv_getMaxCharSize(cnv) + ucnv_getMinCharSize(cnv)) / 2.0) : -1;
351  }
352  
NativeConverter_getAvailableCharsetNames(JNIEnv * env,jclass)353  static jobjectArray NativeConverter_getAvailableCharsetNames(JNIEnv* env, jclass) {
354      int32_t num = ucnv_countAvailable();
355      jobjectArray result = env->NewObjectArray(num, JniConstants::GetStringClass(env), NULL);
356      if (result == NULL) {
357          return NULL;
358      }
359      for (int i = 0; i < num; ++i) {
360          const char* name = ucnv_getAvailableName(i);
361          ScopedLocalRef<jstring> javaCanonicalName(env, getJavaCanonicalName(env, name));
362          if (javaCanonicalName.get() == NULL) {
363              return NULL;
364          }
365          env->SetObjectArrayElement(result, i, javaCanonicalName.get());
366          if (env->ExceptionCheck()) {
367              return NULL;
368          }
369      }
370      return result;
371  }
372  
CHARSET_ENCODER_CALLBACK(const void * rawContext,UConverterFromUnicodeArgs * args,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * status)373  static void CHARSET_ENCODER_CALLBACK(const void* rawContext, UConverterFromUnicodeArgs* args,
374          const UChar* codeUnits, int32_t length, UChar32 codePoint, UConverterCallbackReason reason,
375          UErrorCode* status) {
376      if (!rawContext) {
377          return;
378      }
379      const EncoderCallbackContext* ctx = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
380      switch(reason) {
381      case UCNV_UNASSIGNED:
382          ctx->onUnmappableInput(ctx, args, codeUnits, length, codePoint, reason, status);
383          return;
384      case UCNV_ILLEGAL:
385      case UCNV_IRREGULAR:
386          ctx->onMalformedInput(ctx, args, codeUnits, length, codePoint, reason, status);
387          return;
388      case UCNV_CLOSE:
389          delete ctx;
390          return;
391      default:
392          *status = U_ILLEGAL_ARGUMENT_ERROR;
393          return;
394      }
395  }
396  
encoderReplaceCallback(const void * rawContext,UConverterFromUnicodeArgs * fromArgs,const UChar *,int32_t,UChar32,UConverterCallbackReason,UErrorCode * err)397  static void encoderReplaceCallback(const void* rawContext,
398          UConverterFromUnicodeArgs* fromArgs, const UChar*, int32_t, UChar32,
399          UConverterCallbackReason, UErrorCode * err) {
400      if (rawContext == NULL) {
401          return;
402      }
403      const EncoderCallbackContext* context = reinterpret_cast<const EncoderCallbackContext*>(rawContext);
404      *err = U_ZERO_ERROR;
405      ucnv_cbFromUWriteBytes(fromArgs, context->replacementBytes, context->replacementByteCount, 0, err);
406  }
407  
getFromUCallback(int32_t mode)408  static UConverterFromUCallback getFromUCallback(int32_t mode) {
409      switch(mode) {
410      case NativeConverter_IGNORE: return UCNV_FROM_U_CALLBACK_SKIP;
411      case NativeConverter_REPLACE: return encoderReplaceCallback;
412      case NativeConverter_REPORT: return UCNV_FROM_U_CALLBACK_STOP;
413      }
414      abort();
415  }
416  
NativeConverter_setCallbackEncode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jbyteArray javaReplacement)417  static void NativeConverter_setCallbackEncode(JNIEnv* env, jclass, jlong address,
418          jint onMalformedInput, jint onUnmappableInput, jbyteArray javaReplacement) {
419      UConverter* cnv = toUConverter(address);
420      if (cnv == NULL) {
421          maybeThrowIcuException(env, "toUConverter", U_ILLEGAL_ARGUMENT_ERROR);
422          return;
423      }
424  
425      UConverterFromUCallback oldCallback = NULL;
426      const void* oldCallbackContext = NULL;
427      ucnv_getFromUCallBack(cnv, &oldCallback, const_cast<const void**>(&oldCallbackContext));
428  
429      EncoderCallbackContext* callbackContext = const_cast<EncoderCallbackContext*>(
430              reinterpret_cast<const EncoderCallbackContext*>(oldCallbackContext));
431      // Hold the reference to any new callbackContext we create in a unique_ptr
432      // so that the default behavior is to collect it automatically if we exit
433      // early.
434      std::unique_ptr<EncoderCallbackContext> callbackContextDeleter;
435      if (callbackContext == NULL) {
436          callbackContext = new EncoderCallbackContext;
437          callbackContextDeleter.reset(callbackContext);
438      }
439  
440      callbackContext->onMalformedInput = getFromUCallback(onMalformedInput);
441      callbackContext->onUnmappableInput = getFromUCallback(onUnmappableInput);
442  
443      ScopedByteArrayRO replacementBytes(env, javaReplacement);
444      if (replacementBytes.get() == NULL
445              || replacementBytes.size() > sizeof(callbackContext->replacementBytes)) {
446          maybeThrowIcuException(env, "replacementBytes", U_ILLEGAL_ARGUMENT_ERROR);
447          return;
448      }
449      memcpy(callbackContext->replacementBytes, replacementBytes.get(), replacementBytes.size());
450      callbackContext->replacementByteCount = replacementBytes.size();
451  
452      UErrorCode errorCode = U_ZERO_ERROR;
453      ucnv_setFromUCallBack(cnv, CHARSET_ENCODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
454      // Iff callbackContextDeleter holds a reference to a callbackContext we can
455      // prevent it being automatically deleted here as responsibility for deletion
456      // has passed to the code that closes the NativeConverter.
457      callbackContextDeleter.release();
458      maybeThrowIcuException(env, "ucnv_setFromUCallBack", errorCode);
459  }
460  
decoderIgnoreCallback(const void *,UConverterToUnicodeArgs *,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)461  static void decoderIgnoreCallback(const void*, UConverterToUnicodeArgs*, const char*, int32_t, UConverterCallbackReason, UErrorCode* err) {
462      // The icu4c UCNV_FROM_U_CALLBACK_SKIP callback requires that the context is NULL, which is
463      // never true for us.
464      *err = U_ZERO_ERROR;
465  }
466  
decoderReplaceCallback(const void * rawContext,UConverterToUnicodeArgs * toArgs,const char *,int32_t,UConverterCallbackReason,UErrorCode * err)467  static void decoderReplaceCallback(const void* rawContext,
468          UConverterToUnicodeArgs* toArgs, const char*, int32_t, UConverterCallbackReason,
469          UErrorCode* err) {
470      if (!rawContext) {
471          return;
472      }
473      const DecoderCallbackContext* context = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
474      *err = U_ZERO_ERROR;
475      ucnv_cbToUWriteUChars(toArgs,context->replacementChars, context->replacementCharCount, 0, err);
476  }
477  
getToUCallback(int32_t mode)478  static UConverterToUCallback getToUCallback(int32_t mode) {
479      switch (mode) {
480      case NativeConverter_IGNORE: return decoderIgnoreCallback;
481      case NativeConverter_REPLACE: return decoderReplaceCallback;
482      case NativeConverter_REPORT: return UCNV_TO_U_CALLBACK_STOP;
483      }
484      abort();
485  }
486  
CHARSET_DECODER_CALLBACK(const void * rawContext,UConverterToUnicodeArgs * args,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * status)487  static void CHARSET_DECODER_CALLBACK(const void* rawContext, UConverterToUnicodeArgs* args,
488          const char* codeUnits, int32_t length,
489          UConverterCallbackReason reason, UErrorCode* status) {
490      if (!rawContext) {
491          return;
492      }
493      const DecoderCallbackContext* ctx = reinterpret_cast<const DecoderCallbackContext*>(rawContext);
494      switch(reason) {
495      case UCNV_UNASSIGNED:
496          ctx->onUnmappableInput(ctx, args, codeUnits, length, reason, status);
497          return;
498      case UCNV_ILLEGAL:
499      case UCNV_IRREGULAR:
500          ctx->onMalformedInput(ctx, args, codeUnits, length, reason, status);
501          return;
502      case UCNV_CLOSE:
503          delete ctx;
504          return;
505      default:
506          *status = U_ILLEGAL_ARGUMENT_ERROR;
507          return;
508      }
509  }
510  
NativeConverter_setCallbackDecode(JNIEnv * env,jclass,jlong address,jint onMalformedInput,jint onUnmappableInput,jstring javaReplacement)511  static void NativeConverter_setCallbackDecode(JNIEnv* env, jclass, jlong address,
512          jint onMalformedInput, jint onUnmappableInput, jstring javaReplacement) {
513      UConverter* cnv = toUConverter(address);
514      if (cnv == NULL) {
515          maybeThrowIcuException(env, "toConverter", U_ILLEGAL_ARGUMENT_ERROR);
516          return;
517      }
518  
519      UConverterToUCallback oldCallback;
520      const void* oldCallbackContext;
521      ucnv_getToUCallBack(cnv, &oldCallback, &oldCallbackContext);
522  
523      DecoderCallbackContext* callbackContext = const_cast<DecoderCallbackContext*>(
524              reinterpret_cast<const DecoderCallbackContext*>(oldCallbackContext));
525      // Hold the reference to any new callbackContext we create in a unique_ptr
526      // so that the default behavior is to collect it automatically if we exit
527      // early.
528      std::unique_ptr<DecoderCallbackContext> callbackContextDeleter;
529      if (callbackContext == NULL) {
530          callbackContext = new DecoderCallbackContext;
531          callbackContextDeleter.reset(callbackContext);
532      }
533  
534      callbackContext->onMalformedInput = getToUCallback(onMalformedInput);
535      callbackContext->onUnmappableInput = getToUCallback(onUnmappableInput);
536  
537      ScopedStringChars replacement(env, javaReplacement);
538      if (replacement.get() == NULL
539                  || replacement.size() > sizeof(callbackContext->replacementChars) / sizeof(UChar)) {
540          maybeThrowIcuException(env, "replacement", U_ILLEGAL_ARGUMENT_ERROR);
541          return;
542      }
543      u_strncpy(callbackContext->replacementChars, reinterpret_cast<const UChar*>(replacement.get()), replacement.size());
544      callbackContext->replacementCharCount = replacement.size();
545  
546      UErrorCode errorCode = U_ZERO_ERROR;
547      ucnv_setToUCallBack(cnv, CHARSET_DECODER_CALLBACK, callbackContext, NULL, NULL, &errorCode);
548      // Iff callbackContextDeleter holds a reference to a callbackContext we can
549      // prevent it being automatically deleted here as responsibility for deletion
550      // has passed to the code that closes the NativeConverter.
551      callbackContextDeleter.release();
552      maybeThrowIcuException(env, "ucnv_setToUCallBack", errorCode);
553  }
554  
NativeConverter_getAveCharsPerByte(JNIEnv * env,jclass,jlong handle)555  static jfloat NativeConverter_getAveCharsPerByte(JNIEnv* env, jclass, jlong handle) {
556      return (1 / (jfloat) NativeConverter_getMaxBytesPerChar(env, NULL, handle));
557  }
558  
NativeConverter_getSubstitutionBytes(JNIEnv * env,jclass,jlong address)559  static jbyteArray NativeConverter_getSubstitutionBytes(JNIEnv* env, jclass, jlong address) {
560      UConverter* cnv = toUConverter(address);
561      if (cnv == NULL) {
562          return NULL;
563      }
564      UErrorCode status = U_ZERO_ERROR;
565      char replacementBytes[MAX_REPLACEMENT_LENGTH];
566      int8_t len = sizeof(replacementBytes);
567      ucnv_getSubstChars(cnv, replacementBytes, &len, &status);
568      if (!U_SUCCESS(status)) {
569          return env->NewByteArray(0);
570      }
571      jbyteArray result = env->NewByteArray(len);
572      if (result == NULL) {
573          return NULL;
574      }
575      env->SetByteArrayRegion(result, 0, len, reinterpret_cast<jbyte*>(replacementBytes));
576      return result;
577  }
578  
NativeConverter_contains(JNIEnv * env,jclass,jstring name1,jstring name2)579  static jboolean NativeConverter_contains(JNIEnv* env, jclass, jstring name1, jstring name2) {
580      ScopedUtfChars name1Chars(env, name1);
581      if (name1Chars.c_str() == NULL) {
582          return JNI_FALSE;
583      }
584      ScopedUtfChars name2Chars(env, name2);
585      if (name2Chars.c_str() == NULL) {
586          return JNI_FALSE;
587      }
588  
589      UErrorCode errorCode = U_ZERO_ERROR;
590      icu::LocalUConverterPointer converter1(ucnv_open(name1Chars.c_str(), &errorCode));
591      icu::UnicodeSet set1;
592      ucnv_getUnicodeSet(&*converter1, set1.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
593  
594      icu::LocalUConverterPointer converter2(ucnv_open(name2Chars.c_str(), &errorCode));
595      icu::UnicodeSet set2;
596      ucnv_getUnicodeSet(&*converter2, set2.toUSet(), UCNV_ROUNDTRIP_SET, &errorCode);
597  
598      return U_SUCCESS(errorCode) && set1.containsAll(set2);
599  }
600  
NativeConverter_charsetForName(JNIEnv * env,jclass,jstring charsetName)601  static jobject NativeConverter_charsetForName(JNIEnv* env, jclass, jstring charsetName) {
602      ScopedUtfChars charsetNameChars(env, charsetName);
603      if (charsetNameChars.c_str() == NULL) {
604          return NULL;
605      }
606  
607      // Get ICU's canonical name for this charset.
608      const char* icuCanonicalName = getICUCanonicalName(charsetNameChars.c_str());
609      if (icuCanonicalName == NULL) {
610          return NULL;
611      }
612  
613      // Get Java's canonical name for this charset.
614      jstring javaCanonicalName = getJavaCanonicalName(env, icuCanonicalName);
615      if (env->ExceptionCheck()) {
616          return NULL;
617      }
618  
619      // Check that this charset is supported.
620      {
621          // ICU doesn't offer any "isSupported", so we just open and immediately close.
622          UErrorCode error = U_ZERO_ERROR;
623          icu::LocalUConverterPointer cnv(ucnv_open(icuCanonicalName, &error));
624          if (!U_SUCCESS(error)) {
625              return NULL;
626          }
627      }
628  
629      // Get the aliases for this charset.
630      std::vector<std::string> aliases;
631      if (!collectStandardNames(env, icuCanonicalName, "IANA", aliases)) {
632          return NULL;
633      }
634      if (!collectStandardNames(env, icuCanonicalName, "MIME", aliases)) {
635          return NULL;
636      }
637      if (!collectStandardNames(env, icuCanonicalName, "JAVA", aliases)) {
638          return NULL;
639      }
640      if (!collectStandardNames(env, icuCanonicalName, "WINDOWS", aliases)) {
641          return NULL;
642      }
643      jobjectArray javaAliases = toStringArray(env, aliases);
644      if (env->ExceptionCheck()) {
645          return NULL;
646      }
647  
648      // Construct the CharsetICU object.
649      static jmethodID charsetConstructor = env->GetMethodID(JniConstants::GetCharsetICUClass(env), "<init>",
650              "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V");
651      if (env->ExceptionCheck()) {
652          return NULL;
653      }
654  
655      char const * versionedIcuCanonicalName = getVersionedIcuCanonicalName(icuCanonicalName);
656      jstring versionedIcuCanonicalNameStr = env->NewStringUTF(versionedIcuCanonicalName);
657      if (env->ExceptionCheck()) {
658          return NULL;
659      }
660  
661      return env->NewObject(JniConstants::GetCharsetICUClass(env), charsetConstructor,
662              javaCanonicalName, versionedIcuCanonicalNameStr, javaAliases);
663  }
664  
FreeNativeConverter(void * converter)665  static void FreeNativeConverter(void *converter) {
666      ucnv_close(reinterpret_cast<UConverter*>(converter));
667  }
668  
NativeConverter_getNativeFinalizer(JNIEnv *,jclass)669  static jlong NativeConverter_getNativeFinalizer(JNIEnv*, jclass) {
670      return reinterpret_cast<jlong>(&FreeNativeConverter);
671  }
672  
NativeConverter_getNativeSize(JNIEnv *,jclass)673  static jlong NativeConverter_getNativeSize(JNIEnv*, jclass) {
674      // TODO: Improve estimate.
675      return 200;
676  }
677  
// JNI registration table: one NATIVE_METHOD entry per NativeConverter_* function in this file,
// each paired with its JNI type signature. Passed to jniRegisterNativeMethods by
// register_libcore_icu_NativeConverter.
static JNINativeMethod gMethods[] = {
    NATIVE_METHOD(NativeConverter, charsetForName, "(Ljava/lang/String;)Ljava/nio/charset/Charset;"),
    NATIVE_METHOD(NativeConverter, closeConverter, "(J)V"),
    NATIVE_METHOD(NativeConverter, contains, "(Ljava/lang/String;Ljava/lang/String;)Z"),
    NATIVE_METHOD(NativeConverter, decode, "(J[BI[CI[IZ)I"),
    NATIVE_METHOD(NativeConverter, encode, "(J[CI[BI[IZ)I"),
    NATIVE_METHOD(NativeConverter, getAvailableCharsetNames, "()[Ljava/lang/String;"),
    NATIVE_METHOD(NativeConverter, getAveBytesPerChar, "(J)F"),
    NATIVE_METHOD(NativeConverter, getAveCharsPerByte, "(J)F"),
    NATIVE_METHOD(NativeConverter, getMaxBytesPerChar, "(J)I"),
    NATIVE_METHOD(NativeConverter, getMinBytesPerChar, "(J)I"),
    NATIVE_METHOD(NativeConverter, getSubstitutionBytes, "(J)[B"),
    NATIVE_METHOD(NativeConverter, openConverter, "(Ljava/lang/String;)J"),
    NATIVE_METHOD(NativeConverter, resetByteToChar, "(J)V"),
    NATIVE_METHOD(NativeConverter, resetCharToByte, "(J)V"),
    NATIVE_METHOD(NativeConverter, setCallbackDecode, "(JIILjava/lang/String;)V"),
    NATIVE_METHOD(NativeConverter, setCallbackEncode, "(JII[B)V"),
    NATIVE_METHOD(NativeConverter, getNativeFinalizer, "()J"),
    NATIVE_METHOD(NativeConverter, getNativeSize, "()J")
};
register_libcore_icu_NativeConverter(JNIEnv * env)698  void register_libcore_icu_NativeConverter(JNIEnv* env) {
699      jniRegisterNativeMethods(env, "libcore/icu/NativeConverter", gMethods, NELEM(gMethods));
700  }
701