1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "libcore_util_CharsetUtils.h"
18 
19 #include <string.h>
20 
21 #include "jni_internal.h"
22 #include "mirror/string-inl.h"
23 #include "mirror/string.h"
24 #include "native_util.h"
25 #include "nativehelper/scoped_primitive_array.h"
26 #include "nativehelper/jni_macros.h"
27 #include "scoped_fast_native_object_access-inl.h"
28 #include "unicode/utf16.h"
29 
30 namespace art {
31 
32 /**
33  * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into
34  * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly
35  * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s.
36  *
37  * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only
38  * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie
39  * to the garbage collector (nor hide potentially large allocations from it).
40  *
41  * Because a call to append might require an allocation, it might fail. Callers should always
42  * check the return value of append.
43  */
44 class NativeUnsafeByteSequence {
45  public:
NativeUnsafeByteSequence(JNIEnv * env)46   explicit NativeUnsafeByteSequence(JNIEnv* env)
47     : mEnv(env), mJavaArray(nullptr), mRawArray(nullptr), mSize(-1), mOffset(0) {
48   }
49 
~NativeUnsafeByteSequence()50   ~NativeUnsafeByteSequence() {
51     // Release our pointer to the raw array, copying changes back to the Java heap.
52     if (mRawArray != nullptr) {
53       mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0);
54     }
55   }
56 
append(jbyte b)57   bool append(jbyte b) {
58     if (mOffset == mSize && !resize(mSize * 2)) {
59       return false;
60     }
61     mRawArray[mOffset++] = b;
62     return true;
63   }
64 
resize(int newSize)65   bool resize(int newSize) {
66     if (newSize == mSize) {
67       return true;
68     }
69 
70     // Allocate a new array.
71     jbyteArray newJavaArray = mEnv->NewByteArray(newSize);
72     if (newJavaArray == nullptr) {
73       return false;
74     }
75     jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, nullptr);
76     if (newRawArray == nullptr) {
77       return false;
78     }
79 
80     // Copy data out of the old array and then let go of it.
81     // Note that we may be trimming the array.
82     if (mRawArray != nullptr) {
83       memcpy(newRawArray, mRawArray, mOffset);
84       mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT);
85       mEnv->DeleteLocalRef(mJavaArray);
86     }
87 
88     // Point ourselves at the new array.
89     mJavaArray = newJavaArray;
90     mRawArray = newRawArray;
91     mSize = newSize;
92     return true;
93   }
94 
toByteArray()95   jbyteArray toByteArray() {
96     // Trim any unused space, if necessary.
97     bool okay = resize(mOffset);
98     return okay ? mJavaArray : nullptr;
99   }
100 
101  private:
102   JNIEnv* mEnv;
103   jbyteArray mJavaArray;
104   jbyte* mRawArray;
105   jint mSize;
106   jint mOffset;
107 
108   // Disallow copy and assignment.
109   NativeUnsafeByteSequence(const NativeUnsafeByteSequence&);
110   void operator=(const NativeUnsafeByteSequence&);
111 };
112 
CharsetUtils_asciiBytesToChars(JNIEnv * env,jclass,jbyteArray javaBytes,jint offset,jint length,jcharArray javaChars)113 static void CharsetUtils_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset,
114                                            jint length, jcharArray javaChars) {
115   ScopedByteArrayRO bytes(env, javaBytes);
116   if (bytes.get() == nullptr) {
117     return;
118   }
119   ScopedCharArrayRW chars(env, javaChars);
120   if (chars.get() == nullptr) {
121     return;
122   }
123 
124   const jbyte* src = &bytes[offset];
125   jchar* dst = &chars[0];
126   static const jchar REPLACEMENT_CHAR = 0xfffd;
127   for (int i = length - 1; i >= 0; --i) {
128     jchar ch = static_cast<jchar>(*src++ & 0xff);
129     *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR;
130   }
131 }
132 
CharsetUtils_isoLatin1BytesToChars(JNIEnv * env,jclass,jbyteArray javaBytes,jint offset,jint length,jcharArray javaChars)133 static void CharsetUtils_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes,
134                                                jint offset, jint length, jcharArray javaChars) {
135   ScopedByteArrayRO bytes(env, javaBytes);
136   if (bytes.get() == nullptr) {
137     return;
138   }
139   ScopedCharArrayRW chars(env, javaChars);
140   if (chars.get() == nullptr) {
141     return;
142   }
143 
144   const jbyte* src = &bytes[offset];
145   jchar* dst = &chars[0];
146   for (int i = length - 1; i >= 0; --i) {
147     *dst++ = static_cast<jchar>(*src++ & 0xff);
148   }
149 }
150 
151 /**
152  * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that
153  * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while
154  * U+0000 to U+00ff inclusive are identical to ISO-8859-1.
155  */
charsToBytes(JNIEnv * env,jstring java_string,jint offset,jint length,jchar maxValidChar)156 static jbyteArray charsToBytes(JNIEnv* env, jstring java_string, jint offset, jint length,
157                                jchar maxValidChar) {
158   ScopedObjectAccess soa(env);
159   StackHandleScope<1> hs(soa.Self());
160   Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string)));
161   if (string == nullptr) {
162     return nullptr;
163   }
164 
165   jbyteArray javaBytes = env->NewByteArray(length);
166   ScopedByteArrayRW bytes(env, javaBytes);
167   if (bytes.get() == nullptr) {
168     return nullptr;
169   }
170 
171   jbyte* dst = &bytes[0];
172   for (int i = 0; i < length; ++i) {
173     jchar ch = string->CharAt(offset + i);
174     if (ch > maxValidChar) {
175       ch = '?';
176     }
177     *dst++ = static_cast<jbyte>(ch);
178   }
179 
180   return javaBytes;
181 }
182 
CharsetUtils_toAsciiBytes(JNIEnv * env,jclass,jstring java_string,jint offset,jint length)183 static jbyteArray CharsetUtils_toAsciiBytes(JNIEnv* env, jclass, jstring java_string, jint offset,
184                                             jint length) {
185     return charsToBytes(env, java_string, offset, length, 0x7f);
186 }
187 
CharsetUtils_toIsoLatin1Bytes(JNIEnv * env,jclass,jstring java_string,jint offset,jint length)188 static jbyteArray CharsetUtils_toIsoLatin1Bytes(JNIEnv* env, jclass, jstring java_string,
189                                                 jint offset, jint length) {
190     return charsToBytes(env, java_string, offset, length, 0xff);
191 }
192 
CharsetUtils_toUtf8Bytes(JNIEnv * env,jclass,jstring java_string,jint offset,jint length)193 static jbyteArray CharsetUtils_toUtf8Bytes(JNIEnv* env, jclass, jstring java_string, jint offset,
194                                            jint length) {
195   ScopedObjectAccess soa(env);
196   StackHandleScope<1> hs(soa.Self());
197   Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string)));
198   if (string == nullptr) {
199     return nullptr;
200   }
201 
202   NativeUnsafeByteSequence out(env);
203   if (!out.resize(length)) {
204     return nullptr;
205   }
206 
207   const int end = offset + length;
208   for (int i = offset; i < end; ++i) {
209     jint ch = string->CharAt(i);
210     if (ch < 0x80) {
211       // One byte.
212       if (!out.append(ch)) {
213         return nullptr;
214       }
215     } else if (ch < 0x800) {
216       // Two bytes.
217       if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) {
218         return nullptr;
219       }
220     } else if (U16_IS_SURROGATE(ch)) {
221       // A supplementary character.
222       jchar high = static_cast<jchar>(ch);
223       jchar low = (i + 1 != end) ? string->CharAt(i + 1) : 0;
224       if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) {
225         if (!out.append('?')) {
226           return nullptr;
227         }
228         continue;
229       }
230       // Now we know we have a *valid* surrogate pair, we can consume the low surrogate.
231       ++i;
232       ch = U16_GET_SUPPLEMENTARY(high, low);
233       // Four bytes.
234       jbyte b1 = (ch >> 18) | 0xf0;
235       jbyte b2 = ((ch >> 12) & 0x3f) | 0x80;
236       jbyte b3 = ((ch >> 6) & 0x3f) | 0x80;
237       jbyte b4 = (ch & 0x3f) | 0x80;
238       if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) {
239         return nullptr;
240       }
241     } else {
242       // Three bytes.
243       jbyte b1 = (ch >> 12) | 0xe0;
244       jbyte b2 = ((ch >> 6) & 0x3f) | 0x80;
245       jbyte b3 = (ch & 0x3f) | 0x80;
246       if (!out.append(b1) || !out.append(b2) || !out.append(b3)) {
247         return nullptr;
248       }
249     }
250   }
251   return out.toByteArray();
252 }
253 
// Table of the native methods this file provides for CharsetUtils. Each entry names the method
// and gives its JNI type signature:
//   "([BII[C)V"                -> (byte[], int, int, char[]) returning void
//   "(Ljava/lang/String;II)[B" -> (String, int, int) returning byte[]
// NOTE(review): FAST_NATIVE_METHOD is a project macro; presumably it binds each entry to the
// CharsetUtils_<name> function defined in this file - confirm against its definition.
static JNINativeMethod gMethods[] = {
  FAST_NATIVE_METHOD(CharsetUtils, asciiBytesToChars, "([BII[C)V"),
  FAST_NATIVE_METHOD(CharsetUtils, isoLatin1BytesToChars, "([BII[C)V"),
  FAST_NATIVE_METHOD(CharsetUtils, toAsciiBytes, "(Ljava/lang/String;II)[B"),
  FAST_NATIVE_METHOD(CharsetUtils, toIsoLatin1Bytes, "(Ljava/lang/String;II)[B"),
  FAST_NATIVE_METHOD(CharsetUtils, toUtf8Bytes, "(Ljava/lang/String;II)[B"),
};
261 
// Entry point for registering this file's native methods for the Java class
// libcore/util/CharsetUtils.
// NOTE(review): REGISTER_NATIVE_METHODS is a project macro; presumably it picks up `env` and
// gMethods from the enclosing scope - confirm against its definition.
void register_libcore_util_CharsetUtils(JNIEnv* env) {
  REGISTER_NATIVE_METHODS("libcore/util/CharsetUtils");
}
265 
266 }  // namespace art
267