1 /*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "libcore_util_CharsetUtils.h"
18
19 #include <string.h>
20
21 #include "jni_internal.h"
22 #include "mirror/string-inl.h"
23 #include "mirror/string.h"
24 #include "native_util.h"
25 #include "nativehelper/scoped_primitive_array.h"
26 #include "nativehelper/jni_macros.h"
27 #include "scoped_fast_native_object_access-inl.h"
28 #include "unicode/utf16.h"
29
30 namespace art {
31
32 /**
33 * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into
34 * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly
35 * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s.
36 *
37 * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only
38 * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie
39 * to the garbage collector (nor hide potentially large allocations from it).
40 *
41 * Because a call to append might require an allocation, it might fail. Callers should always
42 * check the return value of append.
43 */
44 class NativeUnsafeByteSequence {
45 public:
NativeUnsafeByteSequence(JNIEnv * env)46 explicit NativeUnsafeByteSequence(JNIEnv* env)
47 : mEnv(env), mJavaArray(nullptr), mRawArray(nullptr), mSize(-1), mOffset(0) {
48 }
49
~NativeUnsafeByteSequence()50 ~NativeUnsafeByteSequence() {
51 // Release our pointer to the raw array, copying changes back to the Java heap.
52 if (mRawArray != nullptr) {
53 mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0);
54 }
55 }
56
append(jbyte b)57 bool append(jbyte b) {
58 if (mOffset == mSize && !resize(mSize * 2)) {
59 return false;
60 }
61 mRawArray[mOffset++] = b;
62 return true;
63 }
64
resize(int newSize)65 bool resize(int newSize) {
66 if (newSize == mSize) {
67 return true;
68 }
69
70 // Allocate a new array.
71 jbyteArray newJavaArray = mEnv->NewByteArray(newSize);
72 if (newJavaArray == nullptr) {
73 return false;
74 }
75 jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, nullptr);
76 if (newRawArray == nullptr) {
77 return false;
78 }
79
80 // Copy data out of the old array and then let go of it.
81 // Note that we may be trimming the array.
82 if (mRawArray != nullptr) {
83 memcpy(newRawArray, mRawArray, mOffset);
84 mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT);
85 mEnv->DeleteLocalRef(mJavaArray);
86 }
87
88 // Point ourselves at the new array.
89 mJavaArray = newJavaArray;
90 mRawArray = newRawArray;
91 mSize = newSize;
92 return true;
93 }
94
toByteArray()95 jbyteArray toByteArray() {
96 // Trim any unused space, if necessary.
97 bool okay = resize(mOffset);
98 return okay ? mJavaArray : nullptr;
99 }
100
101 private:
102 JNIEnv* mEnv;
103 jbyteArray mJavaArray;
104 jbyte* mRawArray;
105 jint mSize;
106 jint mOffset;
107
108 // Disallow copy and assignment.
109 NativeUnsafeByteSequence(const NativeUnsafeByteSequence&);
110 void operator=(const NativeUnsafeByteSequence&);
111 };
112
CharsetUtils_asciiBytesToChars(JNIEnv * env,jclass,jbyteArray javaBytes,jint offset,jint length,jcharArray javaChars)113 static void CharsetUtils_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset,
114 jint length, jcharArray javaChars) {
115 ScopedByteArrayRO bytes(env, javaBytes);
116 if (bytes.get() == nullptr) {
117 return;
118 }
119 ScopedCharArrayRW chars(env, javaChars);
120 if (chars.get() == nullptr) {
121 return;
122 }
123
124 const jbyte* src = &bytes[offset];
125 jchar* dst = &chars[0];
126 static const jchar REPLACEMENT_CHAR = 0xfffd;
127 for (int i = length - 1; i >= 0; --i) {
128 jchar ch = static_cast<jchar>(*src++ & 0xff);
129 *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR;
130 }
131 }
132
CharsetUtils_isoLatin1BytesToChars(JNIEnv * env,jclass,jbyteArray javaBytes,jint offset,jint length,jcharArray javaChars)133 static void CharsetUtils_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes,
134 jint offset, jint length, jcharArray javaChars) {
135 ScopedByteArrayRO bytes(env, javaBytes);
136 if (bytes.get() == nullptr) {
137 return;
138 }
139 ScopedCharArrayRW chars(env, javaChars);
140 if (chars.get() == nullptr) {
141 return;
142 }
143
144 const jbyte* src = &bytes[offset];
145 jchar* dst = &chars[0];
146 for (int i = length - 1; i >= 0; --i) {
147 *dst++ = static_cast<jchar>(*src++ & 0xff);
148 }
149 }
150
151 /**
152 * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that
153 * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while
154 * U+0000 to U+00ff inclusive are identical to ISO-8859-1.
155 */
charsToBytes(JNIEnv * env,jstring java_string,jint offset,jint length,jchar maxValidChar)156 static jbyteArray charsToBytes(JNIEnv* env, jstring java_string, jint offset, jint length,
157 jchar maxValidChar) {
158 ScopedObjectAccess soa(env);
159 StackHandleScope<1> hs(soa.Self());
160 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string)));
161 if (string == nullptr) {
162 return nullptr;
163 }
164
165 jbyteArray javaBytes = env->NewByteArray(length);
166 ScopedByteArrayRW bytes(env, javaBytes);
167 if (bytes.get() == nullptr) {
168 return nullptr;
169 }
170
171 jbyte* dst = &bytes[0];
172 for (int i = 0; i < length; ++i) {
173 jchar ch = string->CharAt(offset + i);
174 if (ch > maxValidChar) {
175 ch = '?';
176 }
177 *dst++ = static_cast<jbyte>(ch);
178 }
179
180 return javaBytes;
181 }
182
CharsetUtils_toAsciiBytes(JNIEnv * env,jclass,jstring java_string,jint offset,jint length)183 static jbyteArray CharsetUtils_toAsciiBytes(JNIEnv* env, jclass, jstring java_string, jint offset,
184 jint length) {
185 return charsToBytes(env, java_string, offset, length, 0x7f);
186 }
187
CharsetUtils_toIsoLatin1Bytes(JNIEnv * env,jclass,jstring java_string,jint offset,jint length)188 static jbyteArray CharsetUtils_toIsoLatin1Bytes(JNIEnv* env, jclass, jstring java_string,
189 jint offset, jint length) {
190 return charsToBytes(env, java_string, offset, length, 0xff);
191 }
192
CharsetUtils_toUtf8Bytes(JNIEnv * env,jclass,jstring java_string,jint offset,jint length)193 static jbyteArray CharsetUtils_toUtf8Bytes(JNIEnv* env, jclass, jstring java_string, jint offset,
194 jint length) {
195 ScopedObjectAccess soa(env);
196 StackHandleScope<1> hs(soa.Self());
197 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string)));
198 if (string == nullptr) {
199 return nullptr;
200 }
201
202 NativeUnsafeByteSequence out(env);
203 if (!out.resize(length)) {
204 return nullptr;
205 }
206
207 const int end = offset + length;
208 for (int i = offset; i < end; ++i) {
209 jint ch = string->CharAt(i);
210 if (ch < 0x80) {
211 // One byte.
212 if (!out.append(ch)) {
213 return nullptr;
214 }
215 } else if (ch < 0x800) {
216 // Two bytes.
217 if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) {
218 return nullptr;
219 }
220 } else if (U16_IS_SURROGATE(ch)) {
221 // A supplementary character.
222 jchar high = static_cast<jchar>(ch);
223 jchar low = (i + 1 != end) ? string->CharAt(i + 1) : 0;
224 if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) {
225 if (!out.append('?')) {
226 return nullptr;
227 }
228 continue;
229 }
230 // Now we know we have a *valid* surrogate pair, we can consume the low surrogate.
231 ++i;
232 ch = U16_GET_SUPPLEMENTARY(high, low);
233 // Four bytes.
234 jbyte b1 = (ch >> 18) | 0xf0;
235 jbyte b2 = ((ch >> 12) & 0x3f) | 0x80;
236 jbyte b3 = ((ch >> 6) & 0x3f) | 0x80;
237 jbyte b4 = (ch & 0x3f) | 0x80;
238 if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) {
239 return nullptr;
240 }
241 } else {
242 // Three bytes.
243 jbyte b1 = (ch >> 12) | 0xe0;
244 jbyte b2 = ((ch >> 6) & 0x3f) | 0x80;
245 jbyte b3 = (ch & 0x3f) | 0x80;
246 if (!out.append(b1) || !out.append(b2) || !out.append(b3)) {
247 return nullptr;
248 }
249 }
250 }
251 return out.toByteArray();
252 }
253
254 static JNINativeMethod gMethods[] = {
255 FAST_NATIVE_METHOD(CharsetUtils, asciiBytesToChars, "([BII[C)V"),
256 FAST_NATIVE_METHOD(CharsetUtils, isoLatin1BytesToChars, "([BII[C)V"),
257 FAST_NATIVE_METHOD(CharsetUtils, toAsciiBytes, "(Ljava/lang/String;II)[B"),
258 FAST_NATIVE_METHOD(CharsetUtils, toIsoLatin1Bytes, "(Ljava/lang/String;II)[B"),
259 FAST_NATIVE_METHOD(CharsetUtils, toUtf8Bytes, "(Ljava/lang/String;II)[B"),
260 };
261
register_libcore_util_CharsetUtils(JNIEnv * env)262 void register_libcore_util_CharsetUtils(JNIEnv* env) {
263 REGISTER_NATIVE_METHODS("libcore/util/CharsetUtils");
264 }
265
266 } // namespace art
267