1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "java_lang_StringFactory.h"
18
19 #include "common_throws.h"
20 #include "handle_scope-inl.h"
21 #include "jni/jni_internal.h"
22 #include "mirror/object-inl.h"
23 #include "mirror/string-alloc-inl.h"
24 #include "native_util.h"
25 #include "nativehelper/jni_macros.h"
26 #include "nativehelper/scoped_local_ref.h"
27 #include "nativehelper/scoped_primitive_array.h"
28 #include "scoped_fast_native_object_access-inl.h"
29 #include "scoped_thread_state_change-inl.h"
30
31 namespace art {
32
StringFactory_newStringFromBytes(JNIEnv * env,jclass,jbyteArray java_data,jint high,jint offset,jint byte_count)33 static jstring StringFactory_newStringFromBytes(JNIEnv* env, jclass, jbyteArray java_data,
34 jint high, jint offset, jint byte_count) {
35 ScopedFastNativeObjectAccess soa(env);
36 if (UNLIKELY(java_data == nullptr)) {
37 ThrowNullPointerException("data == null");
38 return nullptr;
39 }
40 StackHandleScope<1> hs(soa.Self());
41 Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
42 int32_t data_size = byte_array->GetLength();
43 if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
44 soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
45 "length=%d; regionStart=%d; regionLength=%d", data_size,
46 offset, byte_count);
47 return nullptr;
48 }
49 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
50 ObjPtr<mirror::String> result = mirror::String::AllocFromByteArray(soa.Self(),
51 byte_count,
52 byte_array,
53 offset,
54 high,
55 allocator_type);
56 return soa.AddLocalReference<jstring>(result);
57 }
58
59 // The char array passed as `java_data` must not be a null reference.
StringFactory_newStringFromChars(JNIEnv * env,jclass,jint offset,jint char_count,jcharArray java_data)60 static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset,
61 jint char_count, jcharArray java_data) {
62 DCHECK(java_data != nullptr);
63 ScopedFastNativeObjectAccess soa(env);
64 StackHandleScope<1> hs(soa.Self());
65 Handle<mirror::CharArray> char_array(hs.NewHandle(soa.Decode<mirror::CharArray>(java_data)));
66 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
67 ObjPtr<mirror::String> result = mirror::String::AllocFromCharArray(soa.Self(),
68 char_count,
69 char_array,
70 offset,
71 allocator_type);
72 return soa.AddLocalReference<jstring>(result);
73 }
74
StringFactory_newStringFromString(JNIEnv * env,jclass,jstring to_copy)75 static jstring StringFactory_newStringFromString(JNIEnv* env, jclass, jstring to_copy) {
76 ScopedFastNativeObjectAccess soa(env);
77 if (UNLIKELY(to_copy == nullptr)) {
78 ThrowNullPointerException("toCopy == null");
79 return nullptr;
80 }
81 StackHandleScope<1> hs(soa.Self());
82 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(to_copy)));
83 gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
84 ObjPtr<mirror::String> result = mirror::String::AllocFromString(soa.Self(),
85 string->GetLength(),
86 string,
87 /*offset=*/ 0,
88 allocator_type);
89 return soa.AddLocalReference<jstring>(result);
90 }
91
StringFactory_newStringFromUtf8Bytes(JNIEnv * env,jclass,jbyteArray java_data,jint offset,jint byte_count)92 static jstring StringFactory_newStringFromUtf8Bytes(JNIEnv* env, jclass, jbyteArray java_data,
93 jint offset, jint byte_count) {
94 // Local Define in here
95 static const jchar kReplacementChar = 0xfffd;
96 static const int kDefaultBufferSize = 256;
97 static const int kTableUtf8Needed[] = {
98 // 0 1 2 3 4 5 6 7 8 9 a b c d e f
99 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc0 - 0xcf
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd0 - 0xdf
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xe0 - 0xef
102 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff
103 };
104
105 ScopedFastNativeObjectAccess soa(env);
106 if (UNLIKELY(java_data == nullptr)) {
107 ThrowNullPointerException("data == null");
108 return nullptr;
109 }
110
111 StackHandleScope<1> hs(soa.Self());
112 Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
113 int32_t data_size = byte_array->GetLength();
114 if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
115 soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
116 "length=%d; regionStart=%d; regionLength=%d", data_size,
117 offset, byte_count);
118 return nullptr;
119 }
120
121 /*
122 * This code converts a UTF-8 byte sequence to a Java String (UTF-16).
123 * It implements the W3C recommended UTF-8 decoder.
124 * https://www.w3.org/TR/encoding/#utf-8-decoder
125 *
126 * Unicode 3.2 Well-Formed UTF-8 Byte Sequences
127 * Code Points First Second Third Fourth
128 * U+0000..U+007F 00..7F
129 * U+0080..U+07FF C2..DF 80..BF
130 * U+0800..U+0FFF E0 A0..BF 80..BF
131 * U+1000..U+CFFF E1..EC 80..BF 80..BF
132 * U+D000..U+D7FF ED 80..9F 80..BF
133 * U+E000..U+FFFF EE..EF 80..BF 80..BF
134 * U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
135 * U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
136 * U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
137 *
138 * Please refer to Unicode as the authority.
139 * p.126 Table 3-7 in http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
140 *
141 * Handling Malformed Input
142 * The maximal subpart should be replaced by a single U+FFFD. Maximal subpart is
143 * the longest code unit subsequence starting at an unconvertible offset that is either
144 * 1) the initial subsequence of a well-formed code unit sequence, or
145 * 2) a subsequence of length one:
146 * One U+FFFD should be emitted for every sequence of bytes that is an incomplete prefix
147 * of a valid sequence, and with the conversion to restart after the incomplete sequence.
148 *
149 * For example, in byte sequence "41 C0 AF 41 F4 80 80 41", the maximal subparts are
150 * "C0", "AF", and "F4 80 80". "F4 80 80" can be the initial subsequence of "F4 80 80 80",
151 * but "C0" can't be the initial subsequence of any well-formed code unit sequence.
152 * Thus, the output should be "A\ufffd\ufffdA\ufffdA".
153 *
154 * Please refer to section "Best Practices for Using U+FFFD." in
155 * http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
156 */
157
158 // Initial value
159 jchar stack_buffer[kDefaultBufferSize];
160 std::unique_ptr<jchar[]> allocated_buffer;
161 jchar* v;
162 if (byte_count <= kDefaultBufferSize) {
163 v = stack_buffer;
164 } else {
165 allocated_buffer.reset(new jchar[byte_count]);
166 v = allocated_buffer.get();
167 }
168
169 jbyte* d = byte_array->GetData();
170 DCHECK(d != nullptr);
171
172 int idx = offset;
173 int last = offset + byte_count;
174 int s = 0;
175
176 int code_point = 0;
177 int utf8_bytes_seen = 0;
178 int utf8_bytes_needed = 0;
179 int lower_bound = 0x80;
180 int upper_bound = 0xbf;
181 while (idx < last) {
182 int b = d[idx++] & 0xff;
183 if (utf8_bytes_needed == 0) {
184 if ((b & 0x80) == 0) { // ASCII char. 0xxxxxxx
185 v[s++] = (jchar) b;
186 continue;
187 }
188
189 if ((b & 0x40) == 0) { // 10xxxxxx is illegal as first byte
190 v[s++] = kReplacementChar;
191 continue;
192 }
193
194 // 11xxxxxx
195 int tableLookupIndex = b & 0x3f;
196 utf8_bytes_needed = kTableUtf8Needed[tableLookupIndex];
197 if (utf8_bytes_needed == 0) {
198 v[s++] = kReplacementChar;
199 continue;
200 }
201
202 // utf8_bytes_needed
203 // 1: b & 0x1f
204 // 2: b & 0x0f
205 // 3: b & 0x07
206 code_point = b & (0x3f >> utf8_bytes_needed);
207 if (b == 0xe0) {
208 lower_bound = 0xa0;
209 } else if (b == 0xed) {
210 upper_bound = 0x9f;
211 } else if (b == 0xf0) {
212 lower_bound = 0x90;
213 } else if (b == 0xf4) {
214 upper_bound = 0x8f;
215 }
216 } else {
217 if (b < lower_bound || b > upper_bound) {
218 // The bytes seen are ill-formed. Substitute them with U+FFFD
219 v[s++] = kReplacementChar;
220 code_point = 0;
221 utf8_bytes_needed = 0;
222 utf8_bytes_seen = 0;
223 lower_bound = 0x80;
224 upper_bound = 0xbf;
225 /*
226 * According to the Unicode Standard,
227 * "a UTF-8 conversion process is required to never consume well-formed
228 * subsequences as part of its error handling for ill-formed subsequences"
229 * The current byte could be part of well-formed subsequences. Reduce the
230 * index by 1 to parse it in next loop.
231 */
232 idx--;
233 continue;
234 }
235
236 lower_bound = 0x80;
237 upper_bound = 0xbf;
238 code_point = (code_point << 6) | (b & 0x3f);
239 utf8_bytes_seen++;
240 if (utf8_bytes_needed != utf8_bytes_seen) {
241 continue;
242 }
243
244 // Encode chars from U+10000 up as surrogate pairs
245 if (code_point < 0x10000) {
246 v[s++] = (jchar) code_point;
247 } else {
248 v[s++] = (jchar) ((code_point >> 10) + 0xd7c0);
249 v[s++] = (jchar) ((code_point & 0x3ff) + 0xdc00);
250 }
251
252 utf8_bytes_seen = 0;
253 utf8_bytes_needed = 0;
254 code_point = 0;
255 }
256 }
257
258 // The bytes seen are ill-formed. Substitute them by U+FFFD
259 if (utf8_bytes_needed != 0) {
260 v[s++] = kReplacementChar;
261 }
262
263 ObjPtr<mirror::String> result = mirror::String::AllocFromUtf16(soa.Self(), s, v);
264 return soa.AddLocalReference<jstring>(result);
265 }
266
267 static JNINativeMethod gMethods[] = {
268 FAST_NATIVE_METHOD(StringFactory, newStringFromBytes, "([BIII)Ljava/lang/String;"),
269 FAST_NATIVE_METHOD(StringFactory, newStringFromChars, "(II[C)Ljava/lang/String;"),
270 FAST_NATIVE_METHOD(StringFactory, newStringFromString, "(Ljava/lang/String;)Ljava/lang/String;"),
271 FAST_NATIVE_METHOD(StringFactory, newStringFromUtf8Bytes, "([BII)Ljava/lang/String;"),
272 };
273
register_java_lang_StringFactory(JNIEnv * env)274 void register_java_lang_StringFactory(JNIEnv* env) {
275 REGISTER_NATIVE_METHODS("java/lang/StringFactory");
276 }
277
278 } // namespace art
279