1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "java_lang_StringFactory.h"
18 
19 #include "common_throws.h"
20 #include "handle_scope-inl.h"
21 #include "jni/jni_internal.h"
22 #include "mirror/object-inl.h"
23 #include "mirror/string-alloc-inl.h"
24 #include "native_util.h"
25 #include "nativehelper/jni_macros.h"
26 #include "nativehelper/scoped_local_ref.h"
27 #include "nativehelper/scoped_primitive_array.h"
28 #include "scoped_fast_native_object_access-inl.h"
29 #include "scoped_thread_state_change-inl.h"
30 
31 namespace art {
32 
StringFactory_newStringFromBytes(JNIEnv * env,jclass,jbyteArray java_data,jint high,jint offset,jint byte_count)33 static jstring StringFactory_newStringFromBytes(JNIEnv* env, jclass, jbyteArray java_data,
34                                                 jint high, jint offset, jint byte_count) {
35   ScopedFastNativeObjectAccess soa(env);
36   if (UNLIKELY(java_data == nullptr)) {
37     ThrowNullPointerException("data == null");
38     return nullptr;
39   }
40   StackHandleScope<1> hs(soa.Self());
41   Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
42   int32_t data_size = byte_array->GetLength();
43   if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
44     soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
45                                    "length=%d; regionStart=%d; regionLength=%d", data_size,
46                                    offset, byte_count);
47     return nullptr;
48   }
49   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
50   ObjPtr<mirror::String> result = mirror::String::AllocFromByteArray(soa.Self(),
51                                                                      byte_count,
52                                                                      byte_array,
53                                                                      offset,
54                                                                      high,
55                                                                      allocator_type);
56   return soa.AddLocalReference<jstring>(result);
57 }
58 
59 // The char array passed as `java_data` must not be a null reference.
StringFactory_newStringFromChars(JNIEnv * env,jclass,jint offset,jint char_count,jcharArray java_data)60 static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset,
61                                                 jint char_count, jcharArray java_data) {
62   DCHECK(java_data != nullptr);
63   ScopedFastNativeObjectAccess soa(env);
64   StackHandleScope<1> hs(soa.Self());
65   Handle<mirror::CharArray> char_array(hs.NewHandle(soa.Decode<mirror::CharArray>(java_data)));
66   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
67   ObjPtr<mirror::String> result = mirror::String::AllocFromCharArray(soa.Self(),
68                                                                      char_count,
69                                                                      char_array,
70                                                                      offset,
71                                                                      allocator_type);
72   return soa.AddLocalReference<jstring>(result);
73 }
74 
StringFactory_newStringFromString(JNIEnv * env,jclass,jstring to_copy)75 static jstring StringFactory_newStringFromString(JNIEnv* env, jclass, jstring to_copy) {
76   ScopedFastNativeObjectAccess soa(env);
77   if (UNLIKELY(to_copy == nullptr)) {
78     ThrowNullPointerException("toCopy == null");
79     return nullptr;
80   }
81   StackHandleScope<1> hs(soa.Self());
82   Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(to_copy)));
83   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
84   ObjPtr<mirror::String> result = mirror::String::AllocFromString(soa.Self(),
85                                                                   string->GetLength(),
86                                                                   string,
87                                                                   /*offset=*/ 0,
88                                                                   allocator_type);
89   return soa.AddLocalReference<jstring>(result);
90 }
91 
StringFactory_newStringFromUtf8Bytes(JNIEnv * env,jclass,jbyteArray java_data,jint offset,jint byte_count)92 static jstring StringFactory_newStringFromUtf8Bytes(JNIEnv* env, jclass, jbyteArray java_data,
93                                                     jint offset, jint byte_count) {
94   // Local Define in here
95   static const jchar kReplacementChar = 0xfffd;
96   static const int kDefaultBufferSize = 256;
97   static const int kTableUtf8Needed[] = {
98     //      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f
99     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xc0 - 0xcf
100     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xd0 - 0xdf
101     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // 0xe0 - 0xef
102     3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0 - 0xff
103   };
104 
105   ScopedFastNativeObjectAccess soa(env);
106   if (UNLIKELY(java_data == nullptr)) {
107     ThrowNullPointerException("data == null");
108     return nullptr;
109   }
110 
111   StackHandleScope<1> hs(soa.Self());
112   Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
113   int32_t data_size = byte_array->GetLength();
114   if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
115     soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
116         "length=%d; regionStart=%d; regionLength=%d", data_size,
117         offset, byte_count);
118     return nullptr;
119   }
120 
121   /*
122    * This code converts a UTF-8 byte sequence to a Java String (UTF-16).
123    * It implements the W3C recommended UTF-8 decoder.
124    * https://www.w3.org/TR/encoding/#utf-8-decoder
125    *
126    * Unicode 3.2 Well-Formed UTF-8 Byte Sequences
127    * Code Points        First  Second Third Fourth
128    * U+0000..U+007F     00..7F
129    * U+0080..U+07FF     C2..DF 80..BF
130    * U+0800..U+0FFF     E0     A0..BF 80..BF
131    * U+1000..U+CFFF     E1..EC 80..BF 80..BF
132    * U+D000..U+D7FF     ED     80..9F 80..BF
133    * U+E000..U+FFFF     EE..EF 80..BF 80..BF
134    * U+10000..U+3FFFF   F0     90..BF 80..BF 80..BF
135    * U+40000..U+FFFFF   F1..F3 80..BF 80..BF 80..BF
136    * U+100000..U+10FFFF F4     80..8F 80..BF 80..BF
137    *
138    * Please refer to Unicode as the authority.
139    * p.126 Table 3-7 in http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
140    *
141    * Handling Malformed Input
142    * The maximal subpart should be replaced by a single U+FFFD. Maximal subpart is
143    * the longest code unit subsequence starting at an unconvertible offset that is either
144    * 1) the initial subsequence of a well-formed code unit sequence, or
145    * 2) a subsequence of length one:
146    * One U+FFFD should be emitted for every sequence of bytes that is an incomplete prefix
147    * of a valid sequence, and with the conversion to restart after the incomplete sequence.
148    *
149    * For example, in byte sequence "41 C0 AF 41 F4 80 80 41", the maximal subparts are
150    * "C0", "AF", and "F4 80 80". "F4 80 80" can be the initial subsequence of "F4 80 80 80",
151    * but "C0" can't be the initial subsequence of any well-formed code unit sequence.
152    * Thus, the output should be "A\ufffd\ufffdA\ufffdA".
153    *
154    * Please refer to section "Best Practices for Using U+FFFD." in
155    * http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
156    */
157 
158   // Initial value
159   jchar stack_buffer[kDefaultBufferSize];
160   std::unique_ptr<jchar[]> allocated_buffer;
161   jchar* v;
162   if (byte_count <= kDefaultBufferSize) {
163     v = stack_buffer;
164   } else {
165     allocated_buffer.reset(new jchar[byte_count]);
166     v = allocated_buffer.get();
167   }
168 
169   jbyte* d = byte_array->GetData();
170   DCHECK(d != nullptr);
171 
172   int idx = offset;
173   int last = offset + byte_count;
174   int s = 0;
175 
176   int code_point = 0;
177   int utf8_bytes_seen = 0;
178   int utf8_bytes_needed = 0;
179   int lower_bound = 0x80;
180   int upper_bound = 0xbf;
181   while (idx < last) {
182     int b = d[idx++] & 0xff;
183     if (utf8_bytes_needed == 0) {
184       if ((b & 0x80) == 0) {  // ASCII char. 0xxxxxxx
185         v[s++] = (jchar) b;
186         continue;
187       }
188 
189       if ((b & 0x40) == 0) {  // 10xxxxxx is illegal as first byte
190         v[s++] = kReplacementChar;
191         continue;
192       }
193 
194       // 11xxxxxx
195       int tableLookupIndex = b & 0x3f;
196       utf8_bytes_needed = kTableUtf8Needed[tableLookupIndex];
197       if (utf8_bytes_needed == 0) {
198         v[s++] = kReplacementChar;
199         continue;
200       }
201 
202       // utf8_bytes_needed
203       // 1: b & 0x1f
204       // 2: b & 0x0f
205       // 3: b & 0x07
206       code_point = b & (0x3f >> utf8_bytes_needed);
207       if (b == 0xe0) {
208         lower_bound = 0xa0;
209       } else if (b == 0xed) {
210         upper_bound = 0x9f;
211       } else if (b == 0xf0) {
212         lower_bound = 0x90;
213       } else if (b == 0xf4) {
214         upper_bound = 0x8f;
215       }
216     } else {
217       if (b < lower_bound || b > upper_bound) {
218         // The bytes seen are ill-formed. Substitute them with U+FFFD
219         v[s++] = kReplacementChar;
220         code_point = 0;
221         utf8_bytes_needed = 0;
222         utf8_bytes_seen = 0;
223         lower_bound = 0x80;
224         upper_bound = 0xbf;
225         /*
226          * According to the Unicode Standard,
227          * "a UTF-8 conversion process is required to never consume well-formed
228          * subsequences as part of its error handling for ill-formed subsequences"
229          * The current byte could be part of well-formed subsequences. Reduce the
230          * index by 1 to parse it in next loop.
231          */
232         idx--;
233         continue;
234       }
235 
236       lower_bound = 0x80;
237       upper_bound = 0xbf;
238       code_point = (code_point << 6) | (b & 0x3f);
239       utf8_bytes_seen++;
240       if (utf8_bytes_needed != utf8_bytes_seen) {
241         continue;
242       }
243 
244       // Encode chars from U+10000 up as surrogate pairs
245       if (code_point < 0x10000) {
246         v[s++] = (jchar) code_point;
247       } else {
248         v[s++] = (jchar) ((code_point >> 10) + 0xd7c0);
249         v[s++] = (jchar) ((code_point & 0x3ff) + 0xdc00);
250       }
251 
252       utf8_bytes_seen = 0;
253       utf8_bytes_needed = 0;
254       code_point = 0;
255     }
256   }
257 
258   // The bytes seen are ill-formed. Substitute them by U+FFFD
259   if (utf8_bytes_needed != 0) {
260     v[s++] = kReplacementChar;
261   }
262 
263   ObjPtr<mirror::String> result = mirror::String::AllocFromUtf16(soa.Self(), s, v);
264   return soa.AddLocalReference<jstring>(result);
265 }
266 
267 static JNINativeMethod gMethods[] = {
268   FAST_NATIVE_METHOD(StringFactory, newStringFromBytes, "([BIII)Ljava/lang/String;"),
269   FAST_NATIVE_METHOD(StringFactory, newStringFromChars, "(II[C)Ljava/lang/String;"),
270   FAST_NATIVE_METHOD(StringFactory, newStringFromString, "(Ljava/lang/String;)Ljava/lang/String;"),
271   FAST_NATIVE_METHOD(StringFactory, newStringFromUtf8Bytes, "([BII)Ljava/lang/String;"),
272 };
273 
register_java_lang_StringFactory(JNIEnv * env)274 void register_java_lang_StringFactory(JNIEnv* env) {
275   REGISTER_NATIVE_METHODS("java/lang/StringFactory");
276 }
277 
278 }  // namespace art
279