1 /*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Matcher"
18
19 #include <memory>
20 #include <stdlib.h>
21
22 #include <android-base/logging.h>
23 #include <nativehelper/JNIHelp.h>
24 #include <nativehelper/ScopedPrimitiveArray.h>
25 #include <nativehelper/ScopedStringChars.h>
26 #include <nativehelper/jni_macros.h>
27
28 #include "IcuUtilities.h"
29 #include "JniException.h"
30 #include "ScopedJavaUnicodeString.h"
31 #include "unicode/parseerr.h"
32 #include "unicode/regex.h"
33
34 // ICU documentation: http://icu-project.org/apiref/icu4c/classRegexMatcher.html
35
36 /**
37 * Encapsulates an instance of ICU4C's RegexMatcher class along with a copy of
38 * the input it's currently operating on in the native heap.
39 *
40 * Rationale: We choose to make a copy here because it turns out to be a lot
41 * cheaper when a moving GC and/or string compression is enabled. This is
42 * because env->GetStringChars() always copies in this scenario. This becomes
43 * especially bad when the String in question is long and/or contains a large
44 * number of matches.
45 *
46 * Drawbacks: The native allocation associated with this class is no longer
47 * fixed size, so we're effectively lying to the NativeAllocationRegistry about
48 * the size of the object(s) we're allocating on the native heap. The peak
49 * memory usage doesn't change though, given that GetStringChars would have
50 * made an allocation of precisely the same size.
51 */
52 class MatcherState {
53 public:
MatcherState(icu::RegexMatcher * matcher)54 MatcherState(icu::RegexMatcher* matcher) :
55 mMatcher(matcher),
56 mUChars(nullptr),
57 mUText(nullptr),
58 mStatus(U_ZERO_ERROR) {
59 }
60
updateInput(JNIEnv * env,jstring input)61 bool updateInput(JNIEnv* env, jstring input) {
62 // First, close the UText struct, since we're about to allocate a new one.
63 if (mUText != nullptr) {
64 utext_close(mUText);
65 mUText = nullptr;
66 }
67
68 // Then delete the UChar* associated with the UText struct..
69 mUChars.reset(nullptr);
70
71 // TODO: We should investigate whether we can avoid an additional copy
72 // in the native heap when is_copy == JNI_TRUE. The problem with doing
73 // that is that we might call ReleaseStringChars with a different
74 // JNIEnv* on a different downcall. This is currently safe as
75 // implemented in ART, but is unlikely to be portable and the spec stays
76 // silent on the matter.
77 ScopedStringChars inputChars(env, input);
78 if (inputChars.get() == nullptr) {
79 // There will be an exception pending if we get here.
80 return false;
81 }
82
83 // Make a copy of |input| on the native heap. This copy will be live
84 // until the next call to updateInput or close.
85 mUChars.reset(new (std::nothrow) UChar[inputChars.size()]);
86 if (mUChars.get() == nullptr) {
87 env->ThrowNew(env->FindClass("Ljava/lang/OutOfMemoryError;"), "Out of memory");
88 return false;
89 }
90
91 static_assert(sizeof(UChar) == sizeof(jchar), "sizeof(Uchar) != sizeof(jchar)");
92 memcpy(mUChars.get(), inputChars.get(), inputChars.size() * sizeof(jchar));
93
94 // Reset any errors that might have occurred on previous patches.
95 mStatus = U_ZERO_ERROR;
96 mUText = utext_openUChars(nullptr, mUChars.get(), inputChars.size(), &mStatus);
97 if (mUText == nullptr) {
98 CHECK(maybeThrowIcuException(env, "utext_openUChars", mStatus));
99 return false;
100 }
101
102 // It is an error for ICU to have returned a non-null mUText but to
103 // still have indicated an error.
104 CHECK(U_SUCCESS(mStatus));
105
106 mMatcher->reset(mUText);
107 return true;
108 }
109
~MatcherState()110 ~MatcherState() {
111 if (mUText != nullptr) {
112 utext_close(mUText);
113 }
114 }
115
matcher()116 icu::RegexMatcher* matcher() {
117 return mMatcher.get();
118 }
119
status()120 UErrorCode& status() {
121 return mStatus;
122 }
123
updateOffsets(JNIEnv * env,jintArray javaOffsets)124 void updateOffsets(JNIEnv* env, jintArray javaOffsets) {
125 ScopedIntArrayRW offsets(env, javaOffsets);
126 if (offsets.get() == NULL) {
127 return;
128 }
129
130 for (size_t i = 0, groupCount = mMatcher->groupCount(); i <= groupCount; ++i) {
131 offsets[2*i + 0] = mMatcher->start(i, mStatus);
132 offsets[2*i + 1] = mMatcher->end(i, mStatus);
133 }
134 }
135
136 private:
137 std::unique_ptr<icu::RegexMatcher> mMatcher;
138 std::unique_ptr<UChar[]> mUChars;
139 UText* mUText;
140 UErrorCode mStatus;
141
142 // Disallow copy and assignment.
143 MatcherState(const MatcherState&);
144 void operator=(const MatcherState&);
145 };
146
toMatcherState(jlong address)147 static inline MatcherState* toMatcherState(jlong address) {
148 return reinterpret_cast<MatcherState*>(static_cast<uintptr_t>(address));
149 }
150
Matcher_free(void * address)151 static void Matcher_free(void* address) {
152 MatcherState* state = reinterpret_cast<MatcherState*>(address);
153 delete state;
154 }
155
Matcher_getNativeFinalizer(JNIEnv *,jclass)156 static jlong Matcher_getNativeFinalizer(JNIEnv*, jclass) {
157 return reinterpret_cast<jlong>(&Matcher_free);
158 }
159
Matcher_findImpl(JNIEnv * env,jclass,jlong addr,jint startIndex,jintArray offsets)160 static jboolean Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jint startIndex, jintArray offsets) {
161 MatcherState* state = toMatcherState(addr);
162 UBool result = state->matcher()->find(startIndex, state->status());
163 if (result) {
164 state->updateOffsets(env, offsets);
165 return JNI_TRUE;
166 } else {
167 return JNI_FALSE;
168 }
169 }
170
Matcher_findNextImpl(JNIEnv * env,jclass,jlong addr,jintArray offsets)171 static jboolean Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
172 MatcherState* state = toMatcherState(addr);
173 UBool result = state->matcher()->find();
174 if (result) {
175 state->updateOffsets(env, offsets);
176 return JNI_TRUE;
177 } else {
178 return JNI_FALSE;
179 }
180 }
181
Matcher_groupCountImpl(JNIEnv *,jclass,jlong addr)182 static jint Matcher_groupCountImpl(JNIEnv*, jclass, jlong addr) {
183 MatcherState* state = toMatcherState(addr);
184 return state->matcher()->groupCount();
185 }
186
Matcher_hitEndImpl(JNIEnv *,jclass,jlong addr)187 static jboolean Matcher_hitEndImpl(JNIEnv*, jclass, jlong addr) {
188 MatcherState* state = toMatcherState(addr);
189 if (state->matcher()->hitEnd() != 0) {
190 return JNI_TRUE;
191 } else {
192 return JNI_FALSE;
193 }
194 }
195
Matcher_lookingAtImpl(JNIEnv * env,jclass,jlong addr,jintArray offsets)196 static jboolean Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
197 MatcherState* state = toMatcherState(addr);
198 UBool result = state->matcher()->lookingAt(state->status());
199 if (result) {
200 state->updateOffsets(env, offsets);
201 return JNI_TRUE;
202 } else {
203 return JNI_FALSE;
204 }
205 }
206
Matcher_matchesImpl(JNIEnv * env,jclass,jlong addr,jintArray offsets)207 static jboolean Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
208 MatcherState* state = toMatcherState(addr);
209 UBool result = state->matcher()->matches(state->status());
210 if (result) {
211 state->updateOffsets(env, offsets);
212 return JNI_TRUE;
213 } else {
214 return JNI_FALSE;
215 }
216 }
217
Matcher_openImpl(JNIEnv * env,jclass,jlong patternAddr)218 static jlong Matcher_openImpl(JNIEnv* env, jclass, jlong patternAddr) {
219 icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr));
220 UErrorCode status = U_ZERO_ERROR;
221 icu::RegexMatcher* result = pattern->matcher(status);
222 if (maybeThrowIcuException(env, "RegexPattern::matcher", status)) {
223 return 0;
224 }
225
226 return reinterpret_cast<uintptr_t>(new MatcherState(result));
227 }
228
Matcher_requireEndImpl(JNIEnv *,jclass,jlong addr)229 static jboolean Matcher_requireEndImpl(JNIEnv*, jclass, jlong addr) {
230 MatcherState* state = toMatcherState(addr);
231 if (state->matcher()->requireEnd() != 0) {
232 return JNI_TRUE;
233 } else {
234 return JNI_FALSE;
235 }
236 }
237
Matcher_setInputImpl(JNIEnv * env,jclass,jlong addr,jstring javaText,jint start,jint end)238 static void Matcher_setInputImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint start, jint end) {
239 MatcherState* state = toMatcherState(addr);
240 if (state->updateInput(env, javaText)) {
241 state->matcher()->region(start, end, state->status());
242 }
243 }
244
Matcher_useAnchoringBoundsImpl(JNIEnv *,jclass,jlong addr,jboolean value)245 static void Matcher_useAnchoringBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) {
246 MatcherState* state = toMatcherState(addr);
247 state->matcher()->useAnchoringBounds(value);
248 }
249
Matcher_useTransparentBoundsImpl(JNIEnv *,jclass,jlong addr,jboolean value)250 static void Matcher_useTransparentBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) {
251 MatcherState* state = toMatcherState(addr);
252 state->matcher()->useTransparentBounds(value);
253 }
254
Matcher_getMatchedGroupIndex0(JNIEnv * env,jclass,jlong patternAddr,jstring javaGroupName)255 static jint Matcher_getMatchedGroupIndex0(JNIEnv* env, jclass, jlong patternAddr, jstring javaGroupName) {
256 icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr));
257 ScopedJavaUnicodeString groupName(env, javaGroupName);
258 UErrorCode status = U_ZERO_ERROR;
259
260 jint result = pattern->groupNumberFromName(groupName.unicodeString(), status);
261 if (U_SUCCESS(status)) {
262 return result;
263 }
264 if (status == U_REGEX_INVALID_CAPTURE_GROUP_NAME) {
265 return -1;
266 }
267 maybeThrowIcuException(env, "RegexPattern::groupNumberFromName", status);
268 return -1;
269 }
270
271
272 static JNINativeMethod gMethods[] = {
273 NATIVE_METHOD(Matcher, getMatchedGroupIndex0, "(JLjava/lang/String;)I"),
274 NATIVE_METHOD(Matcher, findImpl, "(JI[I)Z"),
275 NATIVE_METHOD(Matcher, findNextImpl, "(J[I)Z"),
276 NATIVE_METHOD(Matcher, getNativeFinalizer, "()J"),
277 NATIVE_METHOD(Matcher, groupCountImpl, "(J)I"),
278 NATIVE_METHOD(Matcher, hitEndImpl, "(J)Z"),
279 NATIVE_METHOD(Matcher, lookingAtImpl, "(J[I)Z"),
280 NATIVE_METHOD(Matcher, matchesImpl, "(J[I)Z"),
281 NATIVE_METHOD(Matcher, openImpl, "(J)J"),
282 NATIVE_METHOD(Matcher, requireEndImpl, "(J)Z"),
283 NATIVE_METHOD(Matcher, setInputImpl, "(JLjava/lang/String;II)V"),
284 NATIVE_METHOD(Matcher, useAnchoringBoundsImpl, "(JZ)V"),
285 NATIVE_METHOD(Matcher, useTransparentBoundsImpl, "(JZ)V"),
286 };
register_java_util_regex_Matcher(JNIEnv * env)287 void register_java_util_regex_Matcher(JNIEnv* env) {
288 jniRegisterNativeMethods(env, "java/util/regex/Matcher", gMethods, NELEM(gMethods));
289 }
290