1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_RUNTIME_UTF_INL_H_
18 #define ART_RUNTIME_UTF_INL_H_
19 
20 #include "utf.h"
21 
22 namespace art {
23 
// Decodes a single UTF-16 code unit from the modified UTF-8 data at
// *utf8_data_in and advances *utf8_data_in past the bytes that were consumed.
//
// The length of the encoding is derived from the lead byte alone:
//   - bit 0x80 clear             -> one byte, returned as-is;
//   - bit 0x80 set, 0x20 clear   -> two bytes  (5 + 6 payload bits);
//   - bits 0x80 and 0x20 set     -> three bytes (4 + 6 + 6 payload bits).
//
// No validation is performed: continuation bytes are only masked with 0x3f,
// and the second/third byte is read unconditionally once the lead byte asks
// for it, so a truncated sequence makes this read past the end of the data.
inline uint16_t GetUtf16FromUtf8(const char** utf8_data_in) {
  // Go through uint8_t so that bytes >= 0x80 are not sign-extended.
  uint8_t one = *(*utf8_data_in)++;
  if ((one & 0x80) == 0) {
    // One-byte encoding: the byte is the code unit.
    return one;
  }
  // Two- or three-byte encoding: both need the second byte.
  uint8_t two = *(*utf8_data_in)++;
  if ((one & 0x20) == 0) {
    // Two-byte encoding: 5 bits from the lead byte, 6 from the continuation.
    return ((one & 0x1f) << 6) | (two & 0x3f);
  }
  // Three-byte encoding: 4 bits from the lead byte, 6 from each continuation.
  uint8_t three = *(*utf8_data_in)++;
  return ((one & 0x0f) << 12) | ((two & 0x3f) << 6) | (three & 0x3f);
}
40 
// Compares two NUL-terminated modified UTF-8 strings by the values of the
// UTF-16 code units they decode to, one code unit at a time.
//
// Returns 0 when both strings decode to the same sequence. When one string
// ends first, returns -1 if it is utf8_1 and 1 if it is utf8_2. Otherwise
// returns (first differing code unit of utf8_1) - (that of utf8_2), so only
// the sign is comparable across cases.
//
// Encodings of one, two and three bytes are recognised from the lead byte
// (bits 0x80 and 0x20). No validation is performed: continuation bytes are
// read unconditionally, so a truncated multi-byte sequence makes this read
// past the terminator.
inline int CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(const char* utf8_1,
                                                                   const char* utf8_2) {
  uint16_t c1, c2;
  do {
    // Load the lead bytes as unsigned so widening to uint16_t never
    // sign-extends; all later uses mask the value anyway.
    c1 = static_cast<uint8_t>(*utf8_1);
    c2 = static_cast<uint8_t>(*utf8_2);
    // Did we reach a terminating character?
    if (c1 == 0) {
      return (c2 == 0) ? 0 : -1;
    } else if (c2 == 0) {
      return 1;
    }
    // Step past the lead bytes; from here on utf8_1/utf8_2 point at the first
    // continuation byte (if any) of the current code unit.
    utf8_1++;
    utf8_2++;
    if ((c1 & 0x80) == 0) {
      // Fast path: c1 is a one-byte value.
      if (c1 == c2) {
        // Matching 1-byte values. 'continue' jumps to the loop condition,
        // which holds, so the next pair of code units is examined.
        continue;
      } else {
        // Non-matching values: finish decoding c2 (no need to advance utf8_2,
        // we return right away).
        if ((c2 & 0x80) == 0) {
          // 1-byte value, do nothing.
        } else if ((c2 & 0x20) == 0) {
          // 2-byte value.
          c2 = ((c2 & 0x1f) << 6) | (*utf8_2 & 0x3f);
        } else {
          // 3-byte value.
          c2 = ((c2 & 0x0f) << 12) | ((utf8_2[0] & 0x3f) << 6) | (utf8_2[1] & 0x3f);
        }
        return static_cast<int>(c1) - static_cast<int>(c2);
      }
    }
    // c1 is a multi-byte lead here: every one-byte case either continued or
    // returned above.
    if ((c1 & 0x20) == 0) {
      // 2-byte value.
      c1 = ((c1 & 0x1f) << 6) | (*utf8_1 & 0x3f);
      utf8_1++;
    } else {
      // 3-byte value.
      c1 = ((c1 & 0x0f) << 12) | ((utf8_1[0] & 0x3f) << 6) | (utf8_1[1] & 0x3f);
      utf8_1 += 2;
    }
    if ((c2 & 0x80) == 0) {
      // 1-byte value, do nothing.
    } else if ((c2 & 0x20) == 0) {
      // 2-byte value.
      c2 = ((c2 & 0x1f) << 6) | (*utf8_2 & 0x3f);
      utf8_2++;
    } else {
      // 3-byte value.
      c2 = ((c2 & 0x0f) << 12) | ((utf8_2[0] & 0x3f) << 6) | (utf8_2[1] & 0x3f);
      utf8_2 += 2;
    }
  } while (c1 == c2);
  return static_cast<int>(c1) - static_cast<int>(c2);
}
98 
99 }  // namespace art
100 
101 #endif  // ART_RUNTIME_UTF_INL_H_
102