1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_RUNTIME_UTF_INL_H_ 18 #define ART_RUNTIME_UTF_INL_H_ 19 20 #include "utf.h" 21 22 namespace art { 23 GetUtf16FromUtf8(const char ** utf8_data_in)24inline uint16_t GetUtf16FromUtf8(const char** utf8_data_in) { 25 uint8_t one = *(*utf8_data_in)++; 26 if ((one & 0x80) == 0) { 27 // one-byte encoding 28 return one; 29 } 30 // two- or three-byte encoding 31 uint8_t two = *(*utf8_data_in)++; 32 if ((one & 0x20) == 0) { 33 // two-byte encoding 34 return ((one & 0x1f) << 6) | (two & 0x3f); 35 } 36 // three-byte encoding 37 uint8_t three = *(*utf8_data_in)++; 38 return ((one & 0x0f) << 12) | ((two & 0x3f) << 6) | (three & 0x3f); 39 } 40 CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(const char * utf8_1,const char * utf8_2)41inline int CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(const char* utf8_1, 42 const char* utf8_2) { 43 uint16_t c1, c2; 44 do { 45 c1 = *utf8_1; 46 c2 = *utf8_2; 47 // Did we reach a terminating character? 48 if (c1 == 0) { 49 return (c2 == 0) ? 0 : -1; 50 } else if (c2 == 0) { 51 return 1; 52 } 53 // Assume 1-byte value and handle all cases first. 54 utf8_1++; 55 utf8_2++; 56 if ((c1 & 0x80) == 0) { 57 if (c1 == c2) { 58 // Matching 1-byte values. 59 continue; 60 } else { 61 // Non-matching values. 62 if ((c2 & 0x80) == 0) { 63 // 1-byte value, do nothing. 64 } else if ((c2 & 0x20) == 0) { 65 // 2-byte value. 66 c2 = ((c2 & 0x1f) << 6) | (*utf8_2 & 0x3f); 67 } else { 68 // 3-byte value. 69 c2 = ((c2 & 0x0f) << 12) | ((utf8_2[0] & 0x3f) << 6) | (utf8_2[1] & 0x3f); 70 } 71 return static_cast<int>(c1) - static_cast<int>(c2); 72 } 73 } 74 // Non-matching or multi-byte values. 75 if ((c1 & 0x20) == 0) { 76 // 2-byte value. 77 c1 = ((c1 & 0x1f) << 6) | (*utf8_1 & 0x3f); 78 utf8_1++; 79 } else { 80 // 3-byte value. 81 c1 = ((c1 & 0x0f) << 12) | ((utf8_1[0] & 0x3f) << 6) | (utf8_1[1] & 0x3f); 82 utf8_1 += 2; 83 } 84 if ((c2 & 0x80) == 0) { 85 // 1-byte value, do nothing. 86 } else if ((c2 & 0x20) == 0) { 87 // 2-byte value. 88 c2 = ((c2 & 0x1f) << 6) | (*utf8_2 & 0x3f); 89 utf8_2++; 90 } else { 91 // 3-byte value. 92 c2 = ((c2 & 0x0f) << 12) | ((utf8_2[0] & 0x3f) << 6) | (utf8_2[1] & 0x3f); 93 utf8_2 += 2; 94 } 95 } while (c1 == c2); 96 return static_cast<int>(c1) - static_cast<int>(c2); 97 } 98 99 } // namespace art 100 101 #endif // ART_RUNTIME_UTF_INL_H_ 102