1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <cstdlib>
18 #include <string>
19 #include <vector>
20
21 #include <cutils/log.h>
22 #include <unicode/utf.h>
23 #include <unicode/utf8.h>
24
25 #include "minikin/U16StringPiece.h"
26
27 namespace minikin {
28
29 // src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null.
30 // Size is returned in an out parameter because gtest needs a void return for ASSERT to work.
ParseUnicode(uint16_t * buf,size_t buf_size,const char * src,size_t * result_size,size_t * offset)31 void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size,
32 size_t* offset) {
33 size_t input_ix = 0;
34 size_t output_ix = 0;
35 bool seen_offset = false;
36
37 while (src[input_ix] != 0) {
38 switch (src[input_ix]) {
39 case '\'':
40 // single ASCII char
41 LOG_ALWAYS_FATAL_IF(static_cast<uint8_t>(src[input_ix]) >= 0x80);
42 input_ix++;
43 LOG_ALWAYS_FATAL_IF(src[input_ix] == 0);
44 LOG_ALWAYS_FATAL_IF(output_ix >= buf_size);
45 buf[output_ix++] = (uint16_t)src[input_ix++];
46 LOG_ALWAYS_FATAL_IF(src[input_ix] != '\'');
47 input_ix++;
48 break;
49 case 'u':
50 case 'U': {
51 // Unicode codepoint in hex syntax
52 input_ix++;
53 LOG_ALWAYS_FATAL_IF(src[input_ix] != '+');
54 input_ix++;
55 char* endptr = (char*)src + input_ix;
56 unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16);
57 size_t num_hex_digits = endptr - (src + input_ix);
58
59 // also triggers on invalid number syntax, digits = 0
60 LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u);
61 LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u);
62 LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu);
63 input_ix += num_hex_digits;
64 if (U16_LENGTH(codepoint) == 1) {
65 LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size);
66 buf[output_ix++] = codepoint;
67 } else {
68 // UTF-16 encoding
69 LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size);
70 buf[output_ix++] = U16_LEAD(codepoint);
71 buf[output_ix++] = U16_TRAIL(codepoint);
72 }
73 break;
74 }
75 case ' ':
76 input_ix++;
77 break;
78 case '|':
79 LOG_ALWAYS_FATAL_IF(seen_offset);
80 LOG_ALWAYS_FATAL_IF(offset == nullptr);
81 *offset = output_ix;
82 seen_offset = true;
83 input_ix++;
84 break;
85 default:
86 LOG_ALWAYS_FATAL("Unexpected Character");
87 }
88 }
89 LOG_ALWAYS_FATAL_IF(result_size == nullptr);
90 *result_size = output_ix;
91 LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr);
92 }
93
parseUnicodeStringWithOffset(const std::string & in,size_t * offset)94 std::vector<uint16_t> parseUnicodeStringWithOffset(const std::string& in, size_t* offset) {
95 std::unique_ptr<uint16_t[]> buffer(new uint16_t[in.size()]);
96 size_t result_size = 0;
97 ParseUnicode(buffer.get(), in.size(), in.c_str(), &result_size, offset);
98 return std::vector<uint16_t>(buffer.get(), buffer.get() + result_size);
99 }
100
parseUnicodeString(const std::string & in)101 std::vector<uint16_t> parseUnicodeString(const std::string& in) {
102 return parseUnicodeStringWithOffset(in, nullptr);
103 }
104
utf8ToUtf16(const std::string & text)105 std::vector<uint16_t> utf8ToUtf16(const std::string& text) {
106 std::vector<uint16_t> result;
107 int32_t i = 0;
108 const int32_t textLength = static_cast<int32_t>(text.size());
109 uint32_t c = 0;
110 while (i < textLength) {
111 U8_NEXT(text.c_str(), i, textLength, c);
112 if (U16_LENGTH(c) == 1) {
113 result.push_back(c);
114 } else {
115 result.push_back(U16_LEAD(c));
116 result.push_back(U16_TRAIL(c));
117 }
118 }
119 return result;
120 }
121
utf16ToUtf8(const U16StringPiece & u16String)122 std::string utf16ToUtf8(const U16StringPiece& u16String) {
123 const uint32_t textLength = u16String.size();
124 uint32_t i = 0;
125 uint32_t c = 0;
126
127 std::string out;
128 out.reserve(textLength * 4);
129
130 while (i < textLength) {
131 U16_NEXT(u16String.data(), i, textLength, c);
132
133 char buf[U8_MAX_LENGTH] = {};
134 uint32_t outIndex = 0;
135 U8_APPEND_UNSAFE(buf, outIndex, c);
136 out.append(buf, outIndex);
137 }
138 return out;
139 }
140
141 } // namespace minikin
142