1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Minikin"
18
19 #include "FontLanguage.h"
20
21 #include <hb.h>
22 #include <unicode/uloc.h>
23
24 namespace android {
25
// Packs four 8-bit characters into a 32-bit script tag, with c1 in the most significant byte and
// c4 in the least significant byte.
//
// Each argument is masked to its low 8 bits after conversion. Without the mask, a negative value
// (for example a plain `char` holding a byte >= 0x80 on a platform where `char` is signed) would
// be sign-extended by the conversion to uint32_t and would overwrite the bytes above it.
#define SCRIPT_TAG(c1, c2, c3, c4)                       \
    ((((static_cast<uint32_t>(c1)) & 0xFFu) << 24) |     \
     (((static_cast<uint32_t>(c2)) & 0xFFu) << 16) |     \
     (((static_cast<uint32_t>(c3)) & 0xFFu) << 8) |      \
     ((static_cast<uint32_t>(c4)) & 0xFFu))
29
30 // Parse BCP 47 language identifier into internal structure
FontLanguage(const char * buf,size_t length)31 FontLanguage::FontLanguage(const char* buf, size_t length) : FontLanguage() {
32 size_t i;
33 for (i = 0; i < length; i++) {
34 char c = buf[i];
35 if (c == '-' || c == '_') break;
36 }
37 if (i == 2 || i == 3) { // only accept two or three letter language code.
38 mLanguage = buf[0] | (buf[1] << 8) | ((i == 3) ? (buf[2] << 16) : 0);
39 } else {
40 // We don't understand anything other than two-letter or three-letter
41 // language codes, so we skip parsing the rest of the string.
42 mLanguage = 0ul;
43 return;
44 }
45
46 size_t next;
47 for (i++; i < length; i = next + 1) {
48 for (next = i; next < length; next++) {
49 char c = buf[next];
50 if (c == '-' || c == '_') break;
51 }
52 if (next - i == 4 && 'A' <= buf[i] && buf[i] <= 'Z') {
53 mScript = SCRIPT_TAG(buf[i], buf[i + 1], buf[i + 2], buf[i + 3]);
54 }
55 }
56
57 mSubScriptBits = scriptToSubScriptBits(mScript);
58 }
59
60 //static
scriptToSubScriptBits(uint32_t script)61 uint8_t FontLanguage::scriptToSubScriptBits(uint32_t script) {
62 uint8_t subScriptBits = 0u;
63 switch (script) {
64 case SCRIPT_TAG('B', 'o', 'p', 'o'):
65 subScriptBits = kBopomofoFlag;
66 break;
67 case SCRIPT_TAG('H', 'a', 'n', 'g'):
68 subScriptBits = kHangulFlag;
69 break;
70 case SCRIPT_TAG('H', 'a', 'n', 'b'):
71 // Bopomofo is almost exclusively used in Taiwan.
72 subScriptBits = kHanFlag | kBopomofoFlag;
73 break;
74 case SCRIPT_TAG('H', 'a', 'n', 'i'):
75 subScriptBits = kHanFlag;
76 break;
77 case SCRIPT_TAG('H', 'a', 'n', 's'):
78 subScriptBits = kHanFlag | kSimplifiedChineseFlag;
79 break;
80 case SCRIPT_TAG('H', 'a', 'n', 't'):
81 subScriptBits = kHanFlag | kTraditionalChineseFlag;
82 break;
83 case SCRIPT_TAG('H', 'i', 'r', 'a'):
84 subScriptBits = kHiraganaFlag;
85 break;
86 case SCRIPT_TAG('H', 'r', 'k', 't'):
87 subScriptBits = kKatakanaFlag | kHiraganaFlag;
88 break;
89 case SCRIPT_TAG('J', 'p', 'a', 'n'):
90 subScriptBits = kHanFlag | kKatakanaFlag | kHiraganaFlag;
91 break;
92 case SCRIPT_TAG('K', 'a', 'n', 'a'):
93 subScriptBits = kKatakanaFlag;
94 break;
95 case SCRIPT_TAG('K', 'o', 'r', 'e'):
96 subScriptBits = kHanFlag | kHangulFlag;
97 break;
98 case SCRIPT_TAG('Z', 's', 'y', 'e'):
99 subScriptBits = kEmojiFlag;
100 break;
101 }
102 return subScriptBits;
103 }
104
getString() const105 std::string FontLanguage::getString() const {
106 if (mLanguage == 0ul) {
107 return "und";
108 }
109 char buf[16];
110 size_t i = 0;
111 buf[i++] = mLanguage & 0xFF ;
112 buf[i++] = (mLanguage >> 8) & 0xFF;
113 char third_letter = (mLanguage >> 16) & 0xFF;
114 if (third_letter != 0) buf[i++] = third_letter;
115 if (mScript != 0) {
116 buf[i++] = '-';
117 buf[i++] = (mScript >> 24) & 0xFFu;
118 buf[i++] = (mScript >> 16) & 0xFFu;
119 buf[i++] = (mScript >> 8) & 0xFFu;
120 buf[i++] = mScript & 0xFFu;
121 }
122 return std::string(buf, i);
123 }
124
isEqualScript(const FontLanguage & other) const125 bool FontLanguage::isEqualScript(const FontLanguage& other) const {
126 return other.mScript == mScript;
127 }
128
129 // static
supportsScript(uint8_t providedBits,uint8_t requestedBits)130 bool FontLanguage::supportsScript(uint8_t providedBits, uint8_t requestedBits) {
131 return requestedBits != 0 && (providedBits & requestedBits) == requestedBits;
132 }
133
supportsHbScript(hb_script_t script) const134 bool FontLanguage::supportsHbScript(hb_script_t script) const {
135 static_assert(SCRIPT_TAG('J', 'p', 'a', 'n') == HB_TAG('J', 'p', 'a', 'n'),
136 "The Minikin script and HarfBuzz hb_script_t have different encodings.");
137 if (script == mScript) return true;
138 return supportsScript(mSubScriptBits, scriptToSubScriptBits(script));
139 }
140
calcScoreFor(const FontLanguages & supported) const141 int FontLanguage::calcScoreFor(const FontLanguages& supported) const {
142 int score = 0;
143 for (size_t i = 0; i < supported.size(); ++i) {
144 if (isEqualScript(supported[i]) ||
145 supportsScript(supported[i].mSubScriptBits, mSubScriptBits)) {
146 if (mLanguage == supported[i].mLanguage) {
147 return 2;
148 } else {
149 score = 1;
150 }
151 }
152 }
153
154 if (score == 1) {
155 return score;
156 }
157
158 if (supportsScript(supported.getUnionOfSubScriptBits(), mSubScriptBits)) {
159 // Gives score of 2 only if the language matches all of the font languages except for the
160 // exact match case handled above.
161 return (mLanguage == supported[0].mLanguage && supported.isAllTheSameLanguage()) ? 2 : 1;
162 }
163
164 return 0;
165 }
166
FontLanguages(std::vector<FontLanguage> && languages)167 FontLanguages::FontLanguages(std::vector<FontLanguage>&& languages)
168 : mLanguages(std::move(languages)) {
169 if (mLanguages.empty()) {
170 return;
171 }
172
173 const FontLanguage& lang = mLanguages[0];
174
175 mIsAllTheSameLanguage = true;
176 mUnionOfSubScriptBits = lang.mSubScriptBits;
177 for (size_t i = 1; i < mLanguages.size(); ++i) {
178 mUnionOfSubScriptBits |= mLanguages[i].mSubScriptBits;
179 if (mIsAllTheSameLanguage && lang.mLanguage != mLanguages[i].mLanguage) {
180 mIsAllTheSameLanguage = false;
181 }
182 }
183 }
184
185 #undef SCRIPT_TAG
186 } // namespace android
187