1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Minikin"
18 
19 #include "FontLanguage.h"
20 
21 #include <hb.h>
22 #include <unicode/uloc.h>
23 
24 namespace android {
25 
// Packs four characters into a 32-bit tag, first character in the most significant byte.
// Every argument is narrowed to uint8_t before being widened, so a negative `char`
// (a byte >= 0x80 on signed-char targets) cannot sign-extend into the other bytes of the tag.
#define SCRIPT_TAG(c1, c2, c3, c4) \
        (((uint32_t)(uint8_t)(c1)) << 24 | ((uint32_t)(uint8_t)(c2)) << 16 | \
         ((uint32_t)(uint8_t)(c3)) <<  8 | ((uint32_t)(uint8_t)(c4)))
29 
30 // Parse BCP 47 language identifier into internal structure
FontLanguage(const char * buf,size_t length)31 FontLanguage::FontLanguage(const char* buf, size_t length) : FontLanguage() {
32     size_t i;
33     for (i = 0; i < length; i++) {
34         char c = buf[i];
35         if (c == '-' || c == '_') break;
36     }
37     if (i == 2 || i == 3) {  // only accept two or three letter language code.
38         mLanguage = buf[0] | (buf[1] << 8) | ((i == 3) ? (buf[2] << 16) : 0);
39     } else {
40         // We don't understand anything other than two-letter or three-letter
41         // language codes, so we skip parsing the rest of the string.
42         mLanguage = 0ul;
43         return;
44     }
45 
46     size_t next;
47     for (i++; i < length; i = next + 1) {
48         for (next = i; next < length; next++) {
49             char c = buf[next];
50             if (c == '-' || c == '_') break;
51         }
52         if (next - i == 4 && 'A' <= buf[i] && buf[i] <= 'Z') {
53             mScript = SCRIPT_TAG(buf[i], buf[i + 1], buf[i + 2], buf[i + 3]);
54         }
55     }
56 
57     mSubScriptBits = scriptToSubScriptBits(mScript);
58 }
59 
60 //static
scriptToSubScriptBits(uint32_t script)61 uint8_t FontLanguage::scriptToSubScriptBits(uint32_t script) {
62     uint8_t subScriptBits = 0u;
63     switch (script) {
64         case SCRIPT_TAG('B', 'o', 'p', 'o'):
65             subScriptBits = kBopomofoFlag;
66             break;
67         case SCRIPT_TAG('H', 'a', 'n', 'g'):
68             subScriptBits = kHangulFlag;
69             break;
70         case SCRIPT_TAG('H', 'a', 'n', 'b'):
71             // Bopomofo is almost exclusively used in Taiwan.
72             subScriptBits = kHanFlag | kBopomofoFlag;
73             break;
74         case SCRIPT_TAG('H', 'a', 'n', 'i'):
75             subScriptBits = kHanFlag;
76             break;
77         case SCRIPT_TAG('H', 'a', 'n', 's'):
78             subScriptBits = kHanFlag | kSimplifiedChineseFlag;
79             break;
80         case SCRIPT_TAG('H', 'a', 'n', 't'):
81             subScriptBits = kHanFlag | kTraditionalChineseFlag;
82             break;
83         case SCRIPT_TAG('H', 'i', 'r', 'a'):
84             subScriptBits = kHiraganaFlag;
85             break;
86         case SCRIPT_TAG('H', 'r', 'k', 't'):
87             subScriptBits = kKatakanaFlag | kHiraganaFlag;
88             break;
89         case SCRIPT_TAG('J', 'p', 'a', 'n'):
90             subScriptBits = kHanFlag | kKatakanaFlag | kHiraganaFlag;
91             break;
92         case SCRIPT_TAG('K', 'a', 'n', 'a'):
93             subScriptBits = kKatakanaFlag;
94             break;
95         case SCRIPT_TAG('K', 'o', 'r', 'e'):
96             subScriptBits = kHanFlag | kHangulFlag;
97             break;
98         case SCRIPT_TAG('Z', 's', 'y', 'e'):
99             subScriptBits = kEmojiFlag;
100             break;
101     }
102     return subScriptBits;
103 }
104 
getString() const105 std::string FontLanguage::getString() const {
106     if (mLanguage == 0ul) {
107         return "und";
108     }
109     char buf[16];
110     size_t i = 0;
111     buf[i++] = mLanguage & 0xFF ;
112     buf[i++] = (mLanguage >> 8) & 0xFF;
113     char third_letter = (mLanguage >> 16) & 0xFF;
114     if (third_letter != 0) buf[i++] = third_letter;
115     if (mScript != 0) {
116       buf[i++] = '-';
117       buf[i++] = (mScript >> 24) & 0xFFu;
118       buf[i++] = (mScript >> 16) & 0xFFu;
119       buf[i++] = (mScript >> 8) & 0xFFu;
120       buf[i++] = mScript & 0xFFu;
121     }
122     return std::string(buf, i);
123 }
124 
isEqualScript(const FontLanguage & other) const125 bool FontLanguage::isEqualScript(const FontLanguage& other) const {
126     return other.mScript == mScript;
127 }
128 
129 // static
supportsScript(uint8_t providedBits,uint8_t requestedBits)130 bool FontLanguage::supportsScript(uint8_t providedBits, uint8_t requestedBits) {
131     return requestedBits != 0 && (providedBits & requestedBits) == requestedBits;
132 }
133 
supportsHbScript(hb_script_t script) const134 bool FontLanguage::supportsHbScript(hb_script_t script) const {
135     static_assert(SCRIPT_TAG('J', 'p', 'a', 'n') == HB_TAG('J', 'p', 'a', 'n'),
136                   "The Minikin script and HarfBuzz hb_script_t have different encodings.");
137     if (script == mScript) return true;
138     return supportsScript(mSubScriptBits, scriptToSubScriptBits(script));
139 }
140 
calcScoreFor(const FontLanguages & supported) const141 int FontLanguage::calcScoreFor(const FontLanguages& supported) const {
142     int score = 0;
143     for (size_t i = 0; i < supported.size(); ++i) {
144         if (isEqualScript(supported[i]) ||
145                 supportsScript(supported[i].mSubScriptBits, mSubScriptBits)) {
146             if (mLanguage == supported[i].mLanguage) {
147                 return 2;
148             } else {
149                 score = 1;
150             }
151         }
152     }
153 
154     if (score == 1) {
155         return score;
156     }
157 
158     if (supportsScript(supported.getUnionOfSubScriptBits(), mSubScriptBits)) {
159         // Gives score of 2 only if the language matches all of the font languages except for the
160         // exact match case handled above.
161         return (mLanguage == supported[0].mLanguage && supported.isAllTheSameLanguage()) ? 2 : 1;
162     }
163 
164     return 0;
165 }
166 
FontLanguages(std::vector<FontLanguage> && languages)167 FontLanguages::FontLanguages(std::vector<FontLanguage>&& languages)
168     : mLanguages(std::move(languages)) {
169     if (mLanguages.empty()) {
170         return;
171     }
172 
173     const FontLanguage& lang = mLanguages[0];
174 
175     mIsAllTheSameLanguage = true;
176     mUnionOfSubScriptBits = lang.mSubScriptBits;
177     for (size_t i = 1; i < mLanguages.size(); ++i) {
178         mUnionOfSubScriptBits |= mLanguages[i].mSubScriptBits;
179         if (mIsAllTheSameLanguage && lang.mLanguage != mLanguages[i].mLanguage) {
180             mIsAllTheSameLanguage = false;
181         }
182     }
183 }
184 
185 #undef SCRIPT_TAG
186 }  // namespace android
187