1 /*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Minikin"
18
#include "minikin/FontCollection.h"

#include <algorithm>
#include <memory>
#include <utility>

#include <log/log.h>
#include <unicode/unistr.h>
#include <unicode/unorm2.h>

#include "minikin/Emoji.h"

#include "Locale.h"
#include "LocaleListCache.h"
#include "MinikinInternal.h"
32
33 using std::vector;
34
35 namespace minikin {
36
// Returns |a| when it compares strictly greater than |b|; otherwise returns |b|.
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
41
// Variation selectors that request a specific presentation for the preceding base character.
constexpr uint32_t EMOJI_STYLE_VS = 0xFE0F;  // U+FE0F: color (emoji) variation selector
constexpr uint32_t TEXT_STYLE_VS = 0xFE0E;   // U+FE0E: text variation selector

// Counter from which FontCollection::init() draws the id of each newly initialized collection.
static std::atomic<uint32_t> gNextCollectionId{0};
46
FontCollection(std::shared_ptr<FontFamily> && typeface)47 FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) {
48 std::vector<std::shared_ptr<FontFamily>> typefaces;
49 typefaces.push_back(typeface);
50 init(typefaces);
51 }
52
FontCollection(const vector<std::shared_ptr<FontFamily>> & typefaces)53 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) : mMaxChar(0) {
54 init(typefaces);
55 }
56
init(const vector<std::shared_ptr<FontFamily>> & typefaces)57 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
58 mId = gNextCollectionId++;
59 vector<uint32_t> lastChar;
60 size_t nTypefaces = typefaces.size();
61 const FontStyle defaultStyle;
62 for (size_t i = 0; i < nTypefaces; i++) {
63 const std::shared_ptr<FontFamily>& family = typefaces[i];
64 if (family->getClosestMatch(defaultStyle).font == nullptr) {
65 continue;
66 }
67 const SparseBitSet& coverage = family->getCoverage();
68 mFamilies.push_back(family); // emplace_back would be better
69 if (family->hasVSTable()) {
70 mVSFamilyVec.push_back(family);
71 }
72 mMaxChar = max(mMaxChar, coverage.length());
73 lastChar.push_back(coverage.nextSetBit(0));
74
75 const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes();
76 mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
77 }
78 nTypefaces = mFamilies.size();
79 MINIKIN_ASSERT(nTypefaces > 0, "Font collection must have at least one valid typeface");
80 MINIKIN_ASSERT(nTypefaces <= MAX_FAMILY_COUNT,
81 "Font collection may only have up to %d font families.", MAX_FAMILY_COUNT);
82 size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
83 // TODO: Use variation selector map for mRanges construction.
84 // A font can have a glyph for a base code point and variation selector pair but no glyph for
85 // the base code point without variation selector. The family won't be listed in the range in
86 // this case.
87 for (size_t i = 0; i < nPages; i++) {
88 Range dummy;
89 mRanges.push_back(dummy);
90 Range* range = &mRanges.back();
91 range->start = mFamilyVec.size();
92 for (size_t j = 0; j < nTypefaces; j++) {
93 if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
94 const std::shared_ptr<FontFamily>& family = mFamilies[j];
95 mFamilyVec.push_back(static_cast<uint8_t>(j));
96 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
97 lastChar[j] = nextChar;
98 }
99 }
100 range->end = mFamilyVec.size();
101 }
102 // See the comment in Range for more details.
103 LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF,
104 "Exceeded the maximum indexable cmap coverage.");
105 }
106
// Sentinel scores used by the font fallback logic: the smallest and the largest value a
// uint32_t score can take.
constexpr uint32_t kUnsupportedFontScore = 0;
constexpr uint32_t kFirstFontScore = UINT32_MAX;
110
111 // Calculates a font score.
112 // The score of the font family is based on three subscores.
113 // - Coverage Score: How well the font family covers the given character or variation sequence.
114 // - Locale Score: How well the font family is appropriate for the locale.
115 // - Variant Score: Whether the font family matches the variant. Note that this variant is not the
116 // one in BCP47. This is our own font variant (e.g., elegant, compact).
117 //
118 // Then, there is a priority for these three subscores as follow:
119 // Coverage Score > Locale Score > Variant Score
120 // The returned score reflects this priority order.
121 //
122 // Note that there are two special scores.
123 // - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
124 // base character.
125 // - kFirstFontScore: When the font is the first font family in the collection and it supports the
126 // given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,FontFamily::Variant variant,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const127 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, FontFamily::Variant variant,
128 uint32_t localeListId,
129 const std::shared_ptr<FontFamily>& fontFamily) const {
130 const uint32_t coverageScore = calcCoverageScore(ch, vs, localeListId, fontFamily);
131 if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
132 // No need to calculate other scores.
133 return coverageScore;
134 }
135
136 const uint32_t localeScore = calcLocaleMatchingScore(localeListId, *fontFamily);
137 const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
138
139 // Subscores are encoded into 31 bits representation to meet the subscore priority.
140 // The highest 2 bits are for coverage score, then following 28 bits are for locale score,
141 // then the last 1 bit is for variant score.
142 return coverageScore << 29 | localeScore << 1 | variantScore;
143 }
144
145 // Calculates a font score based on variation sequence coverage.
146 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
147 // character.
148 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
149 // supports the given character or variation sequence.
150 // - Returns 3 if the font family supports the variation sequence.
151 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
152 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
153 // - Returns 1 if the variation selector is not specified or if the font family only supports the
154 // variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const155 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, uint32_t localeListId,
156 const std::shared_ptr<FontFamily>& fontFamily) const {
157 const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
158 if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
159 // The font doesn't support either variation sequence or even the base character.
160 return kUnsupportedFontScore;
161 }
162
163 if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) {
164 // If the first font family supports the given character or variation sequence, always use
165 // it.
166 return kFirstFontScore;
167 }
168
169 if (vs != 0 && hasVSGlyph) {
170 return 3;
171 }
172
173 bool colorEmojiRequest;
174 if (vs == EMOJI_STYLE_VS) {
175 colorEmojiRequest = true;
176 } else if (vs == TEXT_STYLE_VS) {
177 colorEmojiRequest = false;
178 } else {
179 switch (LocaleListCache::getById(localeListId).getEmojiStyle()) {
180 case EmojiStyle::EMOJI:
181 colorEmojiRequest = true;
182 break;
183 case EmojiStyle::TEXT:
184 colorEmojiRequest = false;
185 break;
186 case EmojiStyle::EMPTY:
187 case EmojiStyle::DEFAULT:
188 default:
189 // Do not give any extra score for the default emoji style.
190 return 1;
191 break;
192 }
193 }
194
195 return colorEmojiRequest == fontFamily->isColorEmojiFamily() ? 2 : 1;
196 }
197
198 // Calculate font scores based on the script matching, subtag matching and primary locale matching.
199 //
200 // 1. If only the font's language matches or there is no matches between requested font and
201 // supported font, then the font obtains a score of 0.
202 // 2. Without a match in language, considering subtag may change font's EmojiStyle over script,
203 // a match in subtag gets a score of 2 and a match in scripts gains a score of 1.
204 // 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while
205 // language-and-script obtains a socre of 3 with the same reason above.
206 //
207 // If two locales in the requested list have the same locale score, the font matching with higher
208 // priority locale gets a higher score. For example, in the case the user requested locale list is
209 // "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score than the font of
210 // "en-Latn".
211 //
212 // To achieve score calculation with priorities, the locale score is determined as follows:
213 // LocaleScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
214 // Here, m is the maximum number of locales to be compared, and s(i) is the i-th locale's matching
215 // score. The possible values of s(i) are 0, 1, 2, 3 and 4.
calcLocaleMatchingScore(uint32_t userLocaleListId,const FontFamily & fontFamily)216 uint32_t FontCollection::calcLocaleMatchingScore(uint32_t userLocaleListId,
217 const FontFamily& fontFamily) {
218 const LocaleList& localeList = LocaleListCache::getById(userLocaleListId);
219 const LocaleList& fontLocaleList = LocaleListCache::getById(fontFamily.localeListId());
220
221 const size_t maxCompareNum = std::min(localeList.size(), FONT_LOCALE_LIMIT);
222 uint32_t score = 0;
223 for (size_t i = 0; i < maxCompareNum; ++i) {
224 score = score * 5u + localeList[i].calcScoreFor(fontLocaleList);
225 }
226 return score;
227 }
228
229 // Calculates a font score based on variant ("compact" or "elegant") matching.
230 // - Returns 1 if the font doesn't have variant or the variant matches with the text style.
231 // - No score if the font has a variant but it doesn't match with the text style.
calcVariantMatchingScore(FontFamily::Variant variant,const FontFamily & fontFamily)232 uint32_t FontCollection::calcVariantMatchingScore(FontFamily::Variant variant,
233 const FontFamily& fontFamily) {
234 const FontFamily::Variant familyVariant = fontFamily.variant();
235 if (familyVariant == FontFamily::Variant::DEFAULT) {
236 return 1;
237 }
238 if (familyVariant == variant) {
239 return 1;
240 }
241 if (variant == FontFamily::Variant::DEFAULT && familyVariant == FontFamily::Variant::COMPACT) {
242 // If default is requested, prefer compat variation.
243 return 1;
244 }
245 return 0;
246 }
247
248 // Implement heuristic for choosing best-match font. Here are the rules:
249 // 1. If first font in the collection has the character, it wins.
250 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
251 // 3. Highest score wins, with ties resolved to the first font.
252 // This method never returns nullptr.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t localeListId,FontFamily::Variant variant) const253 const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(
254 uint32_t ch, uint32_t vs, uint32_t localeListId, FontFamily::Variant variant) const {
255 if (ch >= mMaxChar) {
256 return mFamilies[0];
257 }
258
259 Range range = mRanges[ch >> kLogCharsPerPage];
260
261 if (vs != 0) {
262 range = {0, static_cast<uint16_t>(mFamilies.size())};
263 }
264
265 int bestFamilyIndex = -1;
266 uint32_t bestScore = kUnsupportedFontScore;
267 for (size_t i = range.start; i < range.end; i++) {
268 const std::shared_ptr<FontFamily>& family =
269 vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i];
270 const uint32_t score = calcFamilyScore(ch, vs, variant, localeListId, family);
271 if (score == kFirstFontScore) {
272 // If the first font family supports the given character or variation sequence, always
273 // use it.
274 return family;
275 }
276 if (score > bestScore) {
277 bestScore = score;
278 bestFamilyIndex = i;
279 }
280 }
281 if (bestFamilyIndex == -1) {
282 UErrorCode errorCode = U_ZERO_ERROR;
283 const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
284 if (U_SUCCESS(errorCode)) {
285 UChar decomposed[4];
286 int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
287 if (U_SUCCESS(errorCode) && len > 0) {
288 int off = 0;
289 U16_NEXT_UNSAFE(decomposed, off, ch);
290 return getFamilyForChar(ch, vs, localeListId, variant);
291 }
292 }
293 return mFamilies[0];
294 }
295 return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex];
296 }
297
298 // Characters where we want to continue using existing font run for (or stick to the next run if
299 // they start a string), even if the font does not support them explicitly. These are handled
300 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's
301 // usually meaningless to switch to a different font to display them.
doesNotNeedFontSupport(uint32_t c)302 static bool doesNotNeedFontSupport(uint32_t c) {
303 return c == 0x00AD // SOFT HYPHEN
304 || c == 0x034F // COMBINING GRAPHEME JOINER
305 || c == 0x061C // ARABIC LETTER MARK
306 || (0x200C <= c && c <= 0x200F) // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
307 || (0x202A <= c && c <= 0x202E) // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
308 || (0x2066 <= c && c <= 0x2069) // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
309 || c == 0xFEFF // BYTE ORDER MARK
310 || isVariationSelector(c);
311 }
312
// Characters for which the existing font run is kept instead of recomputing the best match in
// the fallback list.
static const uint32_t stickyWhitelist[] = {
        '!',
        ',',
        '-',
        '.',
        ':',
        ';',
        '?',
        0x00A0,  // NBSP
        0x2010,  // HYPHEN
        0x2011,  // NB_HYPHEN
        0x202F,  // NNBSP
        0x2640,  // FEMALE_SIGN
        0x2642,  // MALE_SIGN
        0x2695,  // STAFF_OF_AESCULAPIUS
};

// Returns true if |c| is one of the entries of stickyWhitelist.
static bool isStickyWhitelisted(uint32_t c) {
    for (const uint32_t entry : stickyWhitelist) {
        if (entry == c) {
            return true;
        }
    }
    return false;
}
332
isCombining(uint32_t c)333 static inline bool isCombining(uint32_t c) {
334 return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0;
335 }
336
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const337 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
338 uint32_t variationSelector) const {
339 if (!isVariationSelector(variationSelector)) {
340 return false;
341 }
342 if (baseCodepoint >= mMaxChar) {
343 return false;
344 }
345
346 // Currently mRanges can not be used here since it isn't aware of the variation sequence.
347 for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
348 if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
349 return true;
350 }
351 }
352
353 // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
354 // for <char, text presentation selector> case since we have special fallback rule for the
355 // sequence. Note that we don't need to restrict this to already standardized variation
356 // sequences, since Unicode is adding variation sequences more frequently now and may even move
357 // towards allowing text and emoji variation selectors on any character.
358 if (variationSelector == TEXT_STYLE_VS) {
359 for (size_t i = 0; i < mFamilies.size(); ++i) {
360 if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
361 return true;
362 }
363 }
364 }
365
366 return false;
367 }
368
369 constexpr uint32_t REPLACEMENT_CHARACTER = 0xFFFD;
370
itemize(const uint16_t * string,size_t string_size,const MinikinPaint & paint,vector<Run> * result) const371 void FontCollection::itemize(const uint16_t* string, size_t string_size, const MinikinPaint& paint,
372 vector<Run>* result) const {
373 const FontFamily::Variant familyVariant = paint.familyVariant;
374 const FontStyle style = paint.fontStyle;
375 const uint32_t localeListId = paint.localeListId;
376
377 const FontFamily* lastFamily = nullptr;
378 Run* run = nullptr;
379
380 if (string_size == 0) {
381 return;
382 }
383
384 const uint32_t kEndOfString = 0xFFFFFFFF;
385
386 uint32_t nextCh = 0;
387 uint32_t prevCh = 0;
388 size_t nextUtf16Pos = 0;
389 size_t readLength = 0;
390 U16_NEXT(string, readLength, string_size, nextCh);
391 if (U_IS_SURROGATE(nextCh)) {
392 nextCh = REPLACEMENT_CHARACTER;
393 }
394
395 do {
396 const uint32_t ch = nextCh;
397 const size_t utf16Pos = nextUtf16Pos;
398 nextUtf16Pos = readLength;
399 if (readLength < string_size) {
400 U16_NEXT(string, readLength, string_size, nextCh);
401 if (U_IS_SURROGATE(nextCh)) {
402 nextCh = REPLACEMENT_CHARACTER;
403 }
404 } else {
405 nextCh = kEndOfString;
406 }
407
408 bool shouldContinueRun = false;
409 if (doesNotNeedFontSupport(ch)) {
410 // Always continue if the character is a format character not needed to be in the font.
411 shouldContinueRun = true;
412 } else if (lastFamily != nullptr && (isStickyWhitelisted(ch) || isCombining(ch))) {
413 // Continue using existing font as long as it has coverage and is whitelisted.
414 shouldContinueRun = lastFamily->getCoverage().get(ch);
415 }
416
417 if (!shouldContinueRun) {
418 const std::shared_ptr<FontFamily>& family = getFamilyForChar(
419 ch, isVariationSelector(nextCh) ? nextCh : 0, localeListId, familyVariant);
420 if (utf16Pos == 0 || family.get() != lastFamily) {
421 size_t start = utf16Pos;
422 // Workaround for combining marks and emoji modifiers until we implement
423 // per-cluster font selection: if a combining mark or an emoji modifier is found in
424 // a different font that also supports the previous character, attach previous
425 // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
426 // handled properly by this since it's a combining mark too.
427 if (utf16Pos != 0 &&
428 (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh))) &&
429 family != nullptr && family->getCoverage().get(prevCh)) {
430 const size_t prevChLength = U16_LENGTH(prevCh);
431 if (run != nullptr) {
432 run->end -= prevChLength;
433 if (run->start == run->end) {
434 result->pop_back();
435 }
436 }
437 start -= prevChLength;
438 }
439 if (lastFamily == nullptr) {
440 // This is the first family ever assigned. We are either seeing the very first
441 // character (which means start would already be zero), or we have only seen
442 // characters that don't need any font support (which means we need to adjust
443 // start to be 0 to include those characters).
444 start = 0;
445 }
446 result->push_back({family->getClosestMatch(style), static_cast<int>(start), 0});
447 run = &result->back();
448 lastFamily = family.get();
449 }
450 }
451 prevCh = ch;
452 if (run != nullptr) {
453 run->end = nextUtf16Pos; // exclusive
454 }
455 } while (nextCh != kEndOfString);
456
457 if (lastFamily == nullptr) {
458 // No character needed any font support, so it doesn't really matter which font they end up
459 // getting displayed in. We put the whole string in one run, using the first font.
460 result->push_back({mFamilies[0]->getClosestMatch(style), 0, static_cast<int>(string_size)});
461 }
462 }
463
baseFontFaked(FontStyle style)464 FakedFont FontCollection::baseFontFaked(FontStyle style) {
465 return mFamilies[0]->getClosestMatch(style);
466 }
467
createCollectionWithVariation(const std::vector<FontVariation> & variations)468 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
469 const std::vector<FontVariation>& variations) {
470 if (variations.empty() || mSupportedAxes.empty()) {
471 return nullptr;
472 }
473
474 bool hasSupportedAxis = false;
475 for (const FontVariation& variation : variations) {
476 if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) {
477 hasSupportedAxis = true;
478 break;
479 }
480 }
481 if (!hasSupportedAxis) {
482 // None of variation axes are supported by this font collection.
483 return nullptr;
484 }
485
486 std::vector<std::shared_ptr<FontFamily>> families;
487 for (const std::shared_ptr<FontFamily>& family : mFamilies) {
488 std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations);
489 if (newFamily) {
490 families.push_back(newFamily);
491 } else {
492 families.push_back(family);
493 }
494 }
495
496 return std::shared_ptr<FontCollection>(new FontCollection(families));
497 }
498
getId() const499 uint32_t FontCollection::getId() const {
500 return mId;
501 }
502
503 } // namespace minikin
504