1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Minikin"
18 
19 #include "minikin/FontCollection.h"
20 
21 #include <log/log.h>
22 #include <unicode/unorm2.h>
23 
24 #include <algorithm>
25 #include <unordered_set>
26 
27 #include "FeatureFlags.h"
28 #include "Locale.h"
29 #include "LocaleListCache.h"
30 #include "MinikinInternal.h"
31 #include "minikin/Characters.h"
32 #include "minikin/Emoji.h"
33 #include "minikin/FontFileParser.h"
34 #include "minikin/MinikinExtent.h"
35 #include "minikin/MinikinPaint.h"
36 
37 using std::vector;
38 
39 namespace minikin {
40 
// Returns the larger of the two arguments; `b` is returned when neither is greater.
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
45 
// U+FE0F: the variation selector that requests emoji (color) presentation.
constexpr uint32_t EMOJI_STYLE_VS = 0xFE0F;
// U+FE0E: the variation selector that requests text presentation.
constexpr uint32_t TEXT_STYLE_VS = 0xFE0E;

// Source of FontCollection ids: every constructed collection takes the current value and
// post-increments the counter.
static std::atomic<uint32_t> gNextCollectionId{0};
50 
51 namespace {
52 
isEmojiBreak(uint32_t prevCh,uint32_t ch)53 inline bool isEmojiBreak(uint32_t prevCh, uint32_t ch) {
54     return !(isEmojiModifier(ch) || (isRegionalIndicator(prevCh) && isRegionalIndicator(ch)) ||
55              isKeyCap(ch) || isTagChar(ch) || ch == CHAR_ZWJ || prevCh == CHAR_ZWJ);
56 }
57 
58 // Lower is better
getGlyphScore(U16StringPiece text,uint32_t start,uint32_t end,const HbFontUniquePtr & font)59 uint32_t getGlyphScore(U16StringPiece text, uint32_t start, uint32_t end,
60                        const HbFontUniquePtr& font) {
61     HbBufferUniquePtr buffer(hb_buffer_create());
62     hb_buffer_set_direction(buffer.get(), HB_DIRECTION_LTR);
63     hb_buffer_add_utf16(buffer.get(), text.data() + start, end - start, 0, end - start);
64     hb_buffer_guess_segment_properties(buffer.get());
65 
66     unsigned int numGlyphs = -1;
67     hb_shape(font.get(), buffer.get(), nullptr, 0);
68     hb_glyph_info_t* info = hb_buffer_get_glyph_infos(buffer.get(), &numGlyphs);
69 
70     // HarfBuzz squashed unsupported tag sequence into first emoji glyph. So, we cannot use glyph
71     // count for the font selection score. Give extra score if the base score is different from the
72     // first glyph.
73     if (numGlyphs == 1) {
74         constexpr uint32_t TAG_SEQUENCE_FALLBACK_PENALTY = 0x10000;
75 
76         uint32_t ch = 0;
77         const uint16_t* string = text.data();
78         const uint32_t string_size = text.size();
79         uint32_t readLength = 0;
80 
81         U16_NEXT(string, readLength, string_size, ch);
82         if (U_IS_SURROGATE(ch)) {
83             return numGlyphs;  // Broken surrogate pair.
84         }
85 
86         if (readLength >= string_size) {
87             return numGlyphs;  // No more characters remaining.
88         }
89 
90         uint32_t nextCh = 0;
91         U16_NEXT(string, readLength, string_size, nextCh);
92 
93         if (!isTagChar(nextCh)) {
94             return numGlyphs;  // Not a tag sequence.
95         }
96 
97         uint32_t composedGlyphId = info[0].codepoint;
98 
99         // Shape only the first base emoji.
100         hb_buffer_reset(buffer.get());
101         hb_buffer_set_direction(buffer.get(), HB_DIRECTION_LTR);
102         hb_buffer_add_codepoints(buffer.get(), &ch, 1, 0, 1);
103         hb_buffer_guess_segment_properties(buffer.get());
104 
105         unsigned int numGlyphs = -1;
106         hb_shape(font.get(), buffer.get(), nullptr, 0);
107         info = hb_buffer_get_glyph_infos(buffer.get(), &numGlyphs);
108 
109         if (numGlyphs != 1) {
110             // If the single code point of the first base emoji is decomposed to multiple glyphs,
111             // we don't support it.
112             return numGlyphs;
113         }
114 
115         uint32_t baseGlyphId = info[0].codepoint;
116         if (composedGlyphId == baseGlyphId) {
117             return numGlyphs + TAG_SEQUENCE_FALLBACK_PENALTY;
118         } else {
119             return numGlyphs;
120         }
121     }
122 
123     return numGlyphs;
124 }
125 
126 }  // namespace
127 
128 // static
create(std::shared_ptr<FontFamily> && typeface)129 std::shared_ptr<FontCollection> FontCollection::create(std::shared_ptr<FontFamily>&& typeface) {
130     std::vector<std::shared_ptr<FontFamily>> typefaces;
131     typefaces.push_back(typeface);
132     return create(typefaces);
133 }
134 
135 // static
create(const vector<std::shared_ptr<FontFamily>> & typefaces)136 std::shared_ptr<FontCollection> FontCollection::create(
137         const vector<std::shared_ptr<FontFamily>>& typefaces) {
138     // TODO(b/174672300): Revert back to make_shared.
139     return std::shared_ptr<FontCollection>(new FontCollection(typefaces));
140 }
141 
FontCollection(const vector<std::shared_ptr<FontFamily>> & typefaces)142 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces)
143         : mMaxChar(0), mSupportedAxes(nullptr) {
144     init(typefaces);
145 }
146 
init(const vector<std::shared_ptr<FontFamily>> & typefaces)147 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
148     mId = gNextCollectionId++;
149     vector<uint32_t> lastChar;
150     size_t nTypefaces = typefaces.size();
151     const FontStyle defaultStyle;
152     auto families = std::make_shared<vector<std::shared_ptr<FontFamily>>>();
153     std::unordered_set<AxisTag> supportedAxesSet;
154     for (size_t i = 0; i < nTypefaces; i++) {
155         const std::shared_ptr<FontFamily>& family = typefaces[i];
156         if (family->getClosestMatch(defaultStyle).font == nullptr) {
157             continue;
158         }
159         const SparseBitSet& coverage = family->getCoverage();
160         families->emplace_back(family);
161         if (family->hasVSTable()) {
162             mVSFamilyVec.push_back(family);
163         }
164         mMaxChar = max(mMaxChar, coverage.length());
165         lastChar.push_back(coverage.nextSetBit(0));
166 
167         for (size_t i = 0; i < family->getSupportedAxesCount(); i++) {
168             supportedAxesSet.insert(family->getSupportedAxisAt(i));
169         }
170     }
171     // mMaybeSharedFamilies is not shared.
172     mMaybeSharedFamilies = families;
173     mFamilyCount = families->size();
174     mFamilyIndices = nullptr;
175     MINIKIN_ASSERT(mFamilyCount > 0, "Font collection must have at least one valid typeface");
176     MINIKIN_ASSERT(mFamilyCount <= MAX_FAMILY_COUNT,
177                    "Font collection may only have up to %d font families.", MAX_FAMILY_COUNT);
178     // Although OpenType supports up to 2^16-1 axes per font,
179     // mSupportedAxesCount may exceed 2^16-1 as we have multiple fonts.
180     mSupportedAxesCount = static_cast<uint32_t>(supportedAxesSet.size());
181     if (mSupportedAxesCount > 0) {
182         mSupportedAxes = sortedArrayFromSet(supportedAxesSet);
183     }
184     size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
185     // TODO: Use variation selector map for mRanges construction.
186     // A font can have a glyph for a base code point and variation selector pair but no glyph for
187     // the base code point without variation selector. The family won't be listed in the range in
188     // this case.
189     mOwnedRanges = std::make_unique<Range[]>(nPages);
190     mRanges = mOwnedRanges.get();
191     mRangesCount = nPages;
192     for (size_t i = 0; i < nPages; i++) {
193         Range* range = &mOwnedRanges[i];
194         range->start = mOwnedFamilyVec.size();
195         for (size_t j = 0; j < getFamilyCount(); j++) {
196             if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
197                 const std::shared_ptr<FontFamily>& family = getFamilyAt(j);
198                 mOwnedFamilyVec.push_back(static_cast<uint8_t>(j));
199                 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
200                 lastChar[j] = nextChar;
201             }
202         }
203         range->end = mOwnedFamilyVec.size();
204     }
205     // See the comment in Range for more details.
206     LOG_ALWAYS_FATAL_IF(mOwnedFamilyVec.size() >= 0xFFFF,
207                         "Exceeded the maximum indexable cmap coverage.");
208     mFamilyVec = mOwnedFamilyVec.data();
209     mFamilyVecCount = mOwnedFamilyVec.size();
210 }
211 
FontCollection(BufferReader * reader,const std::shared_ptr<std::vector<std::shared_ptr<FontFamily>>> & families)212 FontCollection::FontCollection(
213         BufferReader* reader,
214         const std::shared_ptr<std::vector<std::shared_ptr<FontFamily>>>& families)
215         : mSupportedAxes(nullptr) {
216     mId = gNextCollectionId++;
217     mMaxChar = reader->read<uint32_t>();
218     mMaybeSharedFamilies = families;
219     std::tie(mFamilyIndices, mFamilyCount) = reader->readArray<uint32_t>();
220     for (size_t i = 0; i < getFamilyCount(); i++) {
221         const auto& family = getFamilyAt(i);
222         if (family->hasVSTable()) mVSFamilyVec.emplace_back(family);
223     }
224     // Range is two packed uint16_t
225     static_assert(sizeof(Range) == 4);
226     std::tie(mRanges, mRangesCount) = reader->readArray<Range>();
227     std::tie(mFamilyVec, mFamilyVecCount) = reader->readArray<uint8_t>();
228     const auto& [axesPtr, axesCount] = reader->readArray<AxisTag>();
229     mSupportedAxesCount = axesCount;
230     if (axesCount > 0) {
231         mSupportedAxes = std::unique_ptr<AxisTag[]>(new AxisTag[axesCount]);
232         std::copy(axesPtr, axesPtr + axesCount, mSupportedAxes.get());
233     }
234 }
235 
writeTo(BufferWriter * writer,const std::unordered_map<std::shared_ptr<FontFamily>,uint32_t> & fontFamilyToIndexMap) const236 void FontCollection::writeTo(BufferWriter* writer,
237                              const std::unordered_map<std::shared_ptr<FontFamily>, uint32_t>&
238                                      fontFamilyToIndexMap) const {
239     writer->write<uint32_t>(mMaxChar);
240     std::vector<uint32_t> indices;
241     indices.reserve(getFamilyCount());
242     for (size_t i = 0; i < getFamilyCount(); ++i) {
243         const std::shared_ptr<FontFamily>& fontFamily = getFamilyAt(i);
244         auto it = fontFamilyToIndexMap.find(fontFamily);
245         if (it == fontFamilyToIndexMap.end()) {
246             ALOGE("fontFamily not found in fontFamilyToIndexMap");
247         } else {
248             indices.push_back(it->second);
249         }
250     }
251     writer->writeArray<uint32_t>(indices.data(), indices.size());
252     writer->writeArray<Range>(mRanges, mRangesCount);
253     writer->writeArray<uint8_t>(mFamilyVec, mFamilyVecCount);
254     // No need to serialize mVSFamilyVec as it can be reconstructed easily from mFamilies.
255     writer->writeArray<AxisTag>(mSupportedAxes.get(), mSupportedAxesCount);
256 }
257 
258 // static
readVector(BufferReader * reader)259 std::vector<std::shared_ptr<FontCollection>> FontCollection::readVector(BufferReader* reader) {
260     auto allFontFamilies = std::make_shared<std::vector<std::shared_ptr<FontFamily>>>(
261             FontFamily::readVector(reader));
262     uint32_t count = reader->read<uint32_t>();
263     std::vector<std::shared_ptr<FontCollection>> fontCollections;
264     fontCollections.reserve(count);
265     for (uint32_t i = 0; i < count; i++) {
266         fontCollections.emplace_back(new FontCollection(reader, allFontFamilies));
267     }
268     return fontCollections;
269 }
270 
271 // static
writeVector(BufferWriter * writer,const std::vector<std::shared_ptr<FontCollection>> & fontCollections)272 void FontCollection::writeVector(
273         BufferWriter* writer, const std::vector<std::shared_ptr<FontCollection>>& fontCollections) {
274     std::vector<std::shared_ptr<FontFamily>> allFontFamilies;
275     // Note: operator== for shared_ptr compares raw pointer values.
276     std::unordered_map<std::shared_ptr<FontFamily>, uint32_t> fontFamilyToIndexMap;
277     collectAllFontFamilies(fontCollections, &allFontFamilies, &fontFamilyToIndexMap);
278 
279     FontFamily::writeVector(writer, allFontFamilies);
280     writer->write<uint32_t>(fontCollections.size());
281     for (const auto& fontCollection : fontCollections) {
282         fontCollection->writeTo(writer, fontFamilyToIndexMap);
283     }
284 }
285 
286 // static
collectAllFontFamilies(const std::vector<std::shared_ptr<FontCollection>> & fontCollections,std::vector<std::shared_ptr<FontFamily>> * outAllFontFamilies,std::unordered_map<std::shared_ptr<FontFamily>,uint32_t> * outFontFamilyToIndexMap)287 void FontCollection::collectAllFontFamilies(
288         const std::vector<std::shared_ptr<FontCollection>>& fontCollections,
289         std::vector<std::shared_ptr<FontFamily>>* outAllFontFamilies,
290         std::unordered_map<std::shared_ptr<FontFamily>, uint32_t>* outFontFamilyToIndexMap) {
291     for (const auto& fontCollection : fontCollections) {
292         for (size_t i = 0; i < fontCollection->getFamilyCount(); ++i) {
293             const std::shared_ptr<FontFamily>& fontFamily = fontCollection->getFamilyAt(i);
294             bool inserted =
295                     outFontFamilyToIndexMap->emplace(fontFamily, outAllFontFamilies->size()).second;
296             if (inserted) {
297                 outAllFontFamilies->push_back(fontFamily);
298             }
299         }
300     }
301 }
302 
// Special scores for the font fallback.
// The family supports neither the variation sequence nor its base character.
constexpr uint32_t kUnsupportedFontScore = 0;
// The family must be used unconditionally; the search stops as soon as this score is seen.
constexpr uint32_t kFirstFontScore = UINT32_MAX;
306 
307 // Calculates a font score.
308 // The score of the font family is based on three subscores.
309 //  - Coverage Score: How well the font family covers the given character or variation sequence.
310 //  - Locale Score: How well the font family is appropriate for the locale.
311 //  - Variant Score: Whether the font family matches the variant. Note that this variant is not the
312 //    one in BCP47. This is our own font variant (e.g., elegant, compact).
313 //
314 // Then, there is a priority for these three subscores as follow:
315 //   Coverage Score > Locale Score > Variant Score
316 // The returned score reflects this priority order.
317 //
318 // Note that there are two special scores.
319 //  - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
320 //    base character.
321 //  - kFirstFontScore: When the font is the first font family in the collection and it supports the
322 //    given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,FamilyVariant variant,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const323 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, FamilyVariant variant,
324                                          uint32_t localeListId,
325                                          const std::shared_ptr<FontFamily>& fontFamily) const {
326     const uint32_t coverageScore = calcCoverageScore(ch, vs, localeListId, fontFamily);
327     if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
328         // No need to calculate other scores.
329         return coverageScore;
330     }
331 
332     const uint32_t localeScore = calcLocaleMatchingScore(localeListId, *fontFamily);
333     const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
334 
335     // Subscores are encoded into 31 bits representation to meet the subscore priority.
336     // The highest 2 bits are for coverage score, then following 28 bits are for locale score,
337     // then the last 1 bit is for variant score.
338     return coverageScore << 29 | localeScore << 1 | variantScore;
339 }
340 
341 // Returns true if
342 //  - the fontFamily is a developer specified custom fallback.
343 //  - no custom fallback is provided and the fontFamily is a default fallback.
isPrimaryFamily(const std::shared_ptr<FontFamily> & fontFamily) const344 bool FontCollection::isPrimaryFamily(const std::shared_ptr<FontFamily>& fontFamily) const {
345     // If the font family is provided by developers, it is primary.
346     if (fontFamily->isCustomFallback()) {
347         return true;
348     }
349 
350     if (getFamilyAt(0)->isCustomFallback()) {
351         return false;
352     } else {
353         return fontFamily->isDefaultFallback();
354     }
355 }
356 
357 // Calculates a font score based on variation sequence coverage.
358 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
359 //   character.
360 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
361 //   supports the given character or variation sequence.
362 // - Returns 3 if the font family supports the variation sequence.
363 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
364 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
365 // - Returns 1 if the variation selector is not specified or if the font family only supports the
366 //   variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const367 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, uint32_t localeListId,
368                                            const std::shared_ptr<FontFamily>& fontFamily) const {
369     const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
370     if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
371         // The font doesn't support either variation sequence or even the base character.
372         return kUnsupportedFontScore;
373     }
374 
375     if ((vs == 0 || hasVSGlyph) && isPrimaryFamily(fontFamily)) {
376         // If the first font family supports the given character or variation sequence, always use
377         // it.
378         return kFirstFontScore;
379     }
380 
381     if (vs != 0 && hasVSGlyph) {
382         return 3;
383     }
384 
385     bool colorEmojiRequest;
386     if (vs == EMOJI_STYLE_VS) {
387         colorEmojiRequest = true;
388     } else if (vs == TEXT_STYLE_VS) {
389         colorEmojiRequest = false;
390     } else {
391         switch (LocaleListCache::getById(localeListId).getEmojiStyle()) {
392             case EmojiStyle::EMOJI:
393                 colorEmojiRequest = true;
394                 break;
395             case EmojiStyle::TEXT:
396                 colorEmojiRequest = false;
397                 break;
398             case EmojiStyle::EMPTY:
399             case EmojiStyle::DEFAULT:
400             default:
401                 // Do not give any extra score for the default emoji style.
402                 return 1;
403                 break;
404         }
405     }
406 
407     return colorEmojiRequest == fontFamily->isColorEmojiFamily() ? 2 : 1;
408 }
409 
410 // Calculate font scores based on the script matching, subtag matching and primary locale matching.
411 //
412 // 1. If only the font's language matches or there is no matches between requested font and
413 //    supported font, then the font obtains a score of 0.
414 // 2. Without a match in language, considering subtag may change font's EmojiStyle over script,
415 //    a match in subtag gets a score of 2 and a match in scripts gains a score of 1.
416 // 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while
417 //    language-and-script obtains a socre of 3 with the same reason above.
418 //
419 // If two locales in the requested list have the same locale score, the font matching with higher
420 // priority locale gets a higher score. For example, in the case the user requested locale list is
421 // "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score than the font of
422 // "en-Latn".
423 //
424 // To achieve score calculation with priorities, the locale score is determined as follows:
425 //   LocaleScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
426 // Here, m is the maximum number of locales to be compared, and s(i) is the i-th locale's matching
427 // score. The possible values of s(i) are 0, 1, 2, 3 and 4.
calcLocaleMatchingScore(uint32_t userLocaleListId,const FontFamily & fontFamily)428 uint32_t FontCollection::calcLocaleMatchingScore(uint32_t userLocaleListId,
429                                                  const FontFamily& fontFamily) {
430     const LocaleList& localeList = LocaleListCache::getById(userLocaleListId);
431     const LocaleList& fontLocaleList = LocaleListCache::getById(fontFamily.localeListId());
432 
433     const size_t maxCompareNum = std::min(localeList.size(), FONT_LOCALE_LIMIT);
434     uint32_t score = 0;
435     for (size_t i = 0; i < maxCompareNum; ++i) {
436         score = score * 5u + localeList[i].calcScoreFor(fontLocaleList);
437     }
438     return score;
439 }
440 
441 // Calculates a font score based on variant ("compact" or "elegant") matching.
442 //  - Returns 1 if the font doesn't have variant or the variant matches with the text style.
443 //  - No score if the font has a variant but it doesn't match with the text style.
calcVariantMatchingScore(FamilyVariant variant,const FontFamily & fontFamily)444 uint32_t FontCollection::calcVariantMatchingScore(FamilyVariant variant,
445                                                   const FontFamily& fontFamily) {
446     const FamilyVariant familyVariant = fontFamily.variant();
447     if (familyVariant == FamilyVariant::DEFAULT) {
448         return 1;
449     }
450     if (familyVariant == variant) {
451         return 1;
452     }
453     if (variant == FamilyVariant::DEFAULT && familyVariant == FamilyVariant::COMPACT) {
454         // If default is requested, prefer compat variation.
455         return 1;
456     }
457     return 0;
458 }
459 
460 // Implement heuristic for choosing best-match font. Here are the rules:
461 // 1. If first font in the collection has the character, it wins.
462 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
463 // 3. Highest score wins, with ties resolved to the first font.
464 // This method never returns nullptr.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t localeListId,FamilyVariant variant) const465 FontCollection::FamilyMatchResult FontCollection::getFamilyForChar(uint32_t ch, uint32_t vs,
466                                                                    uint32_t localeListId,
467                                                                    FamilyVariant variant) const {
468     if (ch >= mMaxChar) {
469         return FamilyMatchResult::Builder().add(0).build();
470     }
471 
472     Range range = mRanges[ch >> kLogCharsPerPage];
473 
474     if (vs != 0) {
475         range = {0, static_cast<uint16_t>(getFamilyCount())};
476     }
477 
478     uint32_t bestScore = kUnsupportedFontScore;
479     FamilyMatchResult::Builder builder;
480 
481     for (size_t i = range.start; i < range.end; i++) {
482         const uint8_t familyIndex = vs == 0 ? mFamilyVec[i] : i;
483         const std::shared_ptr<FontFamily>& family = getFamilyAt(familyIndex);
484         const uint32_t score = calcFamilyScore(ch, vs, variant, localeListId, family);
485         if (score == kFirstFontScore) {
486             // If the first font family supports the given character or variation sequence, always
487             // use it.
488             return builder.add(familyIndex).build();
489         }
490         if (score != kUnsupportedFontScore && score >= bestScore) {
491             if (score > bestScore) {
492                 builder.reset();
493                 bestScore = score;
494             }
495             builder.add(familyIndex);
496         }
497     }
498     if (builder.empty()) {
499         UErrorCode errorCode = U_ZERO_ERROR;
500         const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
501         if (U_SUCCESS(errorCode)) {
502             UChar decomposed[4];
503             int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
504             if (U_SUCCESS(errorCode) && len > 0) {
505                 int off = 0;
506                 U16_NEXT_UNSAFE(decomposed, off, ch);
507                 return getFamilyForChar(ch, vs, localeListId, variant);
508             }
509         }
510         return FamilyMatchResult::Builder().add(0).build();
511     }
512     return builder.build();
513 }
514 
515 // Characters where we want to continue using existing font run for (or stick to the next run if
516 // they start a string), even if the font does not support them explicitly. These are handled
517 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's
518 // usually meaningless to switch to a different font to display them.
doesNotNeedFontSupport(uint32_t c)519 static bool doesNotNeedFontSupport(uint32_t c) {
520     return c == 0x00AD                      // SOFT HYPHEN
521            || c == 0x034F                   // COMBINING GRAPHEME JOINER
522            || c == 0x061C                   // ARABIC LETTER MARK
523            || (0x200C <= c && c <= 0x200F)  // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
524            || (0x202A <= c && c <= 0x202E)  // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
525            || (0x2066 <= c && c <= 0x2069)  // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
526            || c == 0xFEFF                   // BYTE ORDER MARK
527            || isVariationSelector(c);
528 }
529 
// Characters for which the existing font run is kept instead of recomputing the best match in
// the fallback list.
static const uint32_t stickyAllowlist[] = {
        '!',
        ',',
        '-',
        '.',
        ':',
        ';',
        '?',
        0x00A0,  // NBSP
        0x2010,  // HYPHEN
        0x2011,  // NB_HYPHEN
        0x202F,  // NNBSP
        0x2640,  // FEMALE_SIGN,
        0x2642,  // MALE_SIGN,
        0x2695,  // STAFF_OF_AESCULAPIUS
};

// Returns true when `c` is one of the stickyAllowlist entries.
static bool isStickyAllowlisted(uint32_t c) {
    for (const uint32_t allowed : stickyAllowlist) {
        if (allowed == c) {
            return true;
        }
    }
    return false;
}
549 
isCombining(uint32_t c)550 static inline bool isCombining(uint32_t c) {
551     return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0;
552 }
553 
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const554 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
555                                           uint32_t variationSelector) const {
556     if (!isVariationSelector(variationSelector)) {
557         return false;
558     }
559     if (baseCodepoint >= mMaxChar) {
560         return false;
561     }
562 
563     // Currently mRanges can not be used here since it isn't aware of the variation sequence.
564     for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
565         if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
566             return true;
567         }
568     }
569 
570     // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
571     // for <char, text presentation selector> case since we have special fallback rule for the
572     // sequence. Note that we don't need to restrict this to already standardized variation
573     // sequences, since Unicode is adding variation sequences more frequently now and may even move
574     // towards allowing text and emoji variation selectors on any character.
575     if (variationSelector == TEXT_STYLE_VS) {
576         for (size_t i = 0; i < getFamilyCount(); ++i) {
577             const std::shared_ptr<FontFamily>& family = getFamilyAt(i);
578             if (!family->isColorEmojiFamily() && family->hasGlyph(baseCodepoint, 0)) {
579                 return true;
580             }
581         }
582     }
583 
584     return false;
585 }
586 
// U+FFFD REPLACEMENT CHARACTER.
constexpr uint32_t REPLACEMENT_CHARACTER{0xFFFD};
588 
intersect(FontCollection::FamilyMatchResult l,FontCollection::FamilyMatchResult r)589 FontCollection::FamilyMatchResult FontCollection::FamilyMatchResult::intersect(
590         FontCollection::FamilyMatchResult l, FontCollection::FamilyMatchResult r) {
591     if (l == r) {
592         return l;
593     }
594 
595     uint32_t li = 0;
596     uint32_t ri = 0;
597     FamilyMatchResult::Builder b;
598     while (li < l.size() && ri < r.size()) {
599         if (l[li] < r[ri]) {
600             li++;
601         } else if (l[li] > r[ri]) {
602             ri++;
603         } else {  // l[li] == r[ri]
604             b.add(l[li]);
605             li++;
606             ri++;
607         }
608     }
609     return b.build();
610 }
611 
filterFamilyByLocale(const LocaleList & localeList,const std::function<void (const FontFamily & family)> & callback) const612 void FontCollection::filterFamilyByLocale(
613         const LocaleList& localeList,
614         const std::function<void(const FontFamily& family)>& callback) const {
615     if (localeList.empty()) {
616         return;
617     }
618     // Only use the first family for the default line height.
619     const Locale& locale = localeList[0];
620     for (uint8_t i = 0; i < mFamilyCount; ++i) {
621         const auto& family = getFamilyAt(i);
622 
623         uint32_t fontLocaleId = family->localeListId();
624         if (fontLocaleId == LocaleListCache::kInvalidListId) {
625             continue;
626         }
627         const LocaleList& fontLocaleList = LocaleListCache::getById(fontLocaleId);
628         for (uint32_t i = 0; i < fontLocaleList.size(); ++i) {
629             if (fontLocaleList[i].isEqualScript(locale)) {
630                 callback(*family.get());
631                 break;
632             }
633         }
634     }
635 }
636 
getReferenceExtentForLocale(const MinikinPaint & paint) const637 MinikinExtent FontCollection::getReferenceExtentForLocale(const MinikinPaint& paint) const {
638     uint32_t localeId = paint.localeListId;
639     LocaleExtentKey key = {localeId, paint.size};
640 
641     std::lock_guard<std::mutex> lock(mMutex);
642     auto e = mExtentCacheForLocale.get(key);
643 
644     if (e.ascent != 0 || e.descent != 0) {
645         return e;
646     }
647 
648     MinikinExtent result(0, 0);
649     for (uint8_t i = 0; i < mFamilyCount; ++i) {
650         const auto& family = getFamilyAt(i);
651         if (!family->isCustomFallback()) {
652             break;
653         }
654 
655         // Use this family
656         MinikinExtent extent(0, 0);
657         FakedFont font = getFamilyAt(i)->getClosestMatch(paint.fontStyle);
658         font.typeface()->GetFontExtent(&extent, paint, font.fakery);
659         result.extendBy(extent);
660     }
661 
662     if (localeId == LocaleListCache::kInvalidListId) {
663         mExtentCacheForLocale.put(key, result);
664         return result;
665     }
666 
667     // If default is requested, use compact one.
668     const FamilyVariant requestVariant = paint.familyVariant == FamilyVariant::DEFAULT
669                                                  ? FamilyVariant::COMPACT
670                                                  : paint.familyVariant;
671     const LocaleList& requestedLocaleList = LocaleListCache::getById(localeId);
672 
673     bool familyFound = false;
674     filterFamilyByLocale(requestedLocaleList, [&](const FontFamily& family) {
675         const FamilyVariant familyVariant = family.variant() == FamilyVariant::DEFAULT
676                                                     ? FamilyVariant::COMPACT
677                                                     : family.variant();
678 
679         if (familyVariant != requestVariant) {
680             return;
681         }
682 
683         MinikinExtent extent(0, 0);
684         FakedFont font = family.getClosestMatch(paint.fontStyle);
685         font.typeface()->GetFontExtent(&extent, paint, font.fakery);
686         result.extendBy(extent);
687 
688         familyFound = true;
689     });
690 
691     // If nothing matches, try non-variant match cases since it is used for fallback.
692     filterFamilyByLocale(requestedLocaleList, [&](const FontFamily& family) {
693         // Use this family
694         MinikinExtent extent(0, 0);
695         FakedFont font = family.getClosestMatch(paint.fontStyle);
696         font.typeface()->GetFontExtent(&extent, paint, font.fakery);
697         result.extendBy(extent);
698 
699         familyFound = true;
700     });
701 
702     // If nothing matches, use default font.
703     if (!familyFound) {
704         FakedFont font = getFamilyAt(0)->getClosestMatch(paint.fontStyle);
705         font.typeface()->GetFontExtent(&result, paint, font.fakery);
706     }
707 
708     mExtentCacheForLocale.put(key, result);
709     return result;
710 }
711 
// Splits |text| into runs, each associated with the font families (FamilyMatchResult) chosen for
// the characters in that run.
//
// Parameters:
//   text          - UTF-16 text to itemize. An empty text yields an empty vector.
//   (FontStyle)   - unnamed and unused by this function.
//   localeListId  - forwarded to getFamilyForChar() for every character needing a family lookup.
//   familyVariant - forwarded to getFamilyForChar() likewise.
//   runMax        - upper bound on the number of runs returned; scanning stops early once enough
//                   runs have been produced for the first runMax runs to be final.
//
// Returns runs in text order. Each run's |start| is inclusive and |end| exclusive, both expressed
// in UTF-16 code units. If no character required font support, a single run covering the whole
// text and referring to family index 0 is produced.
std::vector<FontCollection::Run> FontCollection::itemize(U16StringPiece text, FontStyle,
                                                         uint32_t localeListId,
                                                         FamilyVariant familyVariant,
                                                         uint32_t runMax) const {
    const uint16_t* string = text.data();
    const uint32_t string_size = text.size();

    // Families of the run currently being extended; empty until a first family is assigned.
    FamilyMatchResult lastFamilyIndices = FamilyMatchResult();

    if (string_size == 0) {
        return std::vector<Run>();
    }

    // Sentinel stored in nextCh once the whole string has been consumed.
    const uint32_t kEndOfString = 0xFFFFFFFF;
    std::vector<Run> result;
    // Points at the last element of |result| while a run is open; refreshed after every push_back.
    Run* run = nullptr;

    // The loop works with one code point of lookahead: |nextCh| is decoded one iteration before it
    // is processed so that a following variation selector can be passed to getFamilyForChar().
    uint32_t nextCh = 0;
    uint32_t prevCh = 0;
    size_t nextUtf16Pos = 0;
    size_t readLength = 0;
    U16_NEXT(string, readLength, string_size, nextCh);
    if (U_IS_SURROGATE(nextCh)) {
        // A surrogate code point surviving U16_NEXT is unpaired; treat it as U+FFFD.
        nextCh = REPLACEMENT_CHARACTER;
    }

    do {
        const uint32_t ch = nextCh;
        const size_t utf16Pos = nextUtf16Pos;
        nextUtf16Pos = readLength;
        if (readLength < string_size) {
            U16_NEXT(string, readLength, string_size, nextCh);
            if (U_IS_SURROGATE(nextCh)) {
                nextCh = REPLACEMENT_CHARACTER;
            }
        } else {
            nextCh = kEndOfString;
        }

        bool shouldContinueRun = false;
        if (doesNotNeedFontSupport(ch)) {
            // Always continue if the character is a format character not needed to be in the font.
            shouldContinueRun = true;
        } else if (!lastFamilyIndices.empty() && (isStickyAllowlisted(ch) || isCombining(ch))) {
            // Continue using the existing font as long as it has coverage and the character is
            // allowlisted as sticky or is a combining mark.

            const std::shared_ptr<FontFamily>& lastFamily = getFamilyAt(lastFamilyIndices[0]);
            if (lastFamily->isColorEmojiFamily()) {
                // If the last family is a color emoji font, continue when any of the candidate
                // families of the current run covers the character.
                shouldContinueRun = false;
                for (uint8_t ix : lastFamilyIndices) {
                    shouldContinueRun |= getFamilyAt(ix)->getCoverage().get(ch);
                }
            } else {
                shouldContinueRun = lastFamily->getCoverage().get(ch);
            }
        }

        if (!shouldContinueRun) {
            // Pass the following code point only if it is a variation selector; 0 otherwise.
            FamilyMatchResult familyIndices = getFamilyForChar(
                    ch, isVariationSelector(nextCh) ? nextCh : 0, localeListId, familyVariant);
            bool breakRun;
            if (utf16Pos == 0 || lastFamilyIndices.empty()) {
                breakRun = true;
            } else {
                const std::shared_ptr<FontFamily>& lastFamily = getFamilyAt(lastFamilyIndices[0]);
                if (lastFamily->isColorEmojiFamily()) {
                    FamilyMatchResult intersection =
                            FamilyMatchResult::intersect(familyIndices, lastFamilyIndices);
                    if (intersection.empty()) {
                        breakRun = true;  // None of last family can draw the given char.
                    } else {
                        breakRun = isEmojiBreak(prevCh, ch);
                        if (!breakRun) {
                            // To select sequence supported families, update family indices with the
                            // intersection between the supported families between prev char and
                            // current char.
                            familyIndices = intersection;
                            lastFamilyIndices = intersection;
                            run->familyMatch = intersection;
                        }
                    }
                } else {
                    // Non-emoji runs are compared by their first (preferred) family only.
                    breakRun = familyIndices[0] != lastFamilyIndices[0];
                }
            }

            if (breakRun) {
                size_t start = utf16Pos;
                // Workaround for combining marks and emoji modifiers until we implement
                // per-cluster font selection: if a combining mark or an emoji modifier is found in
                // a different font that also supports the previous character, attach previous
                // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
                // handled properly by this since it's a combining mark too.
                if (utf16Pos != 0 &&
                    (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh)))) {
                    for (uint8_t ix : familyIndices) {
                        if (getFamilyAt(ix)->getCoverage().get(prevCh)) {
                            const size_t prevChLength = U16_LENGTH(prevCh);
                            if (run != nullptr) {
                                // Give the previous character back; drop the run if that leaves it
                                // empty. |run| is reassigned by the push_back below before it is
                                // dereferenced again.
                                run->end -= prevChLength;
                                if (run->start == run->end) {
                                    result.pop_back();
                                }
                            }
                            start -= prevChLength;
                            break;
                        }
                    }
                }
                if (lastFamilyIndices.empty()) {
                    // This is the first family ever assigned. We are either seeing the very first
                    // character (which means start would already be zero), or we have only seen
                    // characters that don't need any font support (which means we need to adjust
                    // start to be 0 to include those characters).
                    start = 0;
                }
                // |end| is a placeholder here; it is set right after this block.
                result.push_back({familyIndices, static_cast<int>(start), 0});
                run = &result.back();
                lastFamilyIndices = run->familyMatch;
            }
        }
        prevCh = ch;
        if (run != nullptr) {
            run->end = nextUtf16Pos;  // exclusive
        }

        // Stop scanning the remaining characters once result.size() reaches runMax + 2.
        // At that point the runs in [0, runMax) are final:
        //  - when result.size() equals runMax, the last run may still be growing;
        //  - when result.size() equals runMax + 1, the workaround above may still remove the last
        //    run and move its character into the next one, which changes the run before it.
        if (result.size() >= 2 && runMax == result.size() - 2) {
            break;
        }
    } while (nextCh != kEndOfString);

    if (lastFamilyIndices.empty()) {
        // No character needed any font support, so it doesn't really matter which font they end up
        // getting displayed in. We put the whole string in one run, using the first font.
        result.push_back(
                {FamilyMatchResult::Builder().add(0).build(), 0, static_cast<int>(string_size)});
    }

    if (result.size() > runMax) {
        // The itemization has terminated since it reaches the runMax. Remove last unfinalized runs.
        return std::vector<Run>(result.begin(), result.begin() + runMax);
    }

    return result;
}
863 
getBestFont(U16StringPiece text,const Run & run,FontStyle style)864 FakedFont FontCollection::getBestFont(U16StringPiece text, const Run& run, FontStyle style) {
865     uint8_t bestIndex = 0;
866     uint32_t bestScore = 0xFFFFFFFF;
867 
868     const std::shared_ptr<FontFamily>& family = getFamilyAt(run.familyMatch[0]);
869     if (family->isColorEmojiFamily() && run.familyMatch.size() > 1) {
870         for (size_t i = 0; i < run.familyMatch.size(); ++i) {
871             const std::shared_ptr<FontFamily>& family = getFamilyAt(run.familyMatch[i]);
872             const HbFontUniquePtr& font = family->getFont(0)->baseFont();
873             uint32_t score = getGlyphScore(text, run.start, run.end, font);
874 
875             if (score < bestScore) {
876                 bestIndex = run.familyMatch[i];
877                 bestScore = score;
878             }
879         }
880     } else {
881         bestIndex = run.familyMatch[0];
882     }
883     return getFamilyAt(bestIndex)->getClosestMatch(style);
884 }
885 
baseFontFaked(FontStyle style)886 FakedFont FontCollection::baseFontFaked(FontStyle style) {
887     return getFamilyAt(0)->getClosestMatch(style);
888 }
889 
createCollectionWithVariation(const std::vector<FontVariation> & variations)890 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
891         const std::vector<FontVariation>& variations) {
892     if (variations.empty() || mSupportedAxesCount == 0) {
893         return nullptr;
894     }
895 
896     bool hasSupportedAxis = false;
897     for (const FontVariation& variation : variations) {
898         if (std::binary_search(mSupportedAxes.get(), mSupportedAxes.get() + mSupportedAxesCount,
899                                variation.axisTag)) {
900             hasSupportedAxis = true;
901             break;
902         }
903     }
904     if (!hasSupportedAxis) {
905         // None of variation axes are supported by this font collection.
906         return nullptr;
907     }
908 
909     std::vector<std::shared_ptr<FontFamily>> families;
910     for (size_t i = 0; i < getFamilyCount(); ++i) {
911         const std::shared_ptr<FontFamily>& family = getFamilyAt(i);
912         std::shared_ptr<FontFamily> newFamily =
913                 features::lazy_variation_instance() ? FontFamily::create(family, variations)
914                                                     : family->createFamilyWithVariation(variations);
915         if (newFamily) {
916             families.push_back(newFamily);
917         } else {
918             families.push_back(family);
919         }
920     }
921 
922     return std::shared_ptr<FontCollection>(new FontCollection(families));
923 }
924 
createCollectionWithFamilies(std::vector<std::shared_ptr<FontFamily>> && families) const925 std::shared_ptr<FontCollection> FontCollection::createCollectionWithFamilies(
926         std::vector<std::shared_ptr<FontFamily>>&& families) const {
927     families.reserve(families.size() + getFamilyCount());
928     for (size_t i = 0; i < getFamilyCount(); i++) {
929         families.push_back(getFamilyAt(i));
930     }
931     return FontCollection::create(families);
932 }
933 
// Returns the identifier held in mId for this collection.
uint32_t FontCollection::getId() const {
    return mId;
}
937 
938 }  // namespace minikin
939