1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Minikin"
18 
#include "minikin/FontCollection.h"

#include <algorithm>
#include <atomic>
#include <utility>

#include <log/log.h>
#include <unicode/unistr.h>
#include <unicode/unorm2.h>

#include "minikin/Emoji.h"

#include "Locale.h"
#include "LocaleListCache.h"
#include "MinikinInternal.h"
32 
33 using std::vector;
34 
35 namespace minikin {
36 
// Returns the greater of |a| and |b|. When |a| does not compare greater than |b| (including
// when they are equal), |b| is returned.
template <typename T>
static inline T max(T a, T b) {
    if (a > b) {
        return a;
    }
    return b;
}
41 
// Variation selectors that request a presentation style for the preceding character.
constexpr uint32_t EMOJI_STYLE_VS = 0xFE0F;  // U+FE0F: color (emoji) presentation
constexpr uint32_t TEXT_STYLE_VS = 0xFE0E;   // U+FE0E: text presentation

// Source of FontCollection ids: init() takes the current value and then increments it.
static std::atomic<uint32_t> gNextCollectionId{0};
46 
FontCollection(std::shared_ptr<FontFamily> && typeface)47 FontCollection::FontCollection(std::shared_ptr<FontFamily>&& typeface) : mMaxChar(0) {
48     std::vector<std::shared_ptr<FontFamily>> typefaces;
49     typefaces.push_back(typeface);
50     init(typefaces);
51 }
52 
FontCollection(const vector<std::shared_ptr<FontFamily>> & typefaces)53 FontCollection::FontCollection(const vector<std::shared_ptr<FontFamily>>& typefaces) : mMaxChar(0) {
54     init(typefaces);
55 }
56 
init(const vector<std::shared_ptr<FontFamily>> & typefaces)57 void FontCollection::init(const vector<std::shared_ptr<FontFamily>>& typefaces) {
58     mId = gNextCollectionId++;
59     vector<uint32_t> lastChar;
60     size_t nTypefaces = typefaces.size();
61     const FontStyle defaultStyle;
62     for (size_t i = 0; i < nTypefaces; i++) {
63         const std::shared_ptr<FontFamily>& family = typefaces[i];
64         if (family->getClosestMatch(defaultStyle).font == nullptr) {
65             continue;
66         }
67         const SparseBitSet& coverage = family->getCoverage();
68         mFamilies.push_back(family);  // emplace_back would be better
69         if (family->hasVSTable()) {
70             mVSFamilyVec.push_back(family);
71         }
72         mMaxChar = max(mMaxChar, coverage.length());
73         lastChar.push_back(coverage.nextSetBit(0));
74 
75         const std::unordered_set<AxisTag>& supportedAxes = family->supportedAxes();
76         mSupportedAxes.insert(supportedAxes.begin(), supportedAxes.end());
77     }
78     nTypefaces = mFamilies.size();
79     MINIKIN_ASSERT(nTypefaces > 0, "Font collection must have at least one valid typeface");
80     MINIKIN_ASSERT(nTypefaces <= MAX_FAMILY_COUNT,
81                    "Font collection may only have up to %d font families.", MAX_FAMILY_COUNT);
82     size_t nPages = (mMaxChar + kPageMask) >> kLogCharsPerPage;
83     // TODO: Use variation selector map for mRanges construction.
84     // A font can have a glyph for a base code point and variation selector pair but no glyph for
85     // the base code point without variation selector. The family won't be listed in the range in
86     // this case.
87     for (size_t i = 0; i < nPages; i++) {
88         Range dummy;
89         mRanges.push_back(dummy);
90         Range* range = &mRanges.back();
91         range->start = mFamilyVec.size();
92         for (size_t j = 0; j < nTypefaces; j++) {
93             if (lastChar[j] < (i + 1) << kLogCharsPerPage) {
94                 const std::shared_ptr<FontFamily>& family = mFamilies[j];
95                 mFamilyVec.push_back(static_cast<uint8_t>(j));
96                 uint32_t nextChar = family->getCoverage().nextSetBit((i + 1) << kLogCharsPerPage);
97                 lastChar[j] = nextChar;
98             }
99         }
100         range->end = mFamilyVec.size();
101     }
102     // See the comment in Range for more details.
103     LOG_ALWAYS_FATAL_IF(mFamilyVec.size() >= 0xFFFF,
104                         "Exceeded the maximum indexable cmap coverage.");
105 }
106 
// Sentinel scores used by the font fallback (see calcFamilyScore).
constexpr uint32_t kUnsupportedFontScore = 0;     // neither the sequence nor its base is covered
constexpr uint32_t kFirstFontScore = UINT32_MAX;  // the first family of the collection matches
110 
111 // Calculates a font score.
112 // The score of the font family is based on three subscores.
113 //  - Coverage Score: How well the font family covers the given character or variation sequence.
114 //  - Locale Score: How well the font family is appropriate for the locale.
115 //  - Variant Score: Whether the font family matches the variant. Note that this variant is not the
116 //    one in BCP47. This is our own font variant (e.g., elegant, compact).
117 //
118 // Then, there is a priority for these three subscores as follow:
119 //   Coverage Score > Locale Score > Variant Score
120 // The returned score reflects this priority order.
121 //
122 // Note that there are two special scores.
123 //  - kUnsupportedFontScore: When the font family doesn't support the variation sequence or even its
124 //    base character.
125 //  - kFirstFontScore: When the font is the first font family in the collection and it supports the
126 //    given character or variation sequence.
calcFamilyScore(uint32_t ch,uint32_t vs,FontFamily::Variant variant,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const127 uint32_t FontCollection::calcFamilyScore(uint32_t ch, uint32_t vs, FontFamily::Variant variant,
128                                          uint32_t localeListId,
129                                          const std::shared_ptr<FontFamily>& fontFamily) const {
130     const uint32_t coverageScore = calcCoverageScore(ch, vs, localeListId, fontFamily);
131     if (coverageScore == kFirstFontScore || coverageScore == kUnsupportedFontScore) {
132         // No need to calculate other scores.
133         return coverageScore;
134     }
135 
136     const uint32_t localeScore = calcLocaleMatchingScore(localeListId, *fontFamily);
137     const uint32_t variantScore = calcVariantMatchingScore(variant, *fontFamily);
138 
139     // Subscores are encoded into 31 bits representation to meet the subscore priority.
140     // The highest 2 bits are for coverage score, then following 28 bits are for locale score,
141     // then the last 1 bit is for variant score.
142     return coverageScore << 29 | localeScore << 1 | variantScore;
143 }
144 
145 // Calculates a font score based on variation sequence coverage.
146 // - Returns kUnsupportedFontScore if the font doesn't support the variation sequence or its base
147 //   character.
148 // - Returns kFirstFontScore if the font family is the first font family in the collection and it
149 //   supports the given character or variation sequence.
150 // - Returns 3 if the font family supports the variation sequence.
151 // - Returns 2 if the vs is a color variation selector (U+FE0F) and if the font is an emoji font.
152 // - Returns 2 if the vs is a text variation selector (U+FE0E) and if the font is not an emoji font.
153 // - Returns 1 if the variation selector is not specified or if the font family only supports the
154 //   variation sequence's base character.
calcCoverageScore(uint32_t ch,uint32_t vs,uint32_t localeListId,const std::shared_ptr<FontFamily> & fontFamily) const155 uint32_t FontCollection::calcCoverageScore(uint32_t ch, uint32_t vs, uint32_t localeListId,
156                                            const std::shared_ptr<FontFamily>& fontFamily) const {
157     const bool hasVSGlyph = (vs != 0) && fontFamily->hasGlyph(ch, vs);
158     if (!hasVSGlyph && !fontFamily->getCoverage().get(ch)) {
159         // The font doesn't support either variation sequence or even the base character.
160         return kUnsupportedFontScore;
161     }
162 
163     if ((vs == 0 || hasVSGlyph) && mFamilies[0] == fontFamily) {
164         // If the first font family supports the given character or variation sequence, always use
165         // it.
166         return kFirstFontScore;
167     }
168 
169     if (vs != 0 && hasVSGlyph) {
170         return 3;
171     }
172 
173     bool colorEmojiRequest;
174     if (vs == EMOJI_STYLE_VS) {
175         colorEmojiRequest = true;
176     } else if (vs == TEXT_STYLE_VS) {
177         colorEmojiRequest = false;
178     } else {
179         switch (LocaleListCache::getById(localeListId).getEmojiStyle()) {
180             case EmojiStyle::EMOJI:
181                 colorEmojiRequest = true;
182                 break;
183             case EmojiStyle::TEXT:
184                 colorEmojiRequest = false;
185                 break;
186             case EmojiStyle::EMPTY:
187             case EmojiStyle::DEFAULT:
188             default:
189                 // Do not give any extra score for the default emoji style.
190                 return 1;
191                 break;
192         }
193     }
194 
195     return colorEmojiRequest == fontFamily->isColorEmojiFamily() ? 2 : 1;
196 }
197 
198 // Calculate font scores based on the script matching, subtag matching and primary locale matching.
199 //
200 // 1. If only the font's language matches or there is no matches between requested font and
201 //    supported font, then the font obtains a score of 0.
202 // 2. Without a match in language, considering subtag may change font's EmojiStyle over script,
203 //    a match in subtag gets a score of 2 and a match in scripts gains a score of 1.
204 // 3. Regarding to two elements matchings, language-and-subtag matching has a score of 4, while
205 //    language-and-script obtains a socre of 3 with the same reason above.
206 //
207 // If two locales in the requested list have the same locale score, the font matching with higher
208 // priority locale gets a higher score. For example, in the case the user requested locale list is
209 // "ja-Jpan,en-Latn". The score of for the font of "ja-Jpan" gets a higher score than the font of
210 // "en-Latn".
211 //
212 // To achieve score calculation with priorities, the locale score is determined as follows:
213 //   LocaleScore = s(0) * 5^(m - 1) + s(1) * 5^(m - 2) + ... + s(m - 2) * 5 + s(m - 1)
214 // Here, m is the maximum number of locales to be compared, and s(i) is the i-th locale's matching
215 // score. The possible values of s(i) are 0, 1, 2, 3 and 4.
calcLocaleMatchingScore(uint32_t userLocaleListId,const FontFamily & fontFamily)216 uint32_t FontCollection::calcLocaleMatchingScore(uint32_t userLocaleListId,
217                                                  const FontFamily& fontFamily) {
218     const LocaleList& localeList = LocaleListCache::getById(userLocaleListId);
219     const LocaleList& fontLocaleList = LocaleListCache::getById(fontFamily.localeListId());
220 
221     const size_t maxCompareNum = std::min(localeList.size(), FONT_LOCALE_LIMIT);
222     uint32_t score = 0;
223     for (size_t i = 0; i < maxCompareNum; ++i) {
224         score = score * 5u + localeList[i].calcScoreFor(fontLocaleList);
225     }
226     return score;
227 }
228 
229 // Calculates a font score based on variant ("compact" or "elegant") matching.
230 //  - Returns 1 if the font doesn't have variant or the variant matches with the text style.
231 //  - No score if the font has a variant but it doesn't match with the text style.
calcVariantMatchingScore(FontFamily::Variant variant,const FontFamily & fontFamily)232 uint32_t FontCollection::calcVariantMatchingScore(FontFamily::Variant variant,
233                                                   const FontFamily& fontFamily) {
234     const FontFamily::Variant familyVariant = fontFamily.variant();
235     if (familyVariant == FontFamily::Variant::DEFAULT) {
236         return 1;
237     }
238     if (familyVariant == variant) {
239         return 1;
240     }
241     if (variant == FontFamily::Variant::DEFAULT && familyVariant == FontFamily::Variant::COMPACT) {
242         // If default is requested, prefer compat variation.
243         return 1;
244     }
245     return 0;
246 }
247 
248 // Implement heuristic for choosing best-match font. Here are the rules:
249 // 1. If first font in the collection has the character, it wins.
250 // 2. Calculate a score for the font family. See comments in calcFamilyScore for the detail.
251 // 3. Highest score wins, with ties resolved to the first font.
252 // This method never returns nullptr.
getFamilyForChar(uint32_t ch,uint32_t vs,uint32_t localeListId,FontFamily::Variant variant) const253 const std::shared_ptr<FontFamily>& FontCollection::getFamilyForChar(
254         uint32_t ch, uint32_t vs, uint32_t localeListId, FontFamily::Variant variant) const {
255     if (ch >= mMaxChar) {
256         return mFamilies[0];
257     }
258 
259     Range range = mRanges[ch >> kLogCharsPerPage];
260 
261     if (vs != 0) {
262         range = {0, static_cast<uint16_t>(mFamilies.size())};
263     }
264 
265     int bestFamilyIndex = -1;
266     uint32_t bestScore = kUnsupportedFontScore;
267     for (size_t i = range.start; i < range.end; i++) {
268         const std::shared_ptr<FontFamily>& family =
269                 vs == 0 ? mFamilies[mFamilyVec[i]] : mFamilies[i];
270         const uint32_t score = calcFamilyScore(ch, vs, variant, localeListId, family);
271         if (score == kFirstFontScore) {
272             // If the first font family supports the given character or variation sequence, always
273             // use it.
274             return family;
275         }
276         if (score > bestScore) {
277             bestScore = score;
278             bestFamilyIndex = i;
279         }
280     }
281     if (bestFamilyIndex == -1) {
282         UErrorCode errorCode = U_ZERO_ERROR;
283         const UNormalizer2* normalizer = unorm2_getNFDInstance(&errorCode);
284         if (U_SUCCESS(errorCode)) {
285             UChar decomposed[4];
286             int len = unorm2_getRawDecomposition(normalizer, ch, decomposed, 4, &errorCode);
287             if (U_SUCCESS(errorCode) && len > 0) {
288                 int off = 0;
289                 U16_NEXT_UNSAFE(decomposed, off, ch);
290                 return getFamilyForChar(ch, vs, localeListId, variant);
291             }
292         }
293         return mFamilies[0];
294     }
295     return vs == 0 ? mFamilies[mFamilyVec[bestFamilyIndex]] : mFamilies[bestFamilyIndex];
296 }
297 
298 // Characters where we want to continue using existing font run for (or stick to the next run if
299 // they start a string), even if the font does not support them explicitly. These are handled
300 // properly by Minikin or HarfBuzz even if the font does not explicitly support them and it's
301 // usually meaningless to switch to a different font to display them.
doesNotNeedFontSupport(uint32_t c)302 static bool doesNotNeedFontSupport(uint32_t c) {
303     return c == 0x00AD                      // SOFT HYPHEN
304            || c == 0x034F                   // COMBINING GRAPHEME JOINER
305            || c == 0x061C                   // ARABIC LETTER MARK
306            || (0x200C <= c && c <= 0x200F)  // ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK
307            || (0x202A <= c && c <= 0x202E)  // LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
308            || (0x2066 <= c && c <= 0x2069)  // LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
309            || c == 0xFEFF                   // BYTE ORDER MARK
310            || isVariationSelector(c);
311 }
312 
// Characters for which the existing font run is kept going, instead of recomputing the best match
// in the fallback list.
static const uint32_t stickyWhitelist[] = {
        '!',    ',', '-', '.', ':', ';', '?',
        0x00A0,  // NBSP
        0x2010,  // HYPHEN
        0x2011,  // NB_HYPHEN
        0x202F,  // NNBSP
        0x2640,  // FEMALE_SIGN,
        0x2642,  // MALE_SIGN,
        0x2695,  // STAFF_OF_AESCULAPIUS
};

// Returns true if |c| is listed in stickyWhitelist.
static bool isStickyWhitelisted(uint32_t c) {
    for (const uint32_t whitelisted : stickyWhitelist) {
        if (whitelisted == c) {
            return true;
        }
    }
    return false;
}
332 
isCombining(uint32_t c)333 static inline bool isCombining(uint32_t c) {
334     return (U_GET_GC_MASK(c) & U_GC_M_MASK) != 0;
335 }
336 
hasVariationSelector(uint32_t baseCodepoint,uint32_t variationSelector) const337 bool FontCollection::hasVariationSelector(uint32_t baseCodepoint,
338                                           uint32_t variationSelector) const {
339     if (!isVariationSelector(variationSelector)) {
340         return false;
341     }
342     if (baseCodepoint >= mMaxChar) {
343         return false;
344     }
345 
346     // Currently mRanges can not be used here since it isn't aware of the variation sequence.
347     for (size_t i = 0; i < mVSFamilyVec.size(); i++) {
348         if (mVSFamilyVec[i]->hasGlyph(baseCodepoint, variationSelector)) {
349             return true;
350         }
351     }
352 
353     // Even if there is no cmap format 14 subtable entry for the given sequence, should return true
354     // for <char, text presentation selector> case since we have special fallback rule for the
355     // sequence. Note that we don't need to restrict this to already standardized variation
356     // sequences, since Unicode is adding variation sequences more frequently now and may even move
357     // towards allowing text and emoji variation selectors on any character.
358     if (variationSelector == TEXT_STYLE_VS) {
359         for (size_t i = 0; i < mFamilies.size(); ++i) {
360             if (!mFamilies[i]->isColorEmojiFamily() && mFamilies[i]->hasGlyph(baseCodepoint, 0)) {
361                 return true;
362             }
363         }
364     }
365 
366     return false;
367 }
368 
// U+FFFD, substituted for unpaired surrogates while itemizing.
constexpr uint32_t REPLACEMENT_CHARACTER = 0xFFFD;

// Splits the UTF-16 text |string| of |string_size| code units into runs that are each rendered
// with a single font, and appends those runs to |result|.
//
// The family variant, font style and locale list are taken from |paint|. Run offsets are UTF-16
// code unit offsets, with an exclusive end. Nothing is appended for an empty string. If no
// character of the string needs font support, a single run covering the whole string is appended
// using the first family of the collection.
void FontCollection::itemize(const uint16_t* string, size_t string_size, const MinikinPaint& paint,
                             vector<Run>* result) const {
    const FontFamily::Variant familyVariant = paint.familyVariant;
    const FontStyle style = paint.fontStyle;
    const uint32_t localeListId = paint.localeListId;

    // Family of the run currently being extended; nullptr until the first family is chosen.
    const FontFamily* lastFamily = nullptr;
    // Points at the last element of |result| once a run has been appended by this call.
    Run* run = nullptr;

    if (string_size == 0) {
        return;
    }

    // Sentinel stored in nextCh once the whole string has been read.
    const uint32_t kEndOfString = 0xFFFFFFFF;

    // The loop works with one code point of lookahead: |nextCh| is decoded ahead of time so that
    // a variation selector following the current character can be seen.
    uint32_t nextCh = 0;
    uint32_t prevCh = 0;
    size_t nextUtf16Pos = 0;
    size_t readLength = 0;
    U16_NEXT(string, readLength, string_size, nextCh);
    if (U_IS_SURROGATE(nextCh)) {
        // Unpaired surrogate: treat it as U+FFFD.
        nextCh = REPLACEMENT_CHARACTER;
    }

    do {
        const uint32_t ch = nextCh;
        const size_t utf16Pos = nextUtf16Pos;
        nextUtf16Pos = readLength;
        if (readLength < string_size) {
            U16_NEXT(string, readLength, string_size, nextCh);
            if (U_IS_SURROGATE(nextCh)) {
                nextCh = REPLACEMENT_CHARACTER;
            }
        } else {
            nextCh = kEndOfString;
        }

        bool shouldContinueRun = false;
        if (doesNotNeedFontSupport(ch)) {
            // Always continue if the character is a format character not needed to be in the font.
            shouldContinueRun = true;
        } else if (lastFamily != nullptr && (isStickyWhitelisted(ch) || isCombining(ch))) {
            // Continue using existing font as long as it has coverage and is whitelisted.
            shouldContinueRun = lastFamily->getCoverage().get(ch);
        }

        if (!shouldContinueRun) {
            // Pass the following code point as the variation selector when it is one.
            const std::shared_ptr<FontFamily>& family = getFamilyForChar(
                    ch, isVariationSelector(nextCh) ? nextCh : 0, localeListId, familyVariant);
            if (utf16Pos == 0 || family.get() != lastFamily) {
                size_t start = utf16Pos;
                // Workaround for combining marks and emoji modifiers until we implement
                // per-cluster font selection: if a combining mark or an emoji modifier is found in
                // a different font that also supports the previous character, attach previous
                // character to the new run. U+20E3 COMBINING ENCLOSING KEYCAP, used in emoji, is
                // handled properly by this since it's a combining mark too.
                if (utf16Pos != 0 &&
                    (isCombining(ch) || (isEmojiModifier(ch) && isEmojiBase(prevCh))) &&
                    family != nullptr && family->getCoverage().get(prevCh)) {
                    const size_t prevChLength = U16_LENGTH(prevCh);
                    if (run != nullptr) {
                        // Take the previous character back out of the current run, and drop the
                        // run altogether if that leaves it empty.
                        run->end -= prevChLength;
                        if (run->start == run->end) {
                            result->pop_back();
                        }
                    }
                    start -= prevChLength;
                }
                if (lastFamily == nullptr) {
                    // This is the first family ever assigned. We are either seeing the very first
                    // character (which means start would already be zero), or we have only seen
                    // characters that don't need any font support (which means we need to adjust
                    // start to be 0 to include those characters).
                    start = 0;
                }
                // The end offset is filled in below, after every processed character.
                result->push_back({family->getClosestMatch(style), static_cast<int>(start), 0});
                run = &result->back();
                lastFamily = family.get();
            }
        }
        prevCh = ch;
        if (run != nullptr) {
            run->end = nextUtf16Pos;  // exclusive
        }
    } while (nextCh != kEndOfString);

    if (lastFamily == nullptr) {
        // No character needed any font support, so it doesn't really matter which font they end up
        // getting displayed in. We put the whole string in one run, using the first font.
        result->push_back({mFamilies[0]->getClosestMatch(style), 0, static_cast<int>(string_size)});
    }
}
463 
baseFontFaked(FontStyle style)464 FakedFont FontCollection::baseFontFaked(FontStyle style) {
465     return mFamilies[0]->getClosestMatch(style);
466 }
467 
createCollectionWithVariation(const std::vector<FontVariation> & variations)468 std::shared_ptr<FontCollection> FontCollection::createCollectionWithVariation(
469         const std::vector<FontVariation>& variations) {
470     if (variations.empty() || mSupportedAxes.empty()) {
471         return nullptr;
472     }
473 
474     bool hasSupportedAxis = false;
475     for (const FontVariation& variation : variations) {
476         if (mSupportedAxes.find(variation.axisTag) != mSupportedAxes.end()) {
477             hasSupportedAxis = true;
478             break;
479         }
480     }
481     if (!hasSupportedAxis) {
482         // None of variation axes are supported by this font collection.
483         return nullptr;
484     }
485 
486     std::vector<std::shared_ptr<FontFamily>> families;
487     for (const std::shared_ptr<FontFamily>& family : mFamilies) {
488         std::shared_ptr<FontFamily> newFamily = family->createFamilyWithVariation(variations);
489         if (newFamily) {
490             families.push_back(newFamily);
491         } else {
492             families.push_back(family);
493         }
494     }
495 
496     return std::shared_ptr<FontCollection>(new FontCollection(families));
497 }
498 
// Returns the id that init() assigned to this collection from the global id counter.
uint32_t FontCollection::getId() const {
    return mId;
}
502 
503 }  // namespace minikin
504