/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Determine coverage of font given its raw "cmap" OpenType table

#include "minikin/CmapCoverage.h"

#include <algorithm>
#include <climits>  // UINT_MAX
#include <vector>

#include <log/log.h>  // android_errorWriteLog

#include "minikin/SparseBitSet.h"

#include "MinikinInternal.h"

namespace minikin {

// These could perhaps be optimized to use __builtin_bswap16 and friends.
static uint32_t readU16(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 8 | ((uint32_t)data[offset + 1]);
}

static uint32_t readU24(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 16 | ((uint32_t)data[offset + 1]) << 8 |
           ((uint32_t)data[offset + 2]);
}

static uint32_t readU32(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 |
           ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]);
}

// The start must be larger than or equal to coverage.back() if coverage is not empty.
// Returns true if the range is appended. Otherwise returns false as an error.
static bool addRange(std::vector<uint32_t>& coverage, uint32_t start, uint32_t end) {
    if (coverage.empty() || coverage.back() < start) {
        coverage.push_back(start);
        coverage.push_back(end);
        return true;
    } else if (coverage.back() == start) {
        coverage.back() = end;
        return true;
    } else {
        // Reject unordered range input since SparseBitSet assumes that the given range vector is
        // sorted. OpenType specification says cmap entries are sorted in order of code point
        // values, thus for OpenType compliant font files, we don't reach here.
        android_errorWriteLog(0x534e4554, "32178311");
        return false;
    }
}

// Returns true if the range is appended. Otherwise returns false as an error.
static bool addRangeCmap4(std::vector<uint32_t>& coverage, uint32_t start, uint32_t end) {
    if (!coverage.empty() && coverage.back() > end) {
        // Reject unordered end code points.
        return false;
    }
    if (coverage.empty() || coverage.back() < start) {
        coverage.push_back(start);
        coverage.push_back(end);
        return true;
    } else {
        coverage.back() = end;
        return true;
    }
}

// Returns Range from given ranges vector. Returns invalidRange if i is out of range.
static inline Range getRange(const std::vector<uint32_t>& r, size_t i) {
    return i + 1 < r.size() ? Range({r[i], r[i + 1]}) : Range::invalidRange();
}

// Merge two sorted lists of ranges into one sorted list.
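// Both inputs and the output are flattened vectors of [start, end) pairs, the same layout that
// SparseBitSet consumes.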
static std::vector<uint32_t> mergeRanges(const std::vector<uint32_t>& lRanges,
                                         const std::vector<uint32_t>& rRanges) {
    std::vector<uint32_t> out;

    const size_t lsize = lRanges.size();
    const size_t rsize = rRanges.size();
    out.reserve(lsize + rsize);
    size_t ri = 0;
    size_t li = 0;
    while (li < lsize || ri < rsize) {
        Range left = getRange(lRanges, li);
        Range right = getRange(rRanges, ri);

        if (!right.isValid()) {
            // No ranges left in rRanges. Just put all remaining ranges in lRanges.
            do {
                Range r = getRange(lRanges, li);
                addRange(out, r.getStart(), r.getEnd());  // Input is sorted. Never returns false.
                li += 2;
            } while (li < lsize);
            break;
        } else if (!left.isValid()) {
            // No ranges left in lRanges. Just put all remaining ranges in rRanges.
            do {
                Range r = getRange(rRanges, ri);
                addRange(out, r.getStart(), r.getEnd());  // Input is sorted. Never returns false.
                ri += 2;
            } while (ri < rsize);
            break;
        } else if (!Range::intersects(left, right)) {
            // No intersection. Add smaller range.
            if (left.getStart() < right.getStart()) {
                // Input is sorted. Never returns false.
                addRange(out, left.getStart(), left.getEnd());
                li += 2;
            } else {
                // Input is sorted. Never returns false.
                addRange(out, right.getStart(), right.getEnd());
                ri += 2;
            }
        } else {
            Range merged = Range::merge(left, right);
            li += 2;
            ri += 2;
            left = getRange(lRanges, li);
            right = getRange(rRanges, ri);
            while (Range::intersects(merged, left) || Range::intersects(merged, right)) {
                if (Range::intersects(merged, left)) {
                    merged = Range::merge(merged, left);
                    li += 2;
                    left = getRange(lRanges, li);
                } else {
                    merged = Range::merge(merged, right);
                    ri += 2;
                    right = getRange(rRanges, ri);
                }
            }
            // Input is sorted. Never returns false.
            addRange(out, merged.getStart(), merged.getEnd());
        }
    }

    return out;
}

// Get the coverage information out of a Format 4 subtable, storing it in the coverage vector
static bool getCoverageFormat4(std::vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
    const size_t kSegCountOffset = 6;
    const size_t kEndCountOffset = 14;
    const size_t kHeaderSize = 16;
    const size_t kSegmentSize = 8;  // total size of array elements for one segment
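    // Format 4 stores segCount * 2 in its segCountX2 field (hence the >> 1 below) and lays out
    // parallel arrays after its 14-byte header: endCode[segCount], a reserved pad,
    // startCode[segCount], idDelta[segCount] and idRangeOffset[segCount], followed by
    // glyphIdArray. kHeaderSize folds the reserved pad into the header, so startCode[i] sits at
    // kHeaderSize + 2 * (segCount + i), and similarly for the later arrays.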
    if (kEndCountOffset > size) {
        return false;
    }
    size_t segCount = readU16(data, kSegCountOffset) >> 1;
    if (kHeaderSize + segCount * kSegmentSize > size) {
        return false;
    }
    for (size_t i = 0; i < segCount; i++) {
        uint32_t end = readU16(data, kEndCountOffset + 2 * i);
        uint32_t start = readU16(data, kHeaderSize + 2 * (segCount + i));
        if (end < start) {
            // invalid segment range: size must be positive
            android_errorWriteLog(0x534e4554, "26413177");
            return false;
        }
        uint32_t rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i));
        if (rangeOffset == 0) {
            uint32_t delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i));
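            // With idRangeOffset == 0, code point j maps to glyph (j + delta) & 0xffff, and
            // glyph 0 means "not mapped". Glyph IDs increase by one across the segment, so if the
            // last glyph ID exceeds the segment length none of them can wrap to 0 and the whole
            // segment is covered; otherwise each code point must be checked individually.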
            if (((end + delta) & 0xffff) > end - start) {
                if (!addRangeCmap4(coverage, start, end + 1)) {
                    return false;
                }
            } else {
                for (uint32_t j = start; j < end + 1; j++) {
                    if (((j + delta) & 0xffff) != 0) {
                        if (!addRangeCmap4(coverage, j, j + 1)) {
                            return false;
                        }
                    }
                }
            }
        } else {
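            // With a nonzero idRangeOffset, the glyph ID comes from glyphIdArray: the offset is
            // counted in bytes from idRangeOffset[i]'s own position in the table
            // (kHeaderSize + 6 * segCount + 2 * i), and a stored glyph ID of 0 means the code
            // point is not mapped.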
            for (uint32_t j = start; j < end + 1; j++) {
                uint32_t actualRangeOffset =
                        kHeaderSize + 6 * segCount + rangeOffset + (i + j - start) * 2;
                if (actualRangeOffset + 2 > size) {
                    // invalid rangeOffset is considered a "warning" by OpenType Sanitizer
                    continue;
                }
                uint32_t glyphId = readU16(data, actualRangeOffset);
                if (glyphId != 0) {
                    if (!addRangeCmap4(coverage, j, j + 1)) {
                        return false;
                    }
                }
            }
        }
    }
    return true;
}

// Get the coverage information out of a Format 12 subtable, storing it in the coverage vector
static bool getCoverageFormat12(std::vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
    const size_t kNGroupsOffset = 12;
    const size_t kFirstGroupOffset = 16;
    const size_t kGroupSize = 12;
    const size_t kStartCharCodeOffset = 0;
    const size_t kEndCharCodeOffset = 4;
    const size_t kMaxNGroups = 0xfffffff0 / kGroupSize;  // protection against overflow
    // For all values < kMaxNGroups, kFirstGroupOffset + nGroups * kGroupSize fits in 32 bits.
    if (kFirstGroupOffset > size) {
        return false;
    }
    uint32_t nGroups = readU32(data, kNGroupsOffset);
    if (nGroups >= kMaxNGroups || kFirstGroupOffset + nGroups * kGroupSize > size) {
        android_errorWriteLog(0x534e4554, "25645298");
        return false;
    }
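    // Each SequentialMapGroup is 12 bytes: startCharCode, endCharCode and startGlyphID, all
    // 32-bit. Only the character code range matters for coverage, so startGlyphID is not read.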
    for (uint32_t i = 0; i < nGroups; i++) {
        uint32_t groupOffset = kFirstGroupOffset + i * kGroupSize;
        uint32_t start = readU32(data, groupOffset + kStartCharCodeOffset);
        uint32_t end = readU32(data, groupOffset + kEndCharCodeOffset);
        if (end < start) {
            // invalid group range: size must be positive
            android_errorWriteLog(0x534e4554, "26413177");
            return false;
        }

        // No need to read outside of Unicode code point range.
        if (start > MAX_UNICODE_CODE_POINT) {
            return true;
        }
        if (end > MAX_UNICODE_CODE_POINT) {
            // file is inclusive, vector is exclusive
            if (end == 0xFFFFFFFF) {
                android_errorWriteLog(0x534e4554, "62134807");
            }
            return addRange(coverage, start, MAX_UNICODE_CODE_POINT + 1);
        }
        if (!addRange(coverage, start, end + 1)) {  // file is inclusive, vector is exclusive
            return false;
        }
    }
    return true;
}

// Lower value has higher priority. 0 for the highest priority table.
// kLowestPriority for unsupported tables.
// This order comes from HarfBuzz's hb-ot-font.cc and needs to be kept in sync with it.
constexpr uint8_t kLowestPriority = 255;
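// (3, 10) is the Windows platform with the full Unicode repertoire (UCS-4), (3, 1) is Windows
// with the Unicode BMP, and the platform 0 (Unicode) encodings are preferred newest-first:
// 6 and 4 cover the full repertoire, 3 is BMP-only, and 0-2 are legacy Unicode/ISO 10646 IDs.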
uint8_t getTablePriority(uint16_t platformId, uint16_t encodingId) {
    if (platformId == 3 && encodingId == 10) {
        return 0;
    }
    if (platformId == 0 && encodingId == 6) {
        return 1;
    }
    if (platformId == 0 && encodingId == 4) {
        return 2;
    }
    if (platformId == 3 && encodingId == 1) {
        return 3;
    }
    if (platformId == 0 && encodingId == 3) {
        return 4;
    }
    if (platformId == 0 && encodingId == 2) {
        return 5;
    }
    if (platformId == 0 && encodingId == 1) {
        return 6;
    }
    if (platformId == 0 && encodingId == 0) {
        return 7;
    }
    // Tables other than above are not supported.
    return kLowestPriority;
}

// Get merged coverage information from default UVS Table and non-default UVS Table. Note that this
// function assumes code points in both default UVS Table and non-default UVS table are stored in
// ascending order. This is required by the standard.
static bool getVSCoverage(std::vector<uint32_t>* out_ranges, const uint8_t* data, size_t size,
                          uint32_t defaultUVSTableOffset, uint32_t nonDefaultUVSTableOffset,
                          const SparseBitSet& baseCoverage) {
    // Need to merge supported ranges from default UVS Table and non-default UVS Table.
    // First, collect all supported code points from non default UVS table.
    std::vector<uint32_t> rangesFromNonDefaultUVSTable;
    if (nonDefaultUVSTableOffset != 0) {
        constexpr size_t kHeaderSize = 4;
        constexpr size_t kUVSMappingRecordSize = 5;
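        // Each UVSMapping record is a 24-bit unicodeValue followed by a 16-bit glyphID
        // (5 bytes); only the code point is needed for coverage.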

        const uint8_t* nonDefaultUVSTable = data + nonDefaultUVSTableOffset;
        // This subtraction doesn't underflow since the caller already checked
        // size > nonDefaultUVSTableOffset.
        const size_t nonDefaultUVSTableRemaining = size - nonDefaultUVSTableOffset;
        if (nonDefaultUVSTableRemaining < kHeaderSize) {
            return false;
        }
        const uint64_t numRecords = readU32(nonDefaultUVSTable, 0);
        const uint64_t sizeToRead = numRecords * kUVSMappingRecordSize + kHeaderSize;
        if (sizeToRead > nonDefaultUVSTableRemaining) {
            if (sizeToRead > UINT_MAX) {
                android_errorWriteLog(0x534e4554, "70808908");
            }
            return false;
        }
        for (uint32_t i = 0; i < numRecords; ++i) {
            const size_t recordOffset = kHeaderSize + kUVSMappingRecordSize * i;
            const uint32_t codePoint = readU24(nonDefaultUVSTable, recordOffset);
            if (!addRange(rangesFromNonDefaultUVSTable, codePoint, codePoint + 1)) {
                return false;
            }
        }
    }

    // Then, construct ranges from the default UVS Table. They are merged with the ranges from the
    // non-default UVS table at the end of this function.
    std::vector<uint32_t> rangesFromDefaultUVSTable;
    if (defaultUVSTableOffset != 0) {
        constexpr size_t kHeaderSize = 4;
        constexpr size_t kUnicodeRangeRecordSize = 4;
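        // Each UnicodeRange record is a 24-bit startUnicodeValue followed by an 8-bit
        // additionalCount (4 bytes), covering code points [start, start + additionalCount].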

        const uint8_t* defaultUVSTable = data + defaultUVSTableOffset;
        // This subtraction doesn't underflow since the caller already checked
        // size > defaultUVSTableOffset.
        const size_t defaultUVSTableRemaining = size - defaultUVSTableOffset;

        if (defaultUVSTableRemaining < kHeaderSize) {
            return false;
        }
        const uint64_t numRecords = readU32(defaultUVSTable, 0);
        const uint64_t sizeToRead = numRecords * kUnicodeRangeRecordSize + kHeaderSize;
        if (sizeToRead > defaultUVSTableRemaining) {
            if (sizeToRead > UINT_MAX) {
                android_errorWriteLog(0x534e4554, "70808908");
            }
            return false;
        }

        for (uint32_t i = 0; i < numRecords; ++i) {
            const size_t recordOffset = kHeaderSize + kUnicodeRangeRecordSize * i;
            const uint32_t startCp = readU24(defaultUVSTable, recordOffset);
            const uint8_t rangeLength = defaultUVSTable[recordOffset + 3];

            // Then insert range from default UVS Table, but exclude if the base codepoint is not
            // supported.
            for (uint32_t cp = startCp; cp <= startCp + rangeLength; ++cp) {
                // All codepoints in the default UVS table map to the glyphs of the codepoints
                // without variation selectors, so check the default glyph availability and
                // exclude the codepoint if it is not supported by the default cmap table.
                if (baseCoverage.get(cp)) {
                    if (!addRange(rangesFromDefaultUVSTable, cp, cp + 1 /* exclusive */)) {
                        return false;
                    }
                }
            }
        }
    }
    *out_ranges = mergeRanges(rangesFromDefaultUVSTable, rangesFromNonDefaultUVSTable);
    return true;
}

static void getCoverageFormat14(std::vector<std::unique_ptr<SparseBitSet>>* out,
                                const uint8_t* data, size_t size,
                                const SparseBitSet& baseCoverage) {
    constexpr size_t kHeaderSize = 10;
    constexpr size_t kRecordSize = 11;
    constexpr size_t kLengthOffset = 2;
    constexpr size_t kNumRecordOffset = 6;
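    // A format 14 subtable begins with a 10-byte header (16-bit format, 32-bit length, 32-bit
    // record count), followed by 11-byte VariationSelector records: a 24-bit variation selector
    // code point plus 32-bit offsets to its default and non-default UVS tables.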

    out->clear();
    if (size < kHeaderSize) {
        return;
    }

    const uint32_t length = readU32(data, kLengthOffset);
    if (size < length) {
        return;
    }

    const uint64_t numRecords = readU32(data, kNumRecordOffset);
    const uint64_t sizeToRead = kHeaderSize + kRecordSize * numRecords;
    if (numRecords == 0 || sizeToRead > length) {
        if (sizeToRead > UINT_MAX) {
            android_errorWriteLog(0x534e4554, "70808908");
        }
        return;
    }

    for (uint32_t i = 0; i < numRecords; ++i) {
        // Insert from the largest code points since it determines the size of the output vector.
        const uint32_t recordHeadOffset = kHeaderSize + kRecordSize * (numRecords - i - 1);
        const uint32_t vsCodePoint = readU24(data, recordHeadOffset);
        const uint32_t defaultUVSOffset = readU32(data, recordHeadOffset + 3);
        const uint32_t nonDefaultUVSOffset = readU32(data, recordHeadOffset + 7);
        if (defaultUVSOffset > length || nonDefaultUVSOffset > length) {
            continue;
        }

        const uint16_t vsIndex = getVsIndex(vsCodePoint);
        if (vsIndex == INVALID_VS_INDEX) {
            continue;
        }
        std::vector<uint32_t> ranges;
        if (!getVSCoverage(&ranges, data, length, defaultUVSOffset, nonDefaultUVSOffset,
                           baseCoverage)) {
            continue;
        }
        if (out->size() < vsIndex + 1) {
            out->resize(vsIndex + 1);
        }
        (*out)[vsIndex].reset(new SparseBitSet(ranges.data(), ranges.size() >> 1));
    }

    out->shrink_to_fit();
}

SparseBitSet CmapCoverage::getCoverage(const uint8_t* cmap_data, size_t cmap_size,
                                       std::vector<std::unique_ptr<SparseBitSet>>* out) {
    constexpr size_t kHeaderSize = 4;
    constexpr size_t kNumTablesOffset = 2;
    constexpr size_t kTableSize = 8;
    constexpr size_t kPlatformIdOffset = 0;
    constexpr size_t kEncodingIdOffset = 2;
    constexpr size_t kOffsetOffset = 4;
    constexpr size_t kFormatOffset = 0;
    constexpr uint32_t kNoTable = UINT32_MAX;
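    // The cmap header is a 16-bit version and a 16-bit table count, followed by 8-byte encoding
    // records of (platformID, encodingID, 32-bit subtable offset), with offsets measured from the
    // start of the cmap table.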

    if (kHeaderSize > cmap_size) {
        return SparseBitSet();
    }
    uint32_t numTables = readU16(cmap_data, kNumTablesOffset);
    if (kHeaderSize + numTables * kTableSize > cmap_size) {
        return SparseBitSet();
    }

    uint32_t bestTableOffset = kNoTable;
    uint16_t bestTableFormat = 0;
    uint8_t bestTablePriority = kLowestPriority;
    uint32_t vsTableOffset = kNoTable;
    for (uint32_t i = 0; i < numTables; ++i) {
        const uint32_t tableHeadOffset = kHeaderSize + i * kTableSize;
        const uint16_t platformId = readU16(cmap_data, tableHeadOffset + kPlatformIdOffset);
        const uint16_t encodingId = readU16(cmap_data, tableHeadOffset + kEncodingIdOffset);
        const uint32_t offset = readU32(cmap_data, tableHeadOffset + kOffsetOffset);

        if (offset > cmap_size - 2) {
            continue;  // Invalid table: not enough space to read.
        }
        const uint16_t format = readU16(cmap_data, offset + kFormatOffset);

        if (platformId == 0 /* Unicode */ && encodingId == 5 /* Variation Sequences */) {
            if (vsTableOffset == kNoTable && format == 14) {
                vsTableOffset = offset;
            } else {
                // Ignore the (0, 5) table if we have already seen another valid one or it's in a
                // format we don't understand.
            }
        } else {
            uint32_t length;
            uint32_t language;

            if (format == 4) {
                constexpr size_t lengthOffset = 2;
                constexpr size_t languageOffset = 4;
                constexpr size_t minTableSize = languageOffset + 2;
                if (offset > cmap_size - minTableSize) {
                    continue;  // Invalid table: not enough space to read.
                }
                length = readU16(cmap_data, offset + lengthOffset);
                language = readU16(cmap_data, offset + languageOffset);
            } else if (format == 12) {
                constexpr size_t lengthOffset = 4;
                constexpr size_t languageOffset = 8;
                constexpr size_t minTableSize = languageOffset + 4;
                if (offset > cmap_size - minTableSize) {
                    continue;  // Invalid table: not enough space to read.
                }
                length = readU32(cmap_data, offset + lengthOffset);
                language = readU32(cmap_data, offset + languageOffset);
            } else {
                continue;
            }

            if (length > cmap_size - offset) {
                continue;  // Invalid table: table length is larger than whole cmap data size.
            }
            if (language != 0) {
                // Unsupported or invalid table: this is either a subtable for the Macintosh
                // platform (which we don't support), or an invalid subtable since language field
                // should be zero for non-Macintosh subtables.
                continue;
            }
            const uint8_t priority = getTablePriority(platformId, encodingId);
            if (priority < bestTablePriority) {
                bestTableOffset = offset;
                bestTablePriority = priority;
                bestTableFormat = format;
            }
        }
        if (vsTableOffset != kNoTable && bestTablePriority == 0 /* highest priority */) {
            // Already found the highest priority table and variation sequences table. No need to
            // look at remaining tables.
            break;
        }
    }

    SparseBitSet coverage;

    if (bestTableOffset != kNoTable) {
        const uint8_t* tableData = cmap_data + bestTableOffset;
        const size_t tableSize = cmap_size - bestTableOffset;
        bool success;
        std::vector<uint32_t> coverageVec;
        if (bestTableFormat == 4) {
            success = getCoverageFormat4(coverageVec, tableData, tableSize);
        } else {
            success = getCoverageFormat12(coverageVec, tableData, tableSize);
        }

        if (success) {
            // Use data() rather than &front(): front() on an empty vector is undefined behavior.
            coverage = SparseBitSet(coverageVec.data(), coverageVec.size() >> 1);
        }
    }

    if (vsTableOffset != kNoTable) {
        const uint8_t* tableData = cmap_data + vsTableOffset;
        const size_t tableSize = cmap_size - vsTableOffset;
        getCoverageFormat14(out, tableData, tableSize, coverage);
    }
    return coverage;
}

}  // namespace minikin