/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Determine coverage of font given its raw "cmap" OpenType table

#include "minikin/CmapCoverage.h"

#include <algorithm>
#include <vector>

#include "minikin/Range.h"
#include "minikin/SparseBitSet.h"

#include "MinikinInternal.h"

namespace minikin {

// These could perhaps be optimized to use __builtin_bswap16 and friends.
static uint32_t readU16(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 8 | ((uint32_t)data[offset + 1]);
}

static uint32_t readU24(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 16 | ((uint32_t)data[offset + 1]) << 8 |
           ((uint32_t)data[offset + 2]);
}

static uint32_t readU32(const uint8_t* data, size_t offset) {
    return ((uint32_t)data[offset]) << 24 | ((uint32_t)data[offset + 1]) << 16 |
           ((uint32_t)data[offset + 2]) << 8 | ((uint32_t)data[offset + 3]);
}

// The start must be larger than or equal to coverage.back() if coverage is not empty.
// Returns true if the range is appended. Otherwise returns false as an error.
static bool addRange(std::vector<uint32_t>& coverage, uint32_t start, uint32_t end) {
    if (coverage.empty() || coverage.back() < start) {
        coverage.push_back(start);
        coverage.push_back(end);
        return true;
    } else if (coverage.back() == start) {
        coverage.back() = end;
        return true;
    } else {
        // Reject unordered range input since SparseBitSet assumes that the given range vector is
        // sorted. OpenType specification says cmap entries are sorted in order of code point
        // values, thus for OpenType compliant font files, we don't reach here.
        android_errorWriteLog(0x534e4554, "32178311");
        return false;
    }
}

// Returns true if the range is appended. Otherwise returns false as an error.
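// Unlike addRange(), a range whose start overlaps the previously appended range is merged into it
// rather than rejected; only an end code point that goes backwards is treated as an error.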
static bool addRangeCmap4(std::vector<uint32_t>& coverage, uint32_t start, uint32_t end) {
    if (!coverage.empty() && coverage.back() > end) {
        // Reject unordered end code points.
        return false;
    }
    if (coverage.empty() || coverage.back() < start) {
        coverage.push_back(start);
        coverage.push_back(end);
        return true;
    } else {
        coverage.back() = end;
        return true;
    }
}

// Returns Range from given ranges vector. Returns invalidRange if i is out of range.
static inline Range getRange(const std::vector<uint32_t>& r, size_t i) {
    return i + 1 < r.size() ? Range({r[i], r[i + 1]}) : Range::invalidRange();
}

// Merge two sorted lists of ranges into one sorted list.
static std::vector<uint32_t> mergeRanges(const std::vector<uint32_t>& lRanges,
                                         const std::vector<uint32_t>& rRanges) {
    std::vector<uint32_t> out;

    const size_t lsize = lRanges.size();
    const size_t rsize = rRanges.size();
    out.reserve(lsize + rsize);
    size_t ri = 0;
    size_t li = 0;
    while (li < lsize || ri < rsize) {
        Range left = getRange(lRanges, li);
        Range right = getRange(rRanges, ri);

        if (!right.isValid()) {
            // No ranges left in rRanges. Just put all remaining ranges in lRanges.
            do {
                Range r = getRange(lRanges, li);
                addRange(out, r.getStart(), r.getEnd());  // Input is sorted. Never returns false.
                li += 2;
            } while (li < lsize);
            break;
        } else if (!left.isValid()) {
            // No ranges left in lRanges. Just put all remaining ranges in rRanges.
            do {
                Range r = getRange(rRanges, ri);
                addRange(out, r.getStart(), r.getEnd());  // Input is sorted. Never returns false.
                ri += 2;
            } while (ri < rsize);
            break;
        } else if (!Range::intersects(left, right)) {
            // No intersection. Add smaller range.
            if (left.getStart() < right.getStart()) {
                // Input is sorted. Never returns false.
                addRange(out, left.getStart(), left.getEnd());
                li += 2;
            } else {
                // Input is sorted. Never returns false.
                addRange(out, right.getStart(), right.getEnd());
                ri += 2;
            }
        } else {
            Range merged = Range::merge(left, right);
            li += 2;
            ri += 2;
            left = getRange(lRanges, li);
            right = getRange(rRanges, ri);
            while (Range::intersects(merged, left) || Range::intersects(merged, right)) {
                if (Range::intersects(merged, left)) {
                    merged = Range::merge(merged, left);
                    li += 2;
                    left = getRange(lRanges, li);
                } else {
                    merged = Range::merge(merged, right);
                    ri += 2;
                    right = getRange(rRanges, ri);
                }
            }
            // Input is sorted. Never returns false.
            addRange(out, merged.getStart(), merged.getEnd());
        }
    }

    return out;
}

// Get the coverage information out of a Format 4 subtable, storing it in the coverage vector
static bool getCoverageFormat4(std::vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
    const size_t kSegCountOffset = 6;
    const size_t kEndCountOffset = 14;
    const size_t kHeaderSize = 16;
    const size_t kSegmentSize = 8;  // total size of array elements for one segment
    if (kEndCountOffset > size) {
        return false;
    }
    size_t segCount = readU16(data, kSegCountOffset) >> 1;
    if (kHeaderSize + segCount * kSegmentSize > size) {
        return false;
    }
    for (size_t i = 0; i < segCount; i++) {
        uint32_t end = readU16(data, kEndCountOffset + 2 * i);
        uint32_t start = readU16(data, kHeaderSize + 2 * (segCount + i));
        if (end < start) {
            // invalid segment range: size must be positive
            android_errorWriteLog(0x534e4554, "26413177");
            return false;
        }
        uint32_t rangeOffset = readU16(data, kHeaderSize + 2 * (3 * segCount + i));
        if (rangeOffset == 0) {
            uint32_t delta = readU16(data, kHeaderSize + 2 * (2 * segCount + i));
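            // With rangeOffset == 0, the glyph ID of code point j is (j + delta) & 0xffff, and
            // glyph ID 0 means "no glyph". If (end + delta) & 0xffff exceeds the segment length
            // (end - start), no code point in [start, end] can map to glyph 0, so the whole
            // segment is covered at once; otherwise each code point has to be checked.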
            if (((end + delta) & 0xffff) > end - start) {
                if (!addRangeCmap4(coverage, start, end + 1)) {
                    return false;
                }
            } else {
                for (uint32_t j = start; j < end + 1; j++) {
                    if (((j + delta) & 0xffff) != 0) {
                        if (!addRangeCmap4(coverage, j, j + 1)) {
                            return false;
                        }
                    }
                }
            }
        } else {
            for (uint32_t j = start; j < end + 1; j++) {
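                // Per the format 4 layout, the glyph ID for code point j lives at
                // &idRangeOffset[i] + idRangeOffset[i] + 2 * (j - startCode[i]), where
                // idRangeOffset[i] itself is stored at kHeaderSize + 6 * segCount + 2 * i.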
                uint32_t actualRangeOffset =
                        kHeaderSize + 6 * segCount + rangeOffset + (i + j - start) * 2;
                if (actualRangeOffset + 2 > size) {
                    // invalid rangeOffset is considered a "warning" by OpenType Sanitizer
                    continue;
                }
                uint32_t glyphId = readU16(data, actualRangeOffset);
                if (glyphId != 0) {
                    if (!addRangeCmap4(coverage, j, j + 1)) {
                        return false;
                    }
                }
            }
        }
    }
    return true;
}

// Get the coverage information out of a Format 12 subtable, storing it in the coverage vector
static bool getCoverageFormat12(std::vector<uint32_t>& coverage, const uint8_t* data, size_t size) {
    const size_t kNGroupsOffset = 12;
    const size_t kFirstGroupOffset = 16;
    const size_t kGroupSize = 12;
    const size_t kStartCharCodeOffset = 0;
    const size_t kEndCharCodeOffset = 4;
    const size_t kMaxNGroups = 0xfffffff0 / kGroupSize;  // protection against overflow
    // For all values < kMaxNGroups, kFirstGroupOffset + nGroups * kGroupSize fits in 32 bits.
    if (kFirstGroupOffset > size) {
        return false;
    }
    uint32_t nGroups = readU32(data, kNGroupsOffset);
    if (nGroups >= kMaxNGroups || kFirstGroupOffset + nGroups * kGroupSize > size) {
        android_errorWriteLog(0x534e4554, "25645298");
        return false;
    }
    for (uint32_t i = 0; i < nGroups; i++) {
        uint32_t groupOffset = kFirstGroupOffset + i * kGroupSize;
        uint32_t start = readU32(data, groupOffset + kStartCharCodeOffset);
        uint32_t end = readU32(data, groupOffset + kEndCharCodeOffset);
        if (end < start) {
            // invalid group range: size must be positive
            android_errorWriteLog(0x534e4554, "26413177");
            return false;
        }

        // No need to read outside of Unicode code point range.
        if (start > MAX_UNICODE_CODE_POINT) {
            return true;
        }
        if (end > MAX_UNICODE_CODE_POINT) {
            // file is inclusive, vector is exclusive
            if (end == 0xFFFFFFFF) {
                android_errorWriteLog(0x534e4554, "62134807");
            }
            return addRange(coverage, start, MAX_UNICODE_CODE_POINT + 1);
        }
        if (!addRange(coverage, start, end + 1)) {  // file is inclusive, vector is exclusive
            return false;
        }
    }
    return true;
}

// Lower value has higher priority. 0 for the highest priority table.
// kLowestPriority for unsupported tables.
// This order comes from HarfBuzz's hb-ot-font.cc and needs to be kept in sync with it.
constexpr uint8_t kLowestPriority = 255;
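// (platformId, encodingId) meanings, per the OpenType 'cmap' specification: platform 3 is
// Windows (encoding 10 = Unicode full repertoire, 1 = Unicode BMP); platform 0 is Unicode
// (encodings 6 and 4 cover the full repertoire, 3 and below are BMP-only variants).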
uint8_t getTablePriority(uint16_t platformId, uint16_t encodingId) {
    if (platformId == 3 && encodingId == 10) {
        return 0;
    }
    if (platformId == 0 && encodingId == 6) {
        return 1;
    }
    if (platformId == 0 && encodingId == 4) {
        return 2;
    }
    if (platformId == 3 && encodingId == 1) {
        return 3;
    }
    if (platformId == 0 && encodingId == 3) {
        return 4;
    }
    if (platformId == 0 && encodingId == 2) {
        return 5;
    }
    if (platformId == 0 && encodingId == 1) {
        return 6;
    }
    if (platformId == 0 && encodingId == 0) {
        return 7;
    }
    // Tables other than above are not supported.
    return kLowestPriority;
}

// Get merged coverage information from default UVS Table and non-default UVS Table. Note that this
// function assumes code points in both default UVS Table and non-default UVS table are stored in
// ascending order. This is required by the standard.
static bool getVSCoverage(std::vector<uint32_t>* out_ranges, const uint8_t* data, size_t size,
                          uint32_t defaultUVSTableOffset, uint32_t nonDefaultUVSTableOffset,
                          const SparseBitSet& baseCoverage) {
    // Need to merge supported ranges from default UVS Table and non-default UVS Table.
    // First, collect all supported code points from non default UVS table.
    std::vector<uint32_t> rangesFromNonDefaultUVSTable;
    if (nonDefaultUVSTableOffset != 0) {
        constexpr size_t kHeaderSize = 4;
        constexpr size_t kUVSMappingRecordSize = 5;

        const uint8_t* nonDefaultUVSTable = data + nonDefaultUVSTableOffset;
        // This subtraction doesn't underflow since the caller already checked
        // size >= nonDefaultUVSTableOffset.
        const size_t nonDefaultUVSTableRemaining = size - nonDefaultUVSTableOffset;
        if (nonDefaultUVSTableRemaining < kHeaderSize) {
            return false;
        }
        const uint64_t numRecords = readU32(nonDefaultUVSTable, 0);
        const uint64_t sizeToRead = numRecords * kUVSMappingRecordSize + kHeaderSize;
        if (sizeToRead > nonDefaultUVSTableRemaining) {
            if (sizeToRead > UINT_MAX) {
                android_errorWriteLog(0x534e4554, "70808908");
            }
            return false;
        }
        for (uint32_t i = 0; i < numRecords; ++i) {
            const size_t recordOffset = kHeaderSize + kUVSMappingRecordSize * i;
            const uint32_t codePoint = readU24(nonDefaultUVSTable, recordOffset);
            if (!addRange(rangesFromNonDefaultUVSTable, codePoint, codePoint + 1)) {
                return false;
            }
        }
    }

    // Then, construct ranges from the default UVS Table, merging in the code points from the
    // non-default UVS table.
    std::vector<uint32_t> rangesFromDefaultUVSTable;
    if (defaultUVSTableOffset != 0) {
        constexpr size_t kHeaderSize = 4;
        constexpr size_t kUnicodeRangeRecordSize = 4;

        const uint8_t* defaultUVSTable = data + defaultUVSTableOffset;
        // This subtraction doesn't underflow since the caller already checked
        // size >= defaultUVSTableOffset.
        const size_t defaultUVSTableRemaining = size - defaultUVSTableOffset;

        if (defaultUVSTableRemaining < kHeaderSize) {
            return false;
        }
        const uint64_t numRecords = readU32(defaultUVSTable, 0);
        const uint64_t sizeToRead = numRecords * kUnicodeRangeRecordSize + kHeaderSize;
        if (sizeToRead > defaultUVSTableRemaining) {
            if (sizeToRead > UINT_MAX) {
                android_errorWriteLog(0x534e4554, "70808908");
            }
            return false;
        }

        for (uint32_t i = 0; i < numRecords; ++i) {
            const size_t recordOffset = kHeaderSize + kUnicodeRangeRecordSize * i;
            const uint32_t startCp = readU24(defaultUVSTable, recordOffset);
            const uint8_t rangeLength = defaultUVSTable[recordOffset + 3];

            // Then insert range from default UVS Table, but exclude if the base codepoint is not
            // supported.
            for (uint32_t cp = startCp; cp <= startCp + rangeLength; ++cp) {
                // All codepoints in default UVS table should go to the glyphs of the codepoints
                // without variation selectors. We need to check the default glyph availability and
                // exclude the codepoint if it is not supported by the default cmap table.
                if (baseCoverage.get(cp)) {
                    if (!addRange(rangesFromDefaultUVSTable, cp, cp + 1 /* exclusive */)) {
                        return false;
                    }
                }
            }
        }
    }
    *out_ranges = mergeRanges(rangesFromDefaultUVSTable, rangesFromNonDefaultUVSTable);
    return true;
}

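// Get the variation sequence coverage out of a Format 14 subtable. For each variation selector
// found, (*out)[vsIndex] is set to a SparseBitSet of the base code points supported with that
// selector; baseCoverage (the coverage of the plain cmap subtable) is used to filter entries of
// the default UVS tables.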
static void getCoverageFormat14(std::vector<SparseBitSet>* out, const uint8_t* data, size_t size,
                                const SparseBitSet& baseCoverage) {
    constexpr size_t kHeaderSize = 10;
    constexpr size_t kRecordSize = 11;
    constexpr size_t kLengthOffset = 2;
    constexpr size_t kNumRecordOffset = 6;

    out->clear();
    if (size < kHeaderSize) {
        return;
    }

    const uint32_t length = readU32(data, kLengthOffset);
    if (size < length) {
        return;
    }

    const uint64_t numRecords = readU32(data, kNumRecordOffset);
    const uint64_t sizeToRead = kHeaderSize + kRecordSize * numRecords;
    if (numRecords == 0 || sizeToRead > length) {
        if (sizeToRead > UINT_MAX) {
            android_errorWriteLog(0x534e4554, "70808908");
        }
        return;
    }

    for (uint32_t i = 0; i < numRecords; ++i) {
        // Process the records starting from the largest variation selector, since the largest
        // vsIndex determines the size of the output vector.
        const uint32_t recordHeadOffset = kHeaderSize + kRecordSize * (numRecords - i - 1);
        const uint32_t vsCodePoint = readU24(data, recordHeadOffset);
        const uint32_t defaultUVSOffset = readU32(data, recordHeadOffset + 3);
        const uint32_t nonDefaultUVSOffset = readU32(data, recordHeadOffset + 7);
        if (defaultUVSOffset > length || nonDefaultUVSOffset > length) {
            continue;
        }

        const uint16_t vsIndex = getVsIndex(vsCodePoint);
        if (vsIndex == INVALID_VS_INDEX) {
            continue;
        }
        std::vector<uint32_t> ranges;
        if (!getVSCoverage(&ranges, data, length, defaultUVSOffset, nonDefaultUVSOffset,
                           baseCoverage)) {
            continue;
        }
        if (out->size() < vsIndex + 1) {
            out->resize(vsIndex + 1);
        }
        (*out)[vsIndex] = SparseBitSet(ranges.data(), ranges.size() >> 1);
    }

    out->shrink_to_fit();
}

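// Compute the code point coverage of a font from its raw "cmap" table. The best supported
// subtable (format 4 or 12) is chosen via getTablePriority(), and if a format 14 (variation
// sequences) subtable is present, *out is filled with per-variation-selector coverage.
//
// A minimal usage sketch (cmapData and cmapSize are assumed to hold the raw 'cmap' table bytes):
//   std::vector<SparseBitSet> vsCoverage;
//   SparseBitSet coverage = CmapCoverage::getCoverage(cmapData, cmapSize, &vsCoverage);
//   bool hasA = coverage.get(0x0041);  // true if the font maps U+0041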
SparseBitSet CmapCoverage::getCoverage(const uint8_t* cmap_data, size_t cmap_size,
                                       std::vector<SparseBitSet>* out) {
    constexpr size_t kHeaderSize = 4;
    constexpr size_t kNumTablesOffset = 2;
    constexpr size_t kTableSize = 8;
    constexpr size_t kPlatformIdOffset = 0;
    constexpr size_t kEncodingIdOffset = 2;
    constexpr size_t kOffsetOffset = 4;
    constexpr size_t kFormatOffset = 0;
    constexpr uint32_t kNoTable = UINT32_MAX;

    if (kHeaderSize > cmap_size) {
        return SparseBitSet();
    }
    uint32_t numTables = readU16(cmap_data, kNumTablesOffset);
    if (kHeaderSize + numTables * kTableSize > cmap_size) {
        return SparseBitSet();
    }

    uint32_t bestTableOffset = kNoTable;
    uint16_t bestTableFormat = 0;
    uint8_t bestTablePriority = kLowestPriority;
    uint32_t vsTableOffset = kNoTable;
    for (uint32_t i = 0; i < numTables; ++i) {
        const uint32_t tableHeadOffset = kHeaderSize + i * kTableSize;
        const uint16_t platformId = readU16(cmap_data, tableHeadOffset + kPlatformIdOffset);
        const uint16_t encodingId = readU16(cmap_data, tableHeadOffset + kEncodingIdOffset);
        const uint32_t offset = readU32(cmap_data, tableHeadOffset + kOffsetOffset);

        if (offset > cmap_size - 2) {
            continue;  // Invalid table: not enough space to read.
        }
        const uint16_t format = readU16(cmap_data, offset + kFormatOffset);

        if (platformId == 0 /* Unicode */ && encodingId == 5 /* Variation Sequences */) {
            if (vsTableOffset == kNoTable && format == 14) {
                vsTableOffset = offset;
            } else {
                // Ignore the (0, 5) table if we have already seen another valid one or it's in a
                // format we don't understand.
            }
        } else {
            uint32_t length;
            uint32_t language;

            if (format == 4) {
                constexpr size_t lengthOffset = 2;
                constexpr size_t languageOffset = 4;
                constexpr size_t minTableSize = languageOffset + 2;
                if (offset > cmap_size - minTableSize) {
                    continue;  // Invalid table: not enough space to read.
                }
                length = readU16(cmap_data, offset + lengthOffset);
                language = readU16(cmap_data, offset + languageOffset);
            } else if (format == 12) {
                constexpr size_t lengthOffset = 4;
                constexpr size_t languageOffset = 8;
                constexpr size_t minTableSize = languageOffset + 4;
                if (offset > cmap_size - minTableSize) {
                    continue;  // Invalid table: not enough space to read.
                }
                length = readU32(cmap_data, offset + lengthOffset);
                language = readU32(cmap_data, offset + languageOffset);
            } else {
                continue;
            }

            if (length > cmap_size - offset) {
                continue;  // Invalid table: table length is larger than whole cmap data size.
            }
            if (language != 0) {
                // Unsupported or invalid table: this is either a subtable for the Macintosh
                // platform (which we don't support), or an invalid subtable since language field
                // should be zero for non-Macintosh subtables.
                continue;
            }
            const uint8_t priority = getTablePriority(platformId, encodingId);
            if (priority < bestTablePriority) {
                bestTableOffset = offset;
                bestTablePriority = priority;
                bestTableFormat = format;
            }
        }
        if (vsTableOffset != kNoTable && bestTablePriority == 0 /* highest priority */) {
            // Already found the highest priority table and variation sequences table. No need to
            // look at remaining tables.
            break;
        }
    }

    SparseBitSet coverage;

    if (bestTableOffset != kNoTable) {
        const uint8_t* tableData = cmap_data + bestTableOffset;
        const size_t tableSize = cmap_size - bestTableOffset;
        bool success;
        std::vector<uint32_t> coverageVec;
        if (bestTableFormat == 4) {
            success = getCoverageFormat4(coverageVec, tableData, tableSize);
        } else {
            success = getCoverageFormat12(coverageVec, tableData, tableSize);
        }

        if (success) {
            // Use data() so an empty coverage vector is handled safely.
            coverage = SparseBitSet(coverageVec.data(), coverageVec.size() >> 1);
        }
    }

    if (vsTableOffset != kNoTable) {
        const uint8_t* tableData = cmap_data + vsTableOffset;
        const size_t tableSize = cmap_size - vsTableOffset;
        getCoverageFormat14(out, tableData, tableSize, coverage);
    }
    return coverage;
}

}  // namespace minikin