1 /*
2  * Copyright 2011 Google Inc. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "subsetter_impl.h"
18 
19 #include <string.h>
20 
21 #include <algorithm>
22 #include <iterator>
23 #include <limits>
24 #include <map>
25 #include <set>
26 #include <string>
27 
28 #include <unicode/ustring.h>
29 #include <unicode/uversion.h>
30 
31 #include "sfntly/table/bitmap/eblc_table.h"
32 #include "sfntly/table/bitmap/ebdt_table.h"
33 #include "sfntly/table/bitmap/index_sub_table.h"
34 #include "sfntly/table/bitmap/index_sub_table_format1.h"
35 #include "sfntly/table/bitmap/index_sub_table_format2.h"
36 #include "sfntly/table/bitmap/index_sub_table_format3.h"
37 #include "sfntly/table/bitmap/index_sub_table_format4.h"
38 #include "sfntly/table/bitmap/index_sub_table_format5.h"
39 #include "sfntly/table/core/name_table.h"
40 #include "sfntly/tag.h"
41 #include "sfntly/data/memory_byte_array.h"
42 #include "sfntly/port/memory_input_stream.h"
43 #include "sfntly/port/memory_output_stream.h"
44 
45 namespace {
46 
47 using namespace sfntly;
48 
/**
 * UTF-16 string type used for the font names handled in this file.
 *
 * std::u16string and icu::UnicodeString can't be used here.
 * UChar is not always char16_t on some platforms, so std::u16string is avoided.
 * The icu::UnicodeString C++ API is also avoided to make the code more
 * portable across platforms, due to C++ ABI compatibility issues.
 */
typedef std::basic_string<UChar> UCharString;
56 
// Bitmap subsetting is only attempted when the combined size of the bitmap
// data and bitmap location tables is at least this many bytes (16KB).
static const int BITMAP_SIZE_THRESHOLD = 16 * 1024;
59 
ConstructName(UChar * name_part,UCharString * name,int32_t name_id)60 void ConstructName(UChar* name_part, UCharString* name, int32_t name_id) {
61   switch (name_id) {
62     case NameId::kFullFontName:
63       *name = name_part;
64       break;
65     case NameId::kFontFamilyName:
66     case NameId::kPreferredFamily:
67     case NameId::kWWSFamilyName: {
68       UCharString original = *name;
69       *name = name_part;
70       *name += original;
71       break;
72     }
73     case NameId::kFontSubfamilyName:
74     case NameId::kPreferredSubfamily:
75     case NameId::kWWSSubfamilyName:
76       *name += name_part;
77       break;
78     default:
79       // This name part is not used to construct font name (e.g. copyright).
80       // Simply ignore it.
81       break;
82   }
83 }
84 
85 // Convert UTF-8 string into UTF-16 string.
86 //
87 // Ill-formed input is replaced with U+FFFD.
88 // Otherwise, return empty string if other error occurs during the conversion.
ConvertFromUtf8(const char * src)89 UCharString ConvertFromUtf8(const char* src) {
90   int32_t srcLength = strlen(src);
91   int32_t destCapacity = srcLength + 1;
92   UChar* buffer = new UChar[destCapacity];
93   UCharString dest;
94   if (buffer == NULL) {
95     return dest;
96   }
97   int32_t destLength;
98   UErrorCode errorCode = U_ZERO_ERROR;
99   u_strFromUTF8WithSub(buffer, destCapacity, &destLength, src, srcLength,
100                        0xfffd, // Unicode replacement character
101                        NULL,
102                        &errorCode);
103   if (U_SUCCESS(errorCode)) {
104     dest.append(buffer, destLength);
105   }
106   delete[] buffer;
107   return dest;
108 }
109 
CaseCompareUtf16(const UCharString & str1,const UCharString & str2,uint32_t option)110 int32_t CaseCompareUtf16(const UCharString& str1,
111                          const UCharString& str2, uint32_t option) {
112   UErrorCode errorCode = U_ZERO_ERROR;
113   return u_strCaseCompare(str1.c_str(), str1.length(), str2.c_str(),
114                           str2.length(), option, &errorCode);
115 }
116 
HashCode(int32_t platform_id,int32_t encoding_id,int32_t language_id,int32_t name_id)117 int32_t HashCode(int32_t platform_id, int32_t encoding_id, int32_t language_id,
118                  int32_t name_id) {
119   int32_t result = platform_id << 24 | encoding_id << 16 | language_id << 8;
120   if (name_id == NameId::kFullFontName) {
121     result |= 0xff;
122   } else if (name_id == NameId::kPreferredFamily ||
123              name_id == NameId::kPreferredSubfamily) {
124     result |= 0xf;
125   } else if (name_id == NameId::kWWSFamilyName ||
126              name_id == NameId::kWWSSubfamilyName) {
127     result |= 1;
128   }
129   return result;
130 }
131 
HasName(const char * font_name,Font * font)132 bool HasName(const char* font_name, Font* font) {
133   UCharString font_string = ConvertFromUtf8(font_name);
134   if (font_string.empty())
135     return false;
136   UCharString regular_suffix = ConvertFromUtf8(" Regular");
137   UCharString alt_font_string = font_string;
138   alt_font_string += regular_suffix;
139 
140   typedef std::map<int32_t, UCharString> NameMap;
141   NameMap names;
142   NameTablePtr name_table = down_cast<NameTable*>(font->GetTable(Tag::name));
143   if (name_table == NULL) {
144     return false;
145   }
146 
147   for (int32_t i = 0; i < name_table->NameCount(); ++i) {
148     switch (name_table->NameId(i)) {
149       case NameId::kFontFamilyName:
150       case NameId::kFontSubfamilyName:
151       case NameId::kFullFontName:
152       case NameId::kPreferredFamily:
153       case NameId::kPreferredSubfamily:
154       case NameId::kWWSFamilyName:
155       case NameId::kWWSSubfamilyName: {
156         UChar* name_part = name_table->Name(i);
157         if (name_part == NULL) {
158           continue;
159         }
160         int32_t hash_code = HashCode(name_table->PlatformId(i),
161                                      name_table->EncodingId(i),
162                                      name_table->LanguageId(i),
163                                      name_table->NameId(i));
164         ConstructName(name_part, &(names[hash_code]), name_table->NameId(i));
165         delete[] name_part;
166         break;
167       }
168       default:
169         break;
170     }
171   }
172 
173   if (!names.empty()) {
174     for (NameMap::iterator i = names.begin(), e = names.end(); i != e; ++i) {
175       if (CaseCompareUtf16(i->second, font_string, 0) == 0 ||
176           CaseCompareUtf16(i->second, alt_font_string, 0) == 0) {
177         return true;
178       }
179     }
180   }
181   return false;
182 }
183 
FindFont(const char * font_name,const FontArray & font_array)184 Font* FindFont(const char* font_name, const FontArray& font_array) {
185   if (font_array.empty() || font_array[0] == NULL) {
186     return NULL;
187   }
188 
189   if (font_name && strlen(font_name)) {
190     for (FontArray::const_iterator i = font_array.begin(), e = font_array.end();
191          i != e; ++i) {
192       if (HasName(font_name, i->p_)) {
193         return i->p_;
194       }
195     }
196   }
197 
198   return font_array[0].p_;
199 }
200 
ResolveCompositeGlyphs(GlyphTable * glyph_table,LocaTable * loca_table,const unsigned int * glyph_ids,size_t glyph_count,IntegerSet * glyph_id_processed)201 bool ResolveCompositeGlyphs(GlyphTable* glyph_table,
202                             LocaTable* loca_table,
203                             const unsigned int* glyph_ids,
204                             size_t glyph_count,
205                             IntegerSet* glyph_id_processed) {
206   if (glyph_table == NULL || loca_table == NULL ||
207       glyph_ids == NULL || glyph_count == 0 || glyph_id_processed == NULL) {
208     return false;
209   }
210 
211   // Sort and uniquify glyph ids.
212   IntegerSet glyph_id_remaining;
213   glyph_id_remaining.insert(0);  // Always include glyph id 0.
214   for (size_t i = 0; i < glyph_count; ++i) {
215     glyph_id_remaining.insert(glyph_ids[i]);
216   }
217 
218   // Identify if any given glyph id maps to a composite glyph.  If so, include
219   // the glyphs referenced by that composite glyph.
220   while (!glyph_id_remaining.empty()) {
221     IntegerSet comp_glyph_id;
222     for (IntegerSet::iterator i = glyph_id_remaining.begin(),
223                               e = glyph_id_remaining.end(); i != e; ++i) {
224       if (*i < 0 || *i >= loca_table->num_glyphs()) {
225         // Invalid glyph id, ignore.
226         continue;
227       }
228 
229       int32_t length = loca_table->GlyphLength(*i);
230       if (length == 0) {
231         // Empty glyph, ignore.
232         continue;
233       }
234       int32_t offset = loca_table->GlyphOffset(*i);
235 
236       GlyphPtr glyph;
237       glyph.Attach(glyph_table->GetGlyph(offset, length));
238       if (glyph == NULL) {
239         // Error finding glyph, ignore.
240         continue;
241       }
242 
243       if (glyph->GlyphType() == GlyphType::kComposite) {
244         Ptr<GlyphTable::CompositeGlyph> comp_glyph =
245             down_cast<GlyphTable::CompositeGlyph*>(glyph.p_);
246         for (int32_t j = 0; j < comp_glyph->NumGlyphs(); ++j) {
247           int32_t glyph_id = comp_glyph->GlyphIndex(j);
248           if (glyph_id_processed->find(glyph_id) == glyph_id_processed->end() &&
249               glyph_id_remaining.find(glyph_id) == glyph_id_remaining.end()) {
250             comp_glyph_id.insert(comp_glyph->GlyphIndex(j));
251           }
252         }
253       }
254 
255       glyph_id_processed->insert(*i);
256     }
257 
258     glyph_id_remaining.clear();
259     glyph_id_remaining = comp_glyph_id;
260   }
261 
262   return true;
263 }
264 
SetupGlyfBuilders(Font::Builder * font_builder,GlyphTable * glyph_table,LocaTable * loca_table,const IntegerSet & glyph_ids)265 bool SetupGlyfBuilders(Font::Builder* font_builder,
266                        GlyphTable* glyph_table,
267                        LocaTable* loca_table,
268                        const IntegerSet& glyph_ids) {
269   if (!font_builder || !glyph_table || !loca_table) {
270     return false;
271   }
272 
273   GlyphTableBuilderPtr glyph_table_builder =
274       down_cast<GlyphTable::Builder*>(font_builder->NewTableBuilder(Tag::glyf));
275   LocaTableBuilderPtr loca_table_builder =
276       down_cast<LocaTable::Builder*>(font_builder->NewTableBuilder(Tag::loca));
277   if (glyph_table_builder == NULL || loca_table_builder == NULL) {
278     // Out of memory.
279     return false;
280   }
281 
282   // Extract glyphs and setup loca list.
283   IntegerList loca_list;
284   loca_list.resize(loca_table->num_glyphs());
285   loca_list.push_back(0);
286   int32_t last_glyph_id = 0;
287   int32_t last_offset = 0;
288   GlyphTable::GlyphBuilderList* glyph_builders =
289       glyph_table_builder->GlyphBuilders();
290   for (IntegerSet::const_iterator i = glyph_ids.begin(), e = glyph_ids.end();
291                                   i != e; ++i) {
292     int32_t length = loca_table->GlyphLength(*i);
293     int32_t offset = loca_table->GlyphOffset(*i);
294 
295     GlyphPtr glyph;
296     glyph.Attach(glyph_table->GetGlyph(offset, length));
297 
298     // Add glyph to new glyf table.
299     ReadableFontDataPtr data = glyph->ReadFontData();
300     WritableFontDataPtr copy_data;
301     copy_data.Attach(WritableFontData::CreateWritableFontData(data->Length()));
302     data->CopyTo(copy_data);
303     GlyphBuilderPtr glyph_builder;
304     glyph_builder.Attach(glyph_table_builder->GlyphBuilder(copy_data));
305     glyph_builders->push_back(glyph_builder);
306 
307     // Configure loca list.
308     for (int32_t j = last_glyph_id + 1; j <= *i; ++j) {
309       loca_list[j] = last_offset;
310     }
311 
312     if (last_offset > std::numeric_limits<int32_t>::max() - length)
313       return false;
314 
315     last_offset += length;
316     loca_list[*i + 1] = last_offset;
317     last_glyph_id = *i;
318   }
319   for (int32_t j = last_glyph_id + 1; j <= loca_table->num_glyphs(); ++j) {
320     loca_list[j] = last_offset;
321   }
322   loca_table_builder->SetLocaList(&loca_list);
323 
324   return true;
325 }
326 
HasOverlap(int32_t range_begin,int32_t range_end,const IntegerSet & glyph_ids)327 bool HasOverlap(int32_t range_begin, int32_t range_end,
328                 const IntegerSet& glyph_ids) {
329   if (range_begin == range_end)
330     return glyph_ids.find(range_begin) != glyph_ids.end();
331 
332   if (range_begin >= range_end)
333     return false;
334 
335   IntegerSet::const_iterator left = glyph_ids.lower_bound(range_begin);
336   IntegerSet::const_iterator right = glyph_ids.lower_bound(range_end);
337   return left != right;
338 }
339 
340 // Initialize builder, returns false if glyph_id subset is not covered.
341 // Not thread-safe, caller to ensure object life-time.
InitializeBitmapBuilder(EbdtTable::Builder * ebdt,EblcTable::Builder * eblc,const IntegerSet & glyph_ids)342 bool InitializeBitmapBuilder(EbdtTable::Builder* ebdt, EblcTable::Builder* eblc,
343                              const IntegerSet& glyph_ids) {
344   BitmapLocaList loca_list;
345   BitmapSizeTableBuilderList* strikes = eblc->BitmapSizeBuilders();
346 
347   // Note: Do not call eblc_builder->GenerateLocaList(&loca_list) and then
348   //       ebdt_builder->SetLoca(loca_list).  For fonts like SimSun, there are
349   //       >28K glyphs inside, where a typical usage will be <1K glyphs.  Doing
350   //       the calls improperly will result in creation of >100K objects that
351   //       will be destroyed immediately, inducing significant slowness.
352   IntegerList removed_strikes;
353   for (size_t i = 0; i < strikes->size(); i++) {
354     if (!HasOverlap((*strikes)[i]->StartGlyphIndex(),
355                     (*strikes)[i]->EndGlyphIndex(), glyph_ids)) {
356       removed_strikes.push_back(i);
357       continue;
358     }
359 
360     IndexSubTableBuilderList* index_builders =
361         (*strikes)[i]->IndexSubTableBuilders();
362     IntegerList removed_indexes;
363     BitmapGlyphInfoMap info_map;
364     for (size_t j = 0; j < index_builders->size(); ++j) {
365       if ((*index_builders)[j] == NULL) {
366         // Subtable is malformed, let's just skip it.
367         removed_indexes.push_back(j);
368         continue;
369       }
370       int32_t first_glyph_id = (*index_builders)[j]->first_glyph_index();
371       int32_t last_glyph_id = (*index_builders)[j]->last_glyph_index();
372       if (!HasOverlap(first_glyph_id, last_glyph_id, glyph_ids)) {
373         removed_indexes.push_back(j);
374         continue;
375       }
376       for (IntegerSet::const_iterator gid = glyph_ids.begin(),
377                                       gid_end = glyph_ids.end();
378                                       gid != gid_end; gid++) {
379         if (*gid < first_glyph_id) {
380           continue;
381         }
382         if (*gid > last_glyph_id) {
383           break;
384         }
385         BitmapGlyphInfoPtr info;
386         info.Attach((*index_builders)[j]->GlyphInfo(*gid));
387         if (info && info->length()) {  // Do not include gid without bitmap
388           info_map[*gid] = info;
389         }
390       }
391     }
392     if (!info_map.empty()) {
393       loca_list.push_back(info_map);
394     } else {
395       removed_strikes.push_back(i);  // Detected null entries.
396     }
397 
398     // Remove unused index sub tables
399     for (IntegerList::reverse_iterator j = removed_indexes.rbegin(),
400                                        e = removed_indexes.rend();
401                                        j != e; j++) {
402       index_builders->erase(index_builders->begin() + *j);
403     }
404   }
405   if (removed_strikes.size() == strikes->size() || loca_list.empty()) {
406     return false;
407   }
408 
409   for (IntegerList::reverse_iterator i = removed_strikes.rbegin(),
410                                      e = removed_strikes.rend(); i != e; i++) {
411     strikes->erase(strikes->begin() + *i);
412   }
413 
414   if (strikes->empty()) {  // no glyph covered, can safely drop the builders.
415     return false;
416   }
417 
418   ebdt->SetLoca(&loca_list);
419   ebdt->GlyphBuilders();  // Initialize the builder.
420   return true;
421 }
422 
CopyBigGlyphMetrics(BigGlyphMetrics::Builder * source,BigGlyphMetrics::Builder * target)423 void CopyBigGlyphMetrics(BigGlyphMetrics::Builder* source,
424                          BigGlyphMetrics::Builder* target) {
425   target->SetHeight(static_cast<uint8_t>(source->Height()));
426   target->SetWidth(static_cast<uint8_t>(source->Width()));
427   target->SetHoriBearingX(static_cast<uint8_t>(source->HoriBearingX()));
428   target->SetHoriBearingY(static_cast<uint8_t>(source->HoriBearingY()));
429   target->SetHoriAdvance(static_cast<uint8_t>(source->HoriAdvance()));
430   target->SetVertBearingX(static_cast<uint8_t>(source->VertBearingX()));
431   target->SetVertBearingY(static_cast<uint8_t>(source->VertBearingY()));
432   target->SetVertAdvance(static_cast<uint8_t>(source->VertAdvance()));
433 }
434 
435 CALLER_ATTACH IndexSubTable::Builder*
ConstructIndexFormat4(IndexSubTable::Builder * b,const BitmapGlyphInfoMap & loca,int32_t * image_data_offset)436 ConstructIndexFormat4(IndexSubTable::Builder* b, const BitmapGlyphInfoMap& loca,
437                       int32_t* image_data_offset) {
438   IndexSubTableFormat4BuilderPtr builder4;
439   builder4.Attach(IndexSubTableFormat4::Builder::CreateBuilder());
440   CodeOffsetPairBuilderList offset_pairs;
441 
442   size_t offset = 0;
443   int32_t lower_bound = b->first_glyph_index();
444   int32_t upper_bound = b->last_glyph_index();
445   int32_t last_gid = -1;
446   BitmapGlyphInfoMap::const_iterator i = loca.lower_bound(lower_bound);
447   BitmapGlyphInfoMap::const_iterator end = loca.end();
448   if (i != end) {
449     last_gid = i->first;
450     builder4->set_first_glyph_index(last_gid);
451     builder4->set_image_format(b->image_format());
452     builder4->set_image_data_offset(*image_data_offset);
453   }
454   for (; i != end; i++) {
455     int32_t gid = i->first;
456     if (gid > upper_bound) {
457       break;
458     }
459     offset_pairs.push_back(
460         IndexSubTableFormat4::CodeOffsetPairBuilder(gid, offset));
461     offset += i->second->length();
462     last_gid = gid;
463   }
464   offset_pairs.push_back(
465       IndexSubTableFormat4::CodeOffsetPairBuilder(-1, offset));
466   builder4->set_last_glyph_index(last_gid);
467   *image_data_offset += offset;
468   builder4->SetOffsetArray(offset_pairs);
469 
470   return builder4.Detach();
471 }
472 
473 CALLER_ATTACH IndexSubTable::Builder*
ConstructIndexFormat5(IndexSubTable::Builder * b,const BitmapGlyphInfoMap & loca,int32_t * image_data_offset)474 ConstructIndexFormat5(IndexSubTable::Builder* b, const BitmapGlyphInfoMap& loca,
475                       int32_t* image_data_offset) {
476   IndexSubTableFormat5BuilderPtr new_builder;
477   new_builder.Attach(IndexSubTableFormat5::Builder::CreateBuilder());
478 
479   // Copy BigMetrics
480   int32_t image_size = 0;
481   if (b->index_format() == IndexSubTable::Format::FORMAT_2) {
482     IndexSubTableFormat2BuilderPtr builder2 =
483       down_cast<IndexSubTableFormat2::Builder*>(b);
484     CopyBigGlyphMetrics(builder2->BigMetrics(), new_builder->BigMetrics());
485     image_size = builder2->ImageSize();
486   } else {
487     IndexSubTableFormat5BuilderPtr builder5 =
488       down_cast<IndexSubTableFormat5::Builder*>(b);
489     BigGlyphMetricsBuilderPtr metrics_builder;
490     CopyBigGlyphMetrics(builder5->BigMetrics(), new_builder->BigMetrics());
491     image_size = builder5->ImageSize();
492   }
493 
494   IntegerList* glyph_array = new_builder->GlyphArray();
495   size_t offset = 0;
496   int32_t lower_bound = b->first_glyph_index();
497   int32_t upper_bound = b->last_glyph_index();
498   int32_t last_gid = -1;
499   BitmapGlyphInfoMap::const_iterator i = loca.lower_bound(lower_bound);
500   BitmapGlyphInfoMap::const_iterator end = loca.end();
501   if (i != end) {
502     last_gid = i->first;
503     new_builder->set_first_glyph_index(last_gid);
504     new_builder->set_image_format(b->image_format());
505     new_builder->set_image_data_offset(*image_data_offset);
506     new_builder->SetImageSize(image_size);
507   }
508   for (; i != end; i++) {
509     int32_t gid = i->first;
510     if (gid > upper_bound) {
511       break;
512     }
513     glyph_array->push_back(gid);
514     offset += i->second->length();
515     last_gid = gid;
516   }
517   new_builder->set_last_glyph_index(last_gid);
518   *image_data_offset += offset;
519   return new_builder.Detach();
520 }
521 
522 CALLER_ATTACH IndexSubTable::Builder*
SubsetIndexSubTable(IndexSubTable::Builder * builder,const BitmapGlyphInfoMap & loca,int32_t * image_data_offset)523 SubsetIndexSubTable(IndexSubTable::Builder* builder,
524                     const BitmapGlyphInfoMap& loca,
525                     int32_t* image_data_offset) {
526   switch (builder->index_format()) {
527     case IndexSubTable::Format::FORMAT_1:
528     case IndexSubTable::Format::FORMAT_3:
529     case IndexSubTable::Format::FORMAT_4:
530       return ConstructIndexFormat4(builder, loca, image_data_offset);
531     case IndexSubTable::Format::FORMAT_2:
532     case IndexSubTable::Format::FORMAT_5:
533       return ConstructIndexFormat5(builder, loca, image_data_offset);
534     default:
535       assert(false);
536       break;
537   }
538   return NULL;
539 }
540 
541 }
542 
543 namespace sfntly {
544 
545 // Not thread-safe, caller to ensure object life-time.
SubsetEBLC(EblcTable::Builder * eblc,const BitmapLocaList & new_loca)546 void SubsetEBLC(EblcTable::Builder* eblc, const BitmapLocaList& new_loca) {
547   BitmapSizeTableBuilderList* size_builders = eblc->BitmapSizeBuilders();
548   if (size_builders == NULL) {
549     return;
550   }
551 
552   int32_t image_data_offset = EbdtTable::Offset::kHeaderLength;
553   for (size_t strike = 0; strike < size_builders->size(); ++strike) {
554     IndexSubTableBuilderList* index_builders =
555         (*size_builders)[strike]->IndexSubTableBuilders();
556     for (size_t index = 0; index < index_builders->size(); ++index) {
557       IndexSubTable::Builder* new_builder_raw =
558           SubsetIndexSubTable((*index_builders)[index], new_loca[strike],
559                               &image_data_offset);
560       if (NULL != new_builder_raw) {
561         (*index_builders)[index].Attach(new_builder_raw);
562       }
563     }
564   }
565 }
566 
567 // EBLC structure (from stuartg)
568 //  header
569 //  bitmapSizeTable[]
570 //    one per strike
571 //    holds strike metrics - sbitLineMetrics
572 //    holds info about indexSubTableArray
573 //  indexSubTableArray[][]
574 //    one per strike and then one per indexSubTable for that strike
575 //    holds info about the indexSubTable
576 //    the indexSubTable entries pointed to can be of different formats
577 //  indexSubTable
578 //    one per indexSubTableArray entry
579 //    tells how to get the glyphs
580 //    may hold the glyph metrics if they are uniform for all the glyphs in range
581 // Please note that the structure can also be
582 //  {indexSubTableArray[], indexSubTables[]}[]
583 //  This way is also legal and in fact how Microsoft fonts are laid out.
584 //
585 // There is nothing that says that the indexSubTableArray entries and/or the
586 // indexSubTable items need to be unique. They may be shared between strikes.
587 //
588 // EBDT structure:
589 //  header
590 //  glyphs
591 //    amorphous blob of data
592 //    different glyphs that are only able to be figured out from the EBLC table
593 //    may hold metrics - depends on the EBLC entry that pointed to them
594 
// Subsetting EBLC table (from arthurhsu)
//  Most pages use only a fraction (hundreds or fewer) of the glyphs in a given
//  font (which can have >20K glyphs for CJK).  It's safe to assume that the
//  subset font will have sparse bitmap glyphs.  So we reconstruct the EBLC
//  table as format 4 or 5 here.
600 
// Result of SetupBitmapBuilders(): tells which group of bitmap tables must
// not be copied over from the original font.
enum BuildersToRemove {
  kRemoveNone = 0,         // Drop neither group.
  kRemoveBDAT = 1,         // Drop the bdat/bloc/bhed group.
  kRemoveBDATAndEBDT = 2,  // Drop both groups.
  kRemoveEBDT = 3          // Drop the EBDT/EBLC/EBSC group.
};
607 
SetupBitmapBuilders(Font * font,Font::Builder * font_builder,const IntegerSet & glyph_ids)608 int SetupBitmapBuilders(Font* font, Font::Builder* font_builder,
609                         const IntegerSet& glyph_ids) {
610   if (!font || !font_builder) {
611     return false;
612   }
613 
614   // Check if bitmap table exists.
615   EbdtTablePtr ebdt_table = down_cast<EbdtTable*>(font->GetTable(Tag::EBDT));
616   EblcTablePtr eblc_table = down_cast<EblcTable*>(font->GetTable(Tag::EBLC));
617   bool use_ebdt = (ebdt_table != NULL && eblc_table != NULL);
618   if (!use_ebdt) {
619     ebdt_table = down_cast<EbdtTable*>(font->GetTable(Tag::bdat));
620     eblc_table = down_cast<EblcTable*>(font->GetTable(Tag::bloc));
621     if (ebdt_table == NULL || eblc_table == NULL) {
622       return kRemoveNone;
623     }
624   }
625 
626   // If the bitmap table's size is too small, skip subsetting.
627   if (ebdt_table->DataLength() + eblc_table->DataLength() <
628       BITMAP_SIZE_THRESHOLD) {
629     return use_ebdt ? kRemoveBDAT : kRemoveNone;
630   }
631 
632   // Get the builders.
633   EbdtTableBuilderPtr ebdt_table_builder = down_cast<EbdtTable::Builder*>(
634       font_builder->NewTableBuilder(use_ebdt ? Tag::EBDT : Tag::bdat,
635                                     ebdt_table->ReadFontData()));
636   EblcTableBuilderPtr eblc_table_builder = down_cast<EblcTable::Builder*>(
637       font_builder->NewTableBuilder(use_ebdt ? Tag::EBLC : Tag::bloc,
638                                     eblc_table->ReadFontData()));
639   if (ebdt_table_builder == NULL || eblc_table_builder == NULL) {
640     // Out of memory.
641     return use_ebdt ? kRemoveBDAT : kRemoveNone;
642   }
643 
644   if (!InitializeBitmapBuilder(ebdt_table_builder, eblc_table_builder,
645                                glyph_ids)) {
646     // Bitmap tables do not cover the glyphs in our subset.
647     font_builder->RemoveTableBuilder(use_ebdt ? Tag::EBLC : Tag::bloc);
648     font_builder->RemoveTableBuilder(use_ebdt ? Tag::EBDT : Tag::bdat);
649     return use_ebdt ? kRemoveBDATAndEBDT : kRemoveEBDT;
650   }
651 
652   BitmapLocaList new_loca;
653   ebdt_table_builder->GenerateLocaList(&new_loca);
654   SubsetEBLC(eblc_table_builder, new_loca);
655 
656   return use_ebdt ? kRemoveBDAT : kRemoveNone;
657 }
658 
SubsetterImpl()659 SubsetterImpl::SubsetterImpl() {
660 }
661 
~SubsetterImpl()662 SubsetterImpl::~SubsetterImpl() {
663 }
664 
LoadFont(int font_index,const unsigned char * original_font,size_t font_size)665 bool SubsetterImpl::LoadFont(int font_index,
666                              const unsigned char* original_font,
667                              size_t font_size) {
668   MemoryInputStream mis;
669   mis.Attach(original_font, font_size);
670   if (factory_ == NULL) {
671     factory_.Attach(FontFactory::GetInstance());
672   }
673 
674   FontArray font_array;
675   factory_->LoadFonts(&mis, &font_array);
676   if (font_index < 0 || (size_t)font_index >= font_array.size()) {
677     return false;
678   }
679   font_ = font_array[font_index].p_;
680   return font_ != NULL;
681 }
682 
LoadFont(const char * font_name,const unsigned char * original_font,size_t font_size)683 bool SubsetterImpl::LoadFont(const char* font_name,
684                              const unsigned char* original_font,
685                              size_t font_size) {
686   MemoryInputStream mis;
687   mis.Attach(original_font, font_size);
688   if (factory_ == NULL) {
689     factory_.Attach(FontFactory::GetInstance());
690   }
691 
692   FontArray font_array;
693   factory_->LoadFonts(&mis, &font_array);
694   font_ = FindFont(font_name, font_array);
695   return font_ != NULL;
696 }
697 
SubsetFont(const unsigned int * glyph_ids,size_t glyph_count,unsigned char ** output_buffer)698 int SubsetterImpl::SubsetFont(const unsigned int* glyph_ids,
699                               size_t glyph_count,
700                               unsigned char** output_buffer) {
701   if (factory_ == NULL || font_ == NULL) {
702     return -1;
703   }
704 
705   // Find glyf and loca table.
706   GlyphTablePtr glyph_table =
707       down_cast<GlyphTable*>(font_->GetTable(Tag::glyf));
708   LocaTablePtr loca_table = down_cast<LocaTable*>(font_->GetTable(Tag::loca));
709   if (glyph_table == NULL || loca_table == NULL) {
710     // We are not able to subset the font.
711     return 0;
712   }
713 
714   IntegerSet glyph_id_processed;
715   if (!ResolveCompositeGlyphs(glyph_table, loca_table,
716                               glyph_ids, glyph_count, &glyph_id_processed) ||
717       glyph_id_processed.empty()) {
718     return 0;
719   }
720 
721   FontPtr new_font;
722   new_font.Attach(Subset(glyph_id_processed, glyph_table, loca_table));
723   if (new_font == NULL) {
724     return 0;
725   }
726 
727   MemoryOutputStream output_stream;
728   factory_->SerializeFont(new_font, &output_stream);
729   size_t length = output_stream.Size();
730   if (length == 0 ||
731       length > static_cast<size_t>(std::numeric_limits<int>::max())) {
732     return 0;
733   }
734 
735   *output_buffer = new unsigned char[length];
736   memcpy(*output_buffer, output_stream.Get(), length);
737   return length;
738 }
739 
740 // Long comments regarding TTF tables and PDF (from stuartg)
741 //
742 // According to PDF spec 1.4 (section 5.8), the following tables must be
743 // present:
744 //  head, hhea, loca, maxp, cvt, prep, glyf, hmtx, fpgm
745 //  cmap if font is used with a simple font dict and not a CIDFont dict
746 //
747 // Other tables we need to keep for PDF rendering to support zoom in/out:
748 //  bdat, bloc, ebdt, eblc, ebsc, gasp
749 //
750 // Special table:
751 //  CFF - if you have this table then you shouldn't have a glyf table and this
752 //        is the table with all the glyphs.  Shall skip subsetting completely
753 //        since sfntly is not capable of subsetting it for now.
754 //  post - extra info here for printing on PostScript printers but maybe not
755 //         enough to outweigh the space taken by the names
756 //
757 // Tables to break apart:
758 //  name - could throw away all but one language and one platform strings/ might
759 //         throw away some of the name entries
760 //  cmap - could strip out non-needed cmap subtables
761 //       - format 4 subtable can be subsetted as well using sfntly
762 //
763 // Graphite tables:
764 //  silf, glat, gloc, feat - should be okay to strip out
765 //
766 // Tables that can be discarded:
767 //  OS/2 - everything here is for layout and description of the font that is
768 //         elsewhere (some in the PDF objects)
769 //  BASE, GDEF, GSUB, GPOS, JSTF - all used for layout
770 //  kern - old style layout
771 //  DSIG - this will be invalid after subsetting
772 //  hdmx - layout
773 //  PCLT - metadata that's not needed
774 //  vmtx - layout
775 //  vhea - layout
776 //  VDMX
777 //  VORG - not used by TT/OT - used by CFF
778 //  hsty - would be surprised to see one of these - used on the Newton
779 //  AAT tables - mort, morx, feat, acnt, bsin, just, lcar, fdsc, fmtx, prop,
780 //               Zapf, opbd, trak, fvar, gvar, avar, cvar
781 //             - these are all layout tables and once layout happens are not
782 //               needed anymore
783 //  LTSH - layout
784 
785 CALLER_ATTACH
Subset(const IntegerSet & glyph_ids,GlyphTable * glyf,LocaTable * loca)786 Font* SubsetterImpl::Subset(const IntegerSet& glyph_ids, GlyphTable* glyf,
787                             LocaTable* loca) {
788   // The const is initialized here to workaround VC bug of rendering all Tag::*
789   // as 0.  These tags represents the TTF tables that we will embed in subset
790   // font.
791   const int32_t TABLES_IN_SUBSET[] = {
792     Tag::head, Tag::hhea, Tag::loca, Tag::maxp, Tag::cvt,
793     Tag::prep, Tag::glyf, Tag::hmtx, Tag::fpgm, Tag::EBDT,
794     Tag::EBLC, Tag::EBSC, Tag::bdat, Tag::bloc, Tag::bhed,
795     Tag::cmap,  // Keep here for future tagged PDF development.
796     Tag::name,  // Keep here due to legal concerns: copyright info inside.
797   };
798   const size_t kTablesInSubSetSize =
799       sizeof(TABLES_IN_SUBSET) / sizeof(TABLES_IN_SUBSET[0]);
800 
801   // Setup font builders we need.
802   FontBuilderPtr font_builder;
803   font_builder.Attach(factory_->NewFontBuilder());
804   IntegerSet remove_tags;
805 
806   if (SetupGlyfBuilders(font_builder, glyf, loca, glyph_ids)) {
807     remove_tags.insert(Tag::glyf);
808     remove_tags.insert(Tag::loca);
809   }
810 
811   // For old Apple bitmap fonts, they have only bdats and bhed is identical
812   // to head.  As a result, we can't remove bdat tables for those fonts.
813   int setup_result = SetupBitmapBuilders(font_, font_builder, glyph_ids);
814   if (setup_result == kRemoveBDATAndEBDT || setup_result == kRemoveEBDT) {
815     remove_tags.insert(Tag::EBDT);
816     remove_tags.insert(Tag::EBLC);
817     remove_tags.insert(Tag::EBSC);
818   }
819 
820   if (setup_result == kRemoveBDAT || setup_result == kRemoveBDATAndEBDT) {
821     remove_tags.insert(Tag::bdat);
822     remove_tags.insert(Tag::bloc);
823     remove_tags.insert(Tag::bhed);
824   }
825 
826   IntegerSet allowed_tags;
827   for (size_t i = 0; i < kTablesInSubSetSize; ++i)
828     allowed_tags.insert(TABLES_IN_SUBSET[i]);
829 
830   IntegerSet result;
831   std::set_difference(allowed_tags.begin(), allowed_tags.end(),
832                       remove_tags.begin(), remove_tags.end(),
833                       std::inserter(result, result.end()));
834   allowed_tags = result;
835 
836   // Setup remaining builders.
837   for (IntegerSet::const_iterator it = allowed_tags.begin();
838        it != allowed_tags.end(); ++it) {
839     int32_t tag = *it;
840     Table* table = font_->GetTable(tag);
841     if (table)
842       font_builder->NewTableBuilder(tag, table->ReadFontData());
843   }
844 
845   return font_builder->Build();
846 }
847 
848 }  // namespace sfntly
849