1 /*
2  * Copyright 2011 Google Inc. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "subsetter_impl.h"
18 
19 #include <string.h>
20 
21 #include <algorithm>
22 #include <iterator>
23 #include <map>
24 #include <set>
25 
26 #include <unicode/unistr.h>
27 
28 #include "sfntly/table/bitmap/eblc_table.h"
29 #include "sfntly/table/bitmap/ebdt_table.h"
30 #include "sfntly/table/bitmap/index_sub_table.h"
31 #include "sfntly/table/bitmap/index_sub_table_format1.h"
32 #include "sfntly/table/bitmap/index_sub_table_format2.h"
33 #include "sfntly/table/bitmap/index_sub_table_format3.h"
34 #include "sfntly/table/bitmap/index_sub_table_format4.h"
35 #include "sfntly/table/bitmap/index_sub_table_format5.h"
36 #include "sfntly/table/core/name_table.h"
37 #include "sfntly/tag.h"
38 #include "sfntly/data/memory_byte_array.h"
39 #include "sfntly/port/memory_input_stream.h"
40 #include "sfntly/port/memory_output_stream.h"
41 
42 #if defined U_USING_ICU_NAMESPACE
43   U_NAMESPACE_USE
44 #endif
45 
46 namespace {
47 
48 using namespace sfntly;
49 
// Size threshold in bytes (16 KB).  Bitmap tables whose combined data length
// is below this value are left alone by the bitmap subsetter.
static const int BITMAP_SIZE_THRESHOLD = 16 * 1024;
52 
ConstructName(UChar * name_part,UnicodeString * name,int32_t name_id)53 void ConstructName(UChar* name_part, UnicodeString* name, int32_t name_id) {
54   switch (name_id) {
55     case NameId::kFullFontName:
56       *name = name_part;
57       break;
58     case NameId::kFontFamilyName:
59     case NameId::kPreferredFamily:
60     case NameId::kWWSFamilyName: {
61       UnicodeString original = *name;
62       *name = name_part;
63       *name += original;
64       break;
65     }
66     case NameId::kFontSubfamilyName:
67     case NameId::kPreferredSubfamily:
68     case NameId::kWWSSubfamilyName:
69       *name += name_part;
70       break;
71     default:
72       // This name part is not used to construct font name (e.g. copyright).
73       // Simply ignore it.
74       break;
75   }
76 }
77 
HashCode(int32_t platform_id,int32_t encoding_id,int32_t language_id,int32_t name_id)78 int32_t HashCode(int32_t platform_id, int32_t encoding_id, int32_t language_id,
79                  int32_t name_id) {
80   int32_t result = platform_id << 24 | encoding_id << 16 | language_id << 8;
81   if (name_id == NameId::kFullFontName) {
82     result |= 0xff;
83   } else if (name_id == NameId::kPreferredFamily ||
84              name_id == NameId::kPreferredSubfamily) {
85     result |= 0xf;
86   } else if (name_id == NameId::kWWSFamilyName ||
87              name_id == NameId::kWWSSubfamilyName) {
88     result |= 1;
89   }
90   return result;
91 }
92 
HasName(const char * font_name,Font * font)93 bool HasName(const char* font_name, Font* font) {
94   UnicodeString font_string = UnicodeString::fromUTF8(font_name);
95   if (font_string.isEmpty())
96     return false;
97   UnicodeString regular_suffix = UnicodeString::fromUTF8(" Regular");
98   UnicodeString alt_font_string = font_string;
99   alt_font_string += regular_suffix;
100 
101   typedef std::map<int32_t, UnicodeString> NameMap;
102   NameMap names;
103   NameTablePtr name_table = down_cast<NameTable*>(font->GetTable(Tag::name));
104   if (name_table == NULL) {
105     return false;
106   }
107 
108   for (int32_t i = 0; i < name_table->NameCount(); ++i) {
109     switch (name_table->NameId(i)) {
110       case NameId::kFontFamilyName:
111       case NameId::kFontSubfamilyName:
112       case NameId::kFullFontName:
113       case NameId::kPreferredFamily:
114       case NameId::kPreferredSubfamily:
115       case NameId::kWWSFamilyName:
116       case NameId::kWWSSubfamilyName: {
117         UChar* name_part = name_table->Name(i);
118         if (name_part == NULL) {
119           continue;
120         }
121         int32_t hash_code = HashCode(name_table->PlatformId(i),
122                                      name_table->EncodingId(i),
123                                      name_table->LanguageId(i),
124                                      name_table->NameId(i));
125         ConstructName(name_part, &(names[hash_code]), name_table->NameId(i));
126         delete[] name_part;
127         break;
128       }
129       default:
130         break;
131     }
132   }
133 
134   if (!names.empty()) {
135     for (NameMap::iterator i = names.begin(), e = names.end(); i != e; ++i) {
136       if (i->second.caseCompare(font_string, 0) == 0 ||
137           i->second.caseCompare(alt_font_string, 0) == 0) {
138         return true;
139       }
140     }
141   }
142   return false;
143 }
144 
FindFont(const char * font_name,const FontArray & font_array)145 Font* FindFont(const char* font_name, const FontArray& font_array) {
146   if (font_array.empty() || font_array[0] == NULL) {
147     return NULL;
148   }
149 
150   if (font_name && strlen(font_name)) {
151     for (FontArray::const_iterator i = font_array.begin(), e = font_array.end();
152          i != e; ++i) {
153       if (HasName(font_name, i->p_)) {
154         return i->p_;
155       }
156     }
157   }
158 
159   return font_array[0].p_;
160 }
161 
ResolveCompositeGlyphs(GlyphTable * glyph_table,LocaTable * loca_table,const unsigned int * glyph_ids,size_t glyph_count,IntegerSet * glyph_id_processed)162 bool ResolveCompositeGlyphs(GlyphTable* glyph_table,
163                             LocaTable* loca_table,
164                             const unsigned int* glyph_ids,
165                             size_t glyph_count,
166                             IntegerSet* glyph_id_processed) {
167   if (glyph_table == NULL || loca_table == NULL ||
168       glyph_ids == NULL || glyph_count == 0 || glyph_id_processed == NULL) {
169     return false;
170   }
171 
172   // Sort and uniquify glyph ids.
173   IntegerSet glyph_id_remaining;
174   glyph_id_remaining.insert(0);  // Always include glyph id 0.
175   for (size_t i = 0; i < glyph_count; ++i) {
176     glyph_id_remaining.insert(glyph_ids[i]);
177   }
178 
179   // Identify if any given glyph id maps to a composite glyph.  If so, include
180   // the glyphs referenced by that composite glyph.
181   while (!glyph_id_remaining.empty()) {
182     IntegerSet comp_glyph_id;
183     for (IntegerSet::iterator i = glyph_id_remaining.begin(),
184                               e = glyph_id_remaining.end(); i != e; ++i) {
185       if (*i < 0 || *i >= loca_table->num_glyphs()) {
186         // Invalid glyph id, ignore.
187         continue;
188       }
189 
190       int32_t length = loca_table->GlyphLength(*i);
191       if (length == 0) {
192         // Empty glyph, ignore.
193         continue;
194       }
195       int32_t offset = loca_table->GlyphOffset(*i);
196 
197       GlyphPtr glyph;
198       glyph.Attach(glyph_table->GetGlyph(offset, length));
199       if (glyph == NULL) {
200         // Error finding glyph, ignore.
201         continue;
202       }
203 
204       if (glyph->GlyphType() == GlyphType::kComposite) {
205         Ptr<GlyphTable::CompositeGlyph> comp_glyph =
206             down_cast<GlyphTable::CompositeGlyph*>(glyph.p_);
207         for (int32_t j = 0; j < comp_glyph->NumGlyphs(); ++j) {
208           int32_t glyph_id = comp_glyph->GlyphIndex(j);
209           if (glyph_id_processed->find(glyph_id) == glyph_id_processed->end() &&
210               glyph_id_remaining.find(glyph_id) == glyph_id_remaining.end()) {
211             comp_glyph_id.insert(comp_glyph->GlyphIndex(j));
212           }
213         }
214       }
215 
216       glyph_id_processed->insert(*i);
217     }
218 
219     glyph_id_remaining.clear();
220     glyph_id_remaining = comp_glyph_id;
221   }
222 
223   return true;
224 }
225 
SetupGlyfBuilders(Font::Builder * font_builder,GlyphTable * glyph_table,LocaTable * loca_table,const IntegerSet & glyph_ids)226 bool SetupGlyfBuilders(Font::Builder* font_builder,
227                        GlyphTable* glyph_table,
228                        LocaTable* loca_table,
229                        const IntegerSet& glyph_ids) {
230   if (!font_builder || !glyph_table || !loca_table) {
231     return false;
232   }
233 
234   GlyphTableBuilderPtr glyph_table_builder =
235       down_cast<GlyphTable::Builder*>(font_builder->NewTableBuilder(Tag::glyf));
236   LocaTableBuilderPtr loca_table_builder =
237       down_cast<LocaTable::Builder*>(font_builder->NewTableBuilder(Tag::loca));
238   if (glyph_table_builder == NULL || loca_table_builder == NULL) {
239     // Out of memory.
240     return false;
241   }
242 
243   // Extract glyphs and setup loca list.
244   IntegerList loca_list;
245   loca_list.resize(loca_table->num_glyphs());
246   loca_list.push_back(0);
247   int32_t last_glyph_id = 0;
248   int32_t last_offset = 0;
249   GlyphTable::GlyphBuilderList* glyph_builders =
250       glyph_table_builder->GlyphBuilders();
251   for (IntegerSet::const_iterator i = glyph_ids.begin(), e = glyph_ids.end();
252                                   i != e; ++i) {
253     int32_t length = loca_table->GlyphLength(*i);
254     int32_t offset = loca_table->GlyphOffset(*i);
255 
256     GlyphPtr glyph;
257     glyph.Attach(glyph_table->GetGlyph(offset, length));
258 
259     // Add glyph to new glyf table.
260     ReadableFontDataPtr data = glyph->ReadFontData();
261     WritableFontDataPtr copy_data;
262     copy_data.Attach(WritableFontData::CreateWritableFontData(data->Length()));
263     data->CopyTo(copy_data);
264     GlyphBuilderPtr glyph_builder;
265     glyph_builder.Attach(glyph_table_builder->GlyphBuilder(copy_data));
266     glyph_builders->push_back(glyph_builder);
267 
268     // Configure loca list.
269     for (int32_t j = last_glyph_id + 1; j <= *i; ++j) {
270       loca_list[j] = last_offset;
271     }
272     last_offset += length;
273     loca_list[*i + 1] = last_offset;
274     last_glyph_id = *i;
275   }
276   for (int32_t j = last_glyph_id + 1; j <= loca_table->num_glyphs(); ++j) {
277     loca_list[j] = last_offset;
278   }
279   loca_table_builder->SetLocaList(&loca_list);
280 
281   return true;
282 }
283 
HasOverlap(int32_t range_begin,int32_t range_end,const IntegerSet & glyph_ids)284 bool HasOverlap(int32_t range_begin, int32_t range_end,
285                 const IntegerSet& glyph_ids) {
286   if (range_begin == range_end) {
287     return glyph_ids.find(range_begin) != glyph_ids.end();
288   } else if (range_end > range_begin) {
289     IntegerSet::const_iterator left = glyph_ids.lower_bound(range_begin);
290     IntegerSet::const_iterator right = glyph_ids.lower_bound(range_end);
291     return right != left;
292   }
293   return false;
294 }
295 
296 // Initialize builder, returns false if glyph_id subset is not covered.
297 // Not thread-safe, caller to ensure object life-time.
InitializeBitmapBuilder(EbdtTable::Builder * ebdt,EblcTable::Builder * eblc,const IntegerSet & glyph_ids)298 bool InitializeBitmapBuilder(EbdtTable::Builder* ebdt, EblcTable::Builder* eblc,
299                              const IntegerSet& glyph_ids) {
300   BitmapLocaList loca_list;
301   BitmapSizeTableBuilderList* strikes = eblc->BitmapSizeBuilders();
302 
303   // Note: Do not call eblc_builder->GenerateLocaList(&loca_list) and then
304   //       ebdt_builder->SetLoca(loca_list).  For fonts like SimSun, there are
305   //       >28K glyphs inside, where a typical usage will be <1K glyphs.  Doing
306   //       the calls improperly will result in creation of >100K objects that
307   //       will be destroyed immediately, inducing significant slowness.
308   IntegerList removed_strikes;
309   for (size_t i = 0; i < strikes->size(); i++) {
310     if (!HasOverlap((*strikes)[i]->StartGlyphIndex(),
311                     (*strikes)[i]->EndGlyphIndex(), glyph_ids)) {
312       removed_strikes.push_back(i);
313       continue;
314     }
315 
316     IndexSubTableBuilderList* index_builders =
317         (*strikes)[i]->IndexSubTableBuilders();
318     IntegerList removed_indexes;
319     BitmapGlyphInfoMap info_map;
320     for (size_t j = 0; j < index_builders->size(); ++j) {
321       if ((*index_builders)[j] == NULL) {
322         // Subtable is malformed, let's just skip it.
323         removed_indexes.push_back(j);
324         continue;
325       }
326       int32_t first_glyph_id = (*index_builders)[j]->first_glyph_index();
327       int32_t last_glyph_id = (*index_builders)[j]->last_glyph_index();
328       if (!HasOverlap(first_glyph_id, last_glyph_id, glyph_ids)) {
329         removed_indexes.push_back(j);
330         continue;
331       }
332       for (IntegerSet::const_iterator gid = glyph_ids.begin(),
333                                       gid_end = glyph_ids.end();
334                                       gid != gid_end; gid++) {
335         if (*gid < first_glyph_id) {
336           continue;
337         }
338         if (*gid > last_glyph_id) {
339           break;
340         }
341         BitmapGlyphInfoPtr info;
342         info.Attach((*index_builders)[j]->GlyphInfo(*gid));
343         if (info && info->length()) {  // Do not include gid without bitmap
344           info_map[*gid] = info;
345         }
346       }
347     }
348     if (!info_map.empty()) {
349       loca_list.push_back(info_map);
350     } else {
351       removed_strikes.push_back(i);  // Detected null entries.
352     }
353 
354     // Remove unused index sub tables
355     for (IntegerList::reverse_iterator j = removed_indexes.rbegin(),
356                                        e = removed_indexes.rend();
357                                        j != e; j++) {
358       index_builders->erase(index_builders->begin() + *j);
359     }
360   }
361   if (removed_strikes.size() == strikes->size() || loca_list.empty()) {
362     return false;
363   }
364 
365   for (IntegerList::reverse_iterator i = removed_strikes.rbegin(),
366                                      e = removed_strikes.rend(); i != e; i++) {
367     strikes->erase(strikes->begin() + *i);
368   }
369 
370   if (strikes->empty()) {  // no glyph covered, can safely drop the builders.
371     return false;
372   }
373 
374   ebdt->SetLoca(&loca_list);
375   ebdt->GlyphBuilders();  // Initialize the builder.
376   return true;
377 }
378 
CopyBigGlyphMetrics(BigGlyphMetrics::Builder * source,BigGlyphMetrics::Builder * target)379 void CopyBigGlyphMetrics(BigGlyphMetrics::Builder* source,
380                          BigGlyphMetrics::Builder* target) {
381   target->SetHeight(static_cast<byte_t>(source->Height()));
382   target->SetWidth(static_cast<byte_t>(source->Width()));
383   target->SetHoriBearingX(static_cast<byte_t>(source->HoriBearingX()));
384   target->SetHoriBearingY(static_cast<byte_t>(source->HoriBearingY()));
385   target->SetHoriAdvance(static_cast<byte_t>(source->HoriAdvance()));
386   target->SetVertBearingX(static_cast<byte_t>(source->VertBearingX()));
387   target->SetVertBearingY(static_cast<byte_t>(source->VertBearingY()));
388   target->SetVertAdvance(static_cast<byte_t>(source->VertAdvance()));
389 }
390 
391 CALLER_ATTACH IndexSubTable::Builder*
ConstructIndexFormat4(IndexSubTable::Builder * b,const BitmapGlyphInfoMap & loca,int32_t * image_data_offset)392 ConstructIndexFormat4(IndexSubTable::Builder* b, const BitmapGlyphInfoMap& loca,
393                       int32_t* image_data_offset) {
394   IndexSubTableFormat4BuilderPtr builder4;
395   builder4.Attach(IndexSubTableFormat4::Builder::CreateBuilder());
396   CodeOffsetPairBuilderList offset_pairs;
397 
398   size_t offset = 0;
399   int32_t lower_bound = b->first_glyph_index();
400   int32_t upper_bound = b->last_glyph_index();
401   int32_t last_gid = -1;
402   BitmapGlyphInfoMap::const_iterator i = loca.lower_bound(lower_bound);
403   BitmapGlyphInfoMap::const_iterator end = loca.end();
404   if (i != end) {
405     last_gid = i->first;
406     builder4->set_first_glyph_index(last_gid);
407     builder4->set_image_format(b->image_format());
408     builder4->set_image_data_offset(*image_data_offset);
409   }
410   for (; i != end; i++) {
411     int32_t gid = i->first;
412     if (gid > upper_bound) {
413       break;
414     }
415     offset_pairs.push_back(
416         IndexSubTableFormat4::CodeOffsetPairBuilder(gid, offset));
417     offset += i->second->length();
418     last_gid = gid;
419   }
420   offset_pairs.push_back(
421       IndexSubTableFormat4::CodeOffsetPairBuilder(-1, offset));
422   builder4->set_last_glyph_index(last_gid);
423   *image_data_offset += offset;
424   builder4->SetOffsetArray(offset_pairs);
425 
426   return builder4.Detach();
427 }
428 
429 CALLER_ATTACH IndexSubTable::Builder*
ConstructIndexFormat5(IndexSubTable::Builder * b,const BitmapGlyphInfoMap & loca,int32_t * image_data_offset)430 ConstructIndexFormat5(IndexSubTable::Builder* b, const BitmapGlyphInfoMap& loca,
431                       int32_t* image_data_offset) {
432   IndexSubTableFormat5BuilderPtr new_builder;
433   new_builder.Attach(IndexSubTableFormat5::Builder::CreateBuilder());
434 
435   // Copy BigMetrics
436   int32_t image_size = 0;
437   if (b->index_format() == IndexSubTable::Format::FORMAT_2) {
438     IndexSubTableFormat2BuilderPtr builder2 =
439       down_cast<IndexSubTableFormat2::Builder*>(b);
440     CopyBigGlyphMetrics(builder2->BigMetrics(), new_builder->BigMetrics());
441     image_size = builder2->ImageSize();
442   } else {
443     IndexSubTableFormat5BuilderPtr builder5 =
444       down_cast<IndexSubTableFormat5::Builder*>(b);
445     BigGlyphMetricsBuilderPtr metrics_builder;
446     CopyBigGlyphMetrics(builder5->BigMetrics(), new_builder->BigMetrics());
447     image_size = builder5->ImageSize();
448   }
449 
450   IntegerList* glyph_array = new_builder->GlyphArray();
451   size_t offset = 0;
452   int32_t lower_bound = b->first_glyph_index();
453   int32_t upper_bound = b->last_glyph_index();
454   int32_t last_gid = -1;
455   BitmapGlyphInfoMap::const_iterator i = loca.lower_bound(lower_bound);
456   BitmapGlyphInfoMap::const_iterator end = loca.end();
457   if (i != end) {
458     last_gid = i->first;
459     new_builder->set_first_glyph_index(last_gid);
460     new_builder->set_image_format(b->image_format());
461     new_builder->set_image_data_offset(*image_data_offset);
462     new_builder->SetImageSize(image_size);
463   }
464   for (; i != end; i++) {
465     int32_t gid = i->first;
466     if (gid > upper_bound) {
467       break;
468     }
469     glyph_array->push_back(gid);
470     offset += i->second->length();
471     last_gid = gid;
472   }
473   new_builder->set_last_glyph_index(last_gid);
474   *image_data_offset += offset;
475   return new_builder.Detach();
476 }
477 
478 CALLER_ATTACH IndexSubTable::Builder*
SubsetIndexSubTable(IndexSubTable::Builder * builder,const BitmapGlyphInfoMap & loca,int32_t * image_data_offset)479 SubsetIndexSubTable(IndexSubTable::Builder* builder,
480                     const BitmapGlyphInfoMap& loca,
481                     int32_t* image_data_offset) {
482   switch (builder->index_format()) {
483     case IndexSubTable::Format::FORMAT_1:
484     case IndexSubTable::Format::FORMAT_3:
485     case IndexSubTable::Format::FORMAT_4:
486       return ConstructIndexFormat4(builder, loca, image_data_offset);
487     case IndexSubTable::Format::FORMAT_2:
488     case IndexSubTable::Format::FORMAT_5:
489       return ConstructIndexFormat5(builder, loca, image_data_offset);
490     default:
491       assert(false);
492       break;
493   }
494   return NULL;
495 }
496 
497 }
498 
499 namespace sfntly {
500 
501 // Not thread-safe, caller to ensure object life-time.
SubsetEBLC(EblcTable::Builder * eblc,const BitmapLocaList & new_loca)502 void SubsetEBLC(EblcTable::Builder* eblc, const BitmapLocaList& new_loca) {
503   BitmapSizeTableBuilderList* size_builders = eblc->BitmapSizeBuilders();
504   if (size_builders == NULL) {
505     return;
506   }
507 
508   int32_t image_data_offset = EbdtTable::Offset::kHeaderLength;
509   for (size_t strike = 0; strike < size_builders->size(); ++strike) {
510     IndexSubTableBuilderList* index_builders =
511         (*size_builders)[strike]->IndexSubTableBuilders();
512     for (size_t index = 0; index < index_builders->size(); ++index) {
513       IndexSubTable::Builder* new_builder_raw =
514           SubsetIndexSubTable((*index_builders)[index], new_loca[strike],
515                               &image_data_offset);
516       if (NULL != new_builder_raw) {
517         (*index_builders)[index].Attach(new_builder_raw);
518       }
519     }
520   }
521 }
522 
523 // EBLC structure (from stuartg)
524 //  header
525 //  bitmapSizeTable[]
526 //    one per strike
527 //    holds strike metrics - sbitLineMetrics
528 //    holds info about indexSubTableArray
529 //  indexSubTableArray[][]
530 //    one per strike and then one per indexSubTable for that strike
531 //    holds info about the indexSubTable
532 //    the indexSubTable entries pointed to can be of different formats
533 //  indexSubTable
534 //    one per indexSubTableArray entry
535 //    tells how to get the glyphs
536 //    may hold the glyph metrics if they are uniform for all the glyphs in range
537 // Please note that the structure can also be
538 //  {indexSubTableArray[], indexSubTables[]}[]
539 //  This way is also legal and in fact how Microsoft fonts are laid out.
540 //
541 // There is nothing that says that the indexSubTableArray entries and/or the
542 // indexSubTable items need to be unique. They may be shared between strikes.
543 //
544 // EBDT structure:
545 //  header
546 //  glyphs
547 //    amorphous blob of data
548 //    different glyphs that are only able to be figured out from the EBLC table
549 //    may hold metrics - depends on the EBLC entry that pointed to them
550 
// Subsetting EBLC table (from arthurhsu)
//  Most pages use only a fraction (hundreds or fewer) of the glyphs in a given
//  font (which can have >20K glyphs for CJK).  It's safe to assume that the
//  subset font will have sparse bitmap glyphs.  So we reconstruct the EBLC
//  table as format 4 or 5 here.
556 
// Result of SetupBitmapBuilders(): tells Subset() which groups of bitmap
// tables must not be copied from the original font.
enum BuildersToRemove {
  kRemoveNone = 0,         // Keep both table groups.
  kRemoveBDAT = 1,         // Drop bdat, bloc and bhed.
  kRemoveBDATAndEBDT = 2,  // Drop both table groups.
  kRemoveEBDT = 3          // Drop EBDT, EBLC and EBSC.
};
563 
SetupBitmapBuilders(Font * font,Font::Builder * font_builder,const IntegerSet & glyph_ids)564 int SetupBitmapBuilders(Font* font, Font::Builder* font_builder,
565                         const IntegerSet& glyph_ids) {
566   if (!font || !font_builder) {
567     return false;
568   }
569 
570   // Check if bitmap table exists.
571   EbdtTablePtr ebdt_table = down_cast<EbdtTable*>(font->GetTable(Tag::EBDT));
572   EblcTablePtr eblc_table = down_cast<EblcTable*>(font->GetTable(Tag::EBLC));
573   bool use_ebdt = (ebdt_table != NULL && eblc_table != NULL);
574   if (!use_ebdt) {
575     ebdt_table = down_cast<EbdtTable*>(font->GetTable(Tag::bdat));
576     eblc_table = down_cast<EblcTable*>(font->GetTable(Tag::bloc));
577     if (ebdt_table == NULL || eblc_table == NULL) {
578       return kRemoveNone;
579     }
580   }
581 
582   // If the bitmap table's size is too small, skip subsetting.
583   if (ebdt_table->DataLength() + eblc_table->DataLength() <
584       BITMAP_SIZE_THRESHOLD) {
585     return use_ebdt ? kRemoveBDAT : kRemoveNone;
586   }
587 
588   // Get the builders.
589   EbdtTableBuilderPtr ebdt_table_builder = down_cast<EbdtTable::Builder*>(
590       font_builder->NewTableBuilder(use_ebdt ? Tag::EBDT : Tag::bdat,
591                                     ebdt_table->ReadFontData()));
592   EblcTableBuilderPtr eblc_table_builder = down_cast<EblcTable::Builder*>(
593       font_builder->NewTableBuilder(use_ebdt ? Tag::EBLC : Tag::bloc,
594                                     eblc_table->ReadFontData()));
595   if (ebdt_table_builder == NULL || eblc_table_builder == NULL) {
596     // Out of memory.
597     return use_ebdt ? kRemoveBDAT : kRemoveNone;
598   }
599 
600   if (!InitializeBitmapBuilder(ebdt_table_builder, eblc_table_builder,
601                                glyph_ids)) {
602     // Bitmap tables do not cover the glyphs in our subset.
603     font_builder->RemoveTableBuilder(use_ebdt ? Tag::EBLC : Tag::bloc);
604     font_builder->RemoveTableBuilder(use_ebdt ? Tag::EBDT : Tag::bdat);
605     return use_ebdt ? kRemoveBDATAndEBDT : kRemoveEBDT;
606   }
607 
608   BitmapLocaList new_loca;
609   ebdt_table_builder->GenerateLocaList(&new_loca);
610   SubsetEBLC(eblc_table_builder, new_loca);
611 
612   return use_ebdt ? kRemoveBDAT : kRemoveNone;
613 }
614 
SubsetterImpl()615 SubsetterImpl::SubsetterImpl() {
616 }
617 
~SubsetterImpl()618 SubsetterImpl::~SubsetterImpl() {
619 }
620 
LoadFont(int font_index,const unsigned char * original_font,size_t font_size)621 bool SubsetterImpl::LoadFont(int font_index,
622                              const unsigned char* original_font,
623                              size_t font_size) {
624   MemoryInputStream mis;
625   mis.Attach(original_font, font_size);
626   if (factory_ == NULL) {
627     factory_.Attach(FontFactory::GetInstance());
628   }
629 
630   FontArray font_array;
631   factory_->LoadFonts(&mis, &font_array);
632   if (font_index < 0 || (size_t)font_index >= font_array.size()) {
633     return false;
634   }
635   font_ = font_array[font_index].p_;
636   return font_ != NULL;
637 }
638 
LoadFont(const char * font_name,const unsigned char * original_font,size_t font_size)639 bool SubsetterImpl::LoadFont(const char* font_name,
640                              const unsigned char* original_font,
641                              size_t font_size) {
642   MemoryInputStream mis;
643   mis.Attach(original_font, font_size);
644   if (factory_ == NULL) {
645     factory_.Attach(FontFactory::GetInstance());
646   }
647 
648   FontArray font_array;
649   factory_->LoadFonts(&mis, &font_array);
650   font_ = FindFont(font_name, font_array);
651   if (font_ == NULL) {
652     return false;
653   }
654 
655   return true;
656 }
657 
SubsetFont(const unsigned int * glyph_ids,size_t glyph_count,unsigned char ** output_buffer)658 int SubsetterImpl::SubsetFont(const unsigned int* glyph_ids,
659                               size_t glyph_count,
660                               unsigned char** output_buffer) {
661   if (factory_ == NULL || font_ == NULL) {
662     return -1;
663   }
664 
665   // Find glyf and loca table.
666   GlyphTablePtr glyph_table =
667       down_cast<GlyphTable*>(font_->GetTable(Tag::glyf));
668   LocaTablePtr loca_table = down_cast<LocaTable*>(font_->GetTable(Tag::loca));
669   if (glyph_table == NULL || loca_table == NULL) {
670     // We are not able to subset the font.
671     return 0;
672   }
673 
674   IntegerSet glyph_id_processed;
675   if (!ResolveCompositeGlyphs(glyph_table, loca_table,
676                               glyph_ids, glyph_count, &glyph_id_processed) ||
677       glyph_id_processed.empty()) {
678     return 0;
679   }
680 
681   FontPtr new_font;
682   new_font.Attach(Subset(glyph_id_processed, glyph_table, loca_table));
683   if (new_font == NULL) {
684     return 0;
685   }
686 
687   MemoryOutputStream output_stream;
688   factory_->SerializeFont(new_font, &output_stream);
689   int length = static_cast<int>(output_stream.Size());
690   if (length > 0) {
691     *output_buffer = new unsigned char[length];
692     memcpy(*output_buffer, output_stream.Get(), length);
693   }
694 
695   return length;
696 }
697 
698 // Long comments regarding TTF tables and PDF (from stuartg)
699 //
700 // According to PDF spec 1.4 (section 5.8), the following tables must be
701 // present:
702 //  head, hhea, loca, maxp, cvt, prep, glyf, hmtx, fpgm
703 //  cmap if font is used with a simple font dict and not a CIDFont dict
704 //
705 // Other tables we need to keep for PDF rendering to support zoom in/out:
706 //  bdat, bloc, ebdt, eblc, ebsc, gasp
707 //
708 // Special table:
709 //  CFF - if you have this table then you shouldn't have a glyf table and this
710 //        is the table with all the glyphs.  Shall skip subsetting completely
711 //        since sfntly is not capable of subsetting it for now.
712 //  post - extra info here for printing on PostScript printers but maybe not
713 //         enough to outweigh the space taken by the names
714 //
715 // Tables to break apart:
716 //  name - could throw away all but one language and one platform strings/ might
717 //         throw away some of the name entries
718 //  cmap - could strip out non-needed cmap subtables
719 //       - format 4 subtable can be subsetted as well using sfntly
720 //
721 // Graphite tables:
722 //  silf, glat, gloc, feat - should be okay to strip out
723 //
724 // Tables that can be discarded:
725 //  OS/2 - everything here is for layout and description of the font that is
726 //         elsewhere (some in the PDF objects)
727 //  BASE, GDEF, GSUB, GPOS, JSTF - all used for layout
728 //  kern - old style layout
729 //  DSIG - this will be invalid after subsetting
730 //  hdmx - layout
731 //  PCLT - metadata that's not needed
732 //  vmtx - layout
733 //  vhea - layout
734 //  VDMX
735 //  VORG - not used by TT/OT - used by CFF
736 //  hsty - would be surprised to see one of these - used on the Newton
737 //  AAT tables - mort, morx, feat, acnt, bsin, just, lcar, fdsc, fmtx, prop,
738 //               Zapf, opbd, trak, fvar, gvar, avar, cvar
739 //             - these are all layout tables and once layout happens are not
740 //               needed anymore
741 //  LTSH - layout
742 
743 CALLER_ATTACH
Subset(const IntegerSet & glyph_ids,GlyphTable * glyf,LocaTable * loca)744 Font* SubsetterImpl::Subset(const IntegerSet& glyph_ids, GlyphTable* glyf,
745                             LocaTable* loca) {
746   // The const is initialized here to workaround VC bug of rendering all Tag::*
747   // as 0.  These tags represents the TTF tables that we will embed in subset
748   // font.
749   const int32_t TABLES_IN_SUBSET[] = {
750     Tag::head, Tag::hhea, Tag::loca, Tag::maxp, Tag::cvt,
751     Tag::prep, Tag::glyf, Tag::hmtx, Tag::fpgm, Tag::EBDT,
752     Tag::EBLC, Tag::EBSC, Tag::bdat, Tag::bloc, Tag::bhed,
753     Tag::cmap,  // Keep here for future tagged PDF development.
754     Tag::name,  // Keep here due to legal concerns: copyright info inside.
755   };
756 
757   // Setup font builders we need.
758   FontBuilderPtr font_builder;
759   font_builder.Attach(factory_->NewFontBuilder());
760   IntegerSet remove_tags;
761 
762   if (SetupGlyfBuilders(font_builder, glyf, loca, glyph_ids)) {
763     remove_tags.insert(Tag::glyf);
764     remove_tags.insert(Tag::loca);
765   }
766 
767   // For old Apple bitmap fonts, they have only bdats and bhed is identical
768   // to head.  As a result, we can't remove bdat tables for those fonts.
769   int setup_result = SetupBitmapBuilders(font_, font_builder, glyph_ids);
770   if (setup_result == kRemoveBDATAndEBDT || setup_result == kRemoveEBDT) {
771     remove_tags.insert(Tag::EBDT);
772     remove_tags.insert(Tag::EBLC);
773     remove_tags.insert(Tag::EBSC);
774   }
775 
776   if (setup_result == kRemoveBDAT || setup_result == kRemoveBDATAndEBDT) {
777     remove_tags.insert(Tag::bdat);
778     remove_tags.insert(Tag::bloc);
779     remove_tags.insert(Tag::bhed);
780   }
781 
782   IntegerSet allowed_tags;
783   for (size_t i = 0; i < sizeof(TABLES_IN_SUBSET) / sizeof(int32_t); ++i) {
784     allowed_tags.insert(TABLES_IN_SUBSET[i]);
785   }
786 
787   IntegerSet result;
788   std::set_difference(allowed_tags.begin(), allowed_tags.end(),
789                       remove_tags.begin(), remove_tags.end(),
790                       std::inserter(result, result.end()));
791   allowed_tags = result;
792 
793   // Setup remaining builders.
794   for (IntegerSet::iterator i = allowed_tags.begin(), e = allowed_tags.end();
795                             i != e; ++i) {
796     Table* table = font_->GetTable(*i);
797     if (table) {
798       font_builder->NewTableBuilder(*i, table->ReadFontData());
799     }
800   }
801 
802   return font_builder->Build();
803 }
804 
805 }  // namespace sfntly
806