1 /*
2  * Copyright 2011 Google Inc. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "subsetter_impl.h"
18 
19 #include <string.h>
20 
21 #include <algorithm>
22 #include <iterator>
23 #include <map>
24 #include <set>
25 #include <string>
26 
27 #include <unicode/ustring.h>
28 
29 #include "sfntly/table/bitmap/eblc_table.h"
30 #include "sfntly/table/bitmap/ebdt_table.h"
31 #include "sfntly/table/bitmap/index_sub_table.h"
32 #include "sfntly/table/bitmap/index_sub_table_format1.h"
33 #include "sfntly/table/bitmap/index_sub_table_format2.h"
34 #include "sfntly/table/bitmap/index_sub_table_format3.h"
35 #include "sfntly/table/bitmap/index_sub_table_format4.h"
36 #include "sfntly/table/bitmap/index_sub_table_format5.h"
37 #include "sfntly/table/core/name_table.h"
38 #include "sfntly/tag.h"
39 #include "sfntly/data/memory_byte_array.h"
40 #include "sfntly/port/memory_input_stream.h"
41 #include "sfntly/port/memory_output_stream.h"
42 
43 namespace {
44 
45 using namespace sfntly;
46 
/**
 * std::u16string and icu::UnicodeString can't be used here.
 * UChar is not char16_t on every platform, so std::u16string is avoided.
 * The icu::UnicodeString C++ API is also avoided, to stay portable across
 * platforms despite C++ ABI compatibility issues.
 */
53 typedef std::basic_string<UChar> UCharString;
54 
// Bitmap subsetting is only attempted when the combined size of the bitmap
// data and bitmap location tables is at least this many bytes (16KB).
static const int BITMAP_SIZE_THRESHOLD = 16 * 1024;
57 
ConstructName(UChar * name_part,UCharString * name,int32_t name_id)58 void ConstructName(UChar* name_part, UCharString* name, int32_t name_id) {
59   switch (name_id) {
60     case NameId::kFullFontName:
61       *name = name_part;
62       break;
63     case NameId::kFontFamilyName:
64     case NameId::kPreferredFamily:
65     case NameId::kWWSFamilyName: {
66       UCharString original = *name;
67       *name = name_part;
68       *name += original;
69       break;
70     }
71     case NameId::kFontSubfamilyName:
72     case NameId::kPreferredSubfamily:
73     case NameId::kWWSSubfamilyName:
74       *name += name_part;
75       break;
76     default:
77       // This name part is not used to construct font name (e.g. copyright).
78       // Simply ignore it.
79       break;
80   }
81 }
82 
83 // Convert UTF-8 string into UTF-16 string.
84 //
85 // Ill-formed input is replaced with U+FFFD.
86 // Otherwise, return empty string if other error occurs during the conversion.
ConvertFromUtf8(const char * src)87 UCharString ConvertFromUtf8(const char* src) {
88   int32_t srcLength = strlen(src);
89   int32_t destCapacity = srcLength + 1;
90   UChar* buffer = new UChar[destCapacity];
91   UCharString dest;
92   if (buffer == NULL) {
93     return dest;
94   }
95   int32_t destLength;
96   UErrorCode errorCode = U_ZERO_ERROR;
97   u_strFromUTF8WithSub(buffer, destCapacity, &destLength, src, srcLength,
98                        0xfffd, // Unicode replacement character
99                        NULL,
100                        &errorCode);
101   if (U_SUCCESS(errorCode)) {
102     dest.append(buffer, destLength);
103   }
104   delete[] buffer;
105   return dest;
106 }
107 
CaseCompareUtf16(const UCharString & str1,const UCharString & str2,uint32_t option)108 int32_t CaseCompareUtf16(const UCharString& str1,
109                          const UCharString& str2, uint32_t option) {
110   UErrorCode errorCode = U_ZERO_ERROR;
111   return u_strCaseCompare(str1.c_str(), str1.length(), str2.c_str(),
112                           str2.length(), option, &errorCode);
113 }
114 
HashCode(int32_t platform_id,int32_t encoding_id,int32_t language_id,int32_t name_id)115 int32_t HashCode(int32_t platform_id, int32_t encoding_id, int32_t language_id,
116                  int32_t name_id) {
117   int32_t result = platform_id << 24 | encoding_id << 16 | language_id << 8;
118   if (name_id == NameId::kFullFontName) {
119     result |= 0xff;
120   } else if (name_id == NameId::kPreferredFamily ||
121              name_id == NameId::kPreferredSubfamily) {
122     result |= 0xf;
123   } else if (name_id == NameId::kWWSFamilyName ||
124              name_id == NameId::kWWSSubfamilyName) {
125     result |= 1;
126   }
127   return result;
128 }
129 
HasName(const char * font_name,Font * font)130 bool HasName(const char* font_name, Font* font) {
131   UCharString font_string = ConvertFromUtf8(font_name);
132   if (font_string.empty())
133     return false;
134   UCharString regular_suffix = ConvertFromUtf8("Regular");
135   UCharString alt_font_string = font_string;
136   alt_font_string += regular_suffix;
137 
138   typedef std::map<int32_t, UCharString> NameMap;
139   NameMap names;
140   NameTablePtr name_table = down_cast<NameTable*>(font->GetTable(Tag::name));
141   if (name_table == NULL) {
142     return false;
143   }
144 
145   for (int32_t i = 0; i < name_table->NameCount(); ++i) {
146     switch (name_table->NameId(i)) {
147       case NameId::kFontFamilyName:
148       case NameId::kFontSubfamilyName:
149       case NameId::kFullFontName:
150       case NameId::kPreferredFamily:
151       case NameId::kPreferredSubfamily:
152       case NameId::kWWSFamilyName:
153       case NameId::kWWSSubfamilyName: {
154         UChar* name_part = name_table->Name(i);
155         if (name_part == NULL) {
156           continue;
157         }
158         int32_t hash_code = HashCode(name_table->PlatformId(i),
159                                      name_table->EncodingId(i),
160                                      name_table->LanguageId(i),
161                                      name_table->NameId(i));
162         ConstructName(name_part, &(names[hash_code]), name_table->NameId(i));
163         delete[] name_part;
164         break;
165       }
166       default:
167         break;
168     }
169   }
170 
171   if (!names.empty()) {
172     for (NameMap::iterator i = names.begin(), e = names.end(); i != e; ++i) {
173       if (CaseCompareUtf16(i->second, font_string, 0) == 0 ||
174           CaseCompareUtf16(i->second, alt_font_string, 0) == 0) {
175         return true;
176       }
177     }
178   }
179   return false;
180 }
181 
FindFont(const char * font_name,const FontArray & font_array)182 Font* FindFont(const char* font_name, const FontArray& font_array) {
183   if (font_array.empty() || font_array[0] == NULL) {
184     return NULL;
185   }
186 
187   if (font_name && strlen(font_name)) {
188     for (FontArray::const_iterator i = font_array.begin(), e = font_array.end();
189          i != e; ++i) {
190       if (HasName(font_name, i->p_)) {
191         return i->p_;
192       }
193     }
194   }
195 
196   return font_array[0].p_;
197 }
198 
ResolveCompositeGlyphs(GlyphTable * glyph_table,LocaTable * loca_table,const unsigned int * glyph_ids,size_t glyph_count,IntegerSet * glyph_id_processed)199 bool ResolveCompositeGlyphs(GlyphTable* glyph_table,
200                             LocaTable* loca_table,
201                             const unsigned int* glyph_ids,
202                             size_t glyph_count,
203                             IntegerSet* glyph_id_processed) {
204   if (glyph_table == NULL || loca_table == NULL ||
205       glyph_ids == NULL || glyph_count == 0 || glyph_id_processed == NULL) {
206     return false;
207   }
208 
209   // Sort and uniquify glyph ids.
210   IntegerSet glyph_id_remaining;
211   glyph_id_remaining.insert(0);  // Always include glyph id 0.
212   for (size_t i = 0; i < glyph_count; ++i) {
213     glyph_id_remaining.insert(glyph_ids[i]);
214   }
215 
216   // Identify if any given glyph id maps to a composite glyph.  If so, include
217   // the glyphs referenced by that composite glyph.
218   while (!glyph_id_remaining.empty()) {
219     IntegerSet comp_glyph_id;
220     for (IntegerSet::iterator i = glyph_id_remaining.begin(),
221                               e = glyph_id_remaining.end(); i != e; ++i) {
222       if (*i < 0 || *i >= loca_table->num_glyphs()) {
223         // Invalid glyph id, ignore.
224         continue;
225       }
226 
227       int32_t length = loca_table->GlyphLength(*i);
228       if (length == 0) {
229         // Empty glyph, ignore.
230         continue;
231       }
232       int32_t offset = loca_table->GlyphOffset(*i);
233 
234       GlyphPtr glyph;
235       glyph.Attach(glyph_table->GetGlyph(offset, length));
236       if (glyph == NULL) {
237         // Error finding glyph, ignore.
238         continue;
239       }
240 
241       if (glyph->GlyphType() == GlyphType::kComposite) {
242         Ptr<GlyphTable::CompositeGlyph> comp_glyph =
243             down_cast<GlyphTable::CompositeGlyph*>(glyph.p_);
244         for (int32_t j = 0; j < comp_glyph->NumGlyphs(); ++j) {
245           int32_t glyph_id = comp_glyph->GlyphIndex(j);
246           if (glyph_id_processed->find(glyph_id) == glyph_id_processed->end() &&
247               glyph_id_remaining.find(glyph_id) == glyph_id_remaining.end()) {
248             comp_glyph_id.insert(comp_glyph->GlyphIndex(j));
249           }
250         }
251       }
252 
253       glyph_id_processed->insert(*i);
254     }
255 
256     glyph_id_remaining.clear();
257     glyph_id_remaining = comp_glyph_id;
258   }
259 
260   return true;
261 }
262 
SetupGlyfBuilders(Font::Builder * font_builder,GlyphTable * glyph_table,LocaTable * loca_table,const IntegerSet & glyph_ids)263 bool SetupGlyfBuilders(Font::Builder* font_builder,
264                        GlyphTable* glyph_table,
265                        LocaTable* loca_table,
266                        const IntegerSet& glyph_ids) {
267   if (!font_builder || !glyph_table || !loca_table) {
268     return false;
269   }
270 
271   GlyphTableBuilderPtr glyph_table_builder =
272       down_cast<GlyphTable::Builder*>(font_builder->NewTableBuilder(Tag::glyf));
273   LocaTableBuilderPtr loca_table_builder =
274       down_cast<LocaTable::Builder*>(font_builder->NewTableBuilder(Tag::loca));
275   if (glyph_table_builder == NULL || loca_table_builder == NULL) {
276     // Out of memory.
277     return false;
278   }
279 
280   // Extract glyphs and setup loca list.
281   IntegerList loca_list;
282   loca_list.resize(loca_table->num_glyphs());
283   loca_list.push_back(0);
284   int32_t last_glyph_id = 0;
285   int32_t last_offset = 0;
286   GlyphTable::GlyphBuilderList* glyph_builders =
287       glyph_table_builder->GlyphBuilders();
288   for (IntegerSet::const_iterator i = glyph_ids.begin(), e = glyph_ids.end();
289                                   i != e; ++i) {
290     int32_t length = loca_table->GlyphLength(*i);
291     int32_t offset = loca_table->GlyphOffset(*i);
292 
293     GlyphPtr glyph;
294     glyph.Attach(glyph_table->GetGlyph(offset, length));
295 
296     // Add glyph to new glyf table.
297     ReadableFontDataPtr data = glyph->ReadFontData();
298     WritableFontDataPtr copy_data;
299     copy_data.Attach(WritableFontData::CreateWritableFontData(data->Length()));
300     data->CopyTo(copy_data);
301     GlyphBuilderPtr glyph_builder;
302     glyph_builder.Attach(glyph_table_builder->GlyphBuilder(copy_data));
303     glyph_builders->push_back(glyph_builder);
304 
305     // Configure loca list.
306     for (int32_t j = last_glyph_id + 1; j <= *i; ++j) {
307       loca_list[j] = last_offset;
308     }
309     last_offset += length;
310     loca_list[*i + 1] = last_offset;
311     last_glyph_id = *i;
312   }
313   for (int32_t j = last_glyph_id + 1; j <= loca_table->num_glyphs(); ++j) {
314     loca_list[j] = last_offset;
315   }
316   loca_table_builder->SetLocaList(&loca_list);
317 
318   return true;
319 }
320 
HasOverlap(int32_t range_begin,int32_t range_end,const IntegerSet & glyph_ids)321 bool HasOverlap(int32_t range_begin, int32_t range_end,
322                 const IntegerSet& glyph_ids) {
323   if (range_begin == range_end) {
324     return glyph_ids.find(range_begin) != glyph_ids.end();
325   } else if (range_end > range_begin) {
326     IntegerSet::const_iterator left = glyph_ids.lower_bound(range_begin);
327     IntegerSet::const_iterator right = glyph_ids.lower_bound(range_end);
328     return right != left;
329   }
330   return false;
331 }
332 
333 // Initialize builder, returns false if glyph_id subset is not covered.
334 // Not thread-safe, caller to ensure object life-time.
InitializeBitmapBuilder(EbdtTable::Builder * ebdt,EblcTable::Builder * eblc,const IntegerSet & glyph_ids)335 bool InitializeBitmapBuilder(EbdtTable::Builder* ebdt, EblcTable::Builder* eblc,
336                              const IntegerSet& glyph_ids) {
337   BitmapLocaList loca_list;
338   BitmapSizeTableBuilderList* strikes = eblc->BitmapSizeBuilders();
339 
340   // Note: Do not call eblc_builder->GenerateLocaList(&loca_list) and then
341   //       ebdt_builder->SetLoca(loca_list).  For fonts like SimSun, there are
342   //       >28K glyphs inside, where a typical usage will be <1K glyphs.  Doing
343   //       the calls improperly will result in creation of >100K objects that
344   //       will be destroyed immediately, inducing significant slowness.
345   IntegerList removed_strikes;
346   for (size_t i = 0; i < strikes->size(); i++) {
347     if (!HasOverlap((*strikes)[i]->StartGlyphIndex(),
348                     (*strikes)[i]->EndGlyphIndex(), glyph_ids)) {
349       removed_strikes.push_back(i);
350       continue;
351     }
352 
353     IndexSubTableBuilderList* index_builders =
354         (*strikes)[i]->IndexSubTableBuilders();
355     IntegerList removed_indexes;
356     BitmapGlyphInfoMap info_map;
357     for (size_t j = 0; j < index_builders->size(); ++j) {
358       if ((*index_builders)[j] == NULL) {
359         // Subtable is malformed, let's just skip it.
360         removed_indexes.push_back(j);
361         continue;
362       }
363       int32_t first_glyph_id = (*index_builders)[j]->first_glyph_index();
364       int32_t last_glyph_id = (*index_builders)[j]->last_glyph_index();
365       if (!HasOverlap(first_glyph_id, last_glyph_id, glyph_ids)) {
366         removed_indexes.push_back(j);
367         continue;
368       }
369       for (IntegerSet::const_iterator gid = glyph_ids.begin(),
370                                       gid_end = glyph_ids.end();
371                                       gid != gid_end; gid++) {
372         if (*gid < first_glyph_id) {
373           continue;
374         }
375         if (*gid > last_glyph_id) {
376           break;
377         }
378         BitmapGlyphInfoPtr info;
379         info.Attach((*index_builders)[j]->GlyphInfo(*gid));
380         if (info && info->length()) {  // Do not include gid without bitmap
381           info_map[*gid] = info;
382         }
383       }
384     }
385     if (!info_map.empty()) {
386       loca_list.push_back(info_map);
387     } else {
388       removed_strikes.push_back(i);  // Detected null entries.
389     }
390 
391     // Remove unused index sub tables
392     for (IntegerList::reverse_iterator j = removed_indexes.rbegin(),
393                                        e = removed_indexes.rend();
394                                        j != e; j++) {
395       index_builders->erase(index_builders->begin() + *j);
396     }
397   }
398   if (removed_strikes.size() == strikes->size() || loca_list.empty()) {
399     return false;
400   }
401 
402   for (IntegerList::reverse_iterator i = removed_strikes.rbegin(),
403                                      e = removed_strikes.rend(); i != e; i++) {
404     strikes->erase(strikes->begin() + *i);
405   }
406 
407   if (strikes->empty()) {  // no glyph covered, can safely drop the builders.
408     return false;
409   }
410 
411   ebdt->SetLoca(&loca_list);
412   ebdt->GlyphBuilders();  // Initialize the builder.
413   return true;
414 }
415 
CopyBigGlyphMetrics(BigGlyphMetrics::Builder * source,BigGlyphMetrics::Builder * target)416 void CopyBigGlyphMetrics(BigGlyphMetrics::Builder* source,
417                          BigGlyphMetrics::Builder* target) {
418   target->SetHeight(static_cast<byte_t>(source->Height()));
419   target->SetWidth(static_cast<byte_t>(source->Width()));
420   target->SetHoriBearingX(static_cast<byte_t>(source->HoriBearingX()));
421   target->SetHoriBearingY(static_cast<byte_t>(source->HoriBearingY()));
422   target->SetHoriAdvance(static_cast<byte_t>(source->HoriAdvance()));
423   target->SetVertBearingX(static_cast<byte_t>(source->VertBearingX()));
424   target->SetVertBearingY(static_cast<byte_t>(source->VertBearingY()));
425   target->SetVertAdvance(static_cast<byte_t>(source->VertAdvance()));
426 }
427 
428 CALLER_ATTACH IndexSubTable::Builder*
ConstructIndexFormat4(IndexSubTable::Builder * b,const BitmapGlyphInfoMap & loca,int32_t * image_data_offset)429 ConstructIndexFormat4(IndexSubTable::Builder* b, const BitmapGlyphInfoMap& loca,
430                       int32_t* image_data_offset) {
431   IndexSubTableFormat4BuilderPtr builder4;
432   builder4.Attach(IndexSubTableFormat4::Builder::CreateBuilder());
433   CodeOffsetPairBuilderList offset_pairs;
434 
435   size_t offset = 0;
436   int32_t lower_bound = b->first_glyph_index();
437   int32_t upper_bound = b->last_glyph_index();
438   int32_t last_gid = -1;
439   BitmapGlyphInfoMap::const_iterator i = loca.lower_bound(lower_bound);
440   BitmapGlyphInfoMap::const_iterator end = loca.end();
441   if (i != end) {
442     last_gid = i->first;
443     builder4->set_first_glyph_index(last_gid);
444     builder4->set_image_format(b->image_format());
445     builder4->set_image_data_offset(*image_data_offset);
446   }
447   for (; i != end; i++) {
448     int32_t gid = i->first;
449     if (gid > upper_bound) {
450       break;
451     }
452     offset_pairs.push_back(
453         IndexSubTableFormat4::CodeOffsetPairBuilder(gid, offset));
454     offset += i->second->length();
455     last_gid = gid;
456   }
457   offset_pairs.push_back(
458       IndexSubTableFormat4::CodeOffsetPairBuilder(-1, offset));
459   builder4->set_last_glyph_index(last_gid);
460   *image_data_offset += offset;
461   builder4->SetOffsetArray(offset_pairs);
462 
463   return builder4.Detach();
464 }
465 
466 CALLER_ATTACH IndexSubTable::Builder*
ConstructIndexFormat5(IndexSubTable::Builder * b,const BitmapGlyphInfoMap & loca,int32_t * image_data_offset)467 ConstructIndexFormat5(IndexSubTable::Builder* b, const BitmapGlyphInfoMap& loca,
468                       int32_t* image_data_offset) {
469   IndexSubTableFormat5BuilderPtr new_builder;
470   new_builder.Attach(IndexSubTableFormat5::Builder::CreateBuilder());
471 
472   // Copy BigMetrics
473   int32_t image_size = 0;
474   if (b->index_format() == IndexSubTable::Format::FORMAT_2) {
475     IndexSubTableFormat2BuilderPtr builder2 =
476       down_cast<IndexSubTableFormat2::Builder*>(b);
477     CopyBigGlyphMetrics(builder2->BigMetrics(), new_builder->BigMetrics());
478     image_size = builder2->ImageSize();
479   } else {
480     IndexSubTableFormat5BuilderPtr builder5 =
481       down_cast<IndexSubTableFormat5::Builder*>(b);
482     BigGlyphMetricsBuilderPtr metrics_builder;
483     CopyBigGlyphMetrics(builder5->BigMetrics(), new_builder->BigMetrics());
484     image_size = builder5->ImageSize();
485   }
486 
487   IntegerList* glyph_array = new_builder->GlyphArray();
488   size_t offset = 0;
489   int32_t lower_bound = b->first_glyph_index();
490   int32_t upper_bound = b->last_glyph_index();
491   int32_t last_gid = -1;
492   BitmapGlyphInfoMap::const_iterator i = loca.lower_bound(lower_bound);
493   BitmapGlyphInfoMap::const_iterator end = loca.end();
494   if (i != end) {
495     last_gid = i->first;
496     new_builder->set_first_glyph_index(last_gid);
497     new_builder->set_image_format(b->image_format());
498     new_builder->set_image_data_offset(*image_data_offset);
499     new_builder->SetImageSize(image_size);
500   }
501   for (; i != end; i++) {
502     int32_t gid = i->first;
503     if (gid > upper_bound) {
504       break;
505     }
506     glyph_array->push_back(gid);
507     offset += i->second->length();
508     last_gid = gid;
509   }
510   new_builder->set_last_glyph_index(last_gid);
511   *image_data_offset += offset;
512   return new_builder.Detach();
513 }
514 
515 CALLER_ATTACH IndexSubTable::Builder*
SubsetIndexSubTable(IndexSubTable::Builder * builder,const BitmapGlyphInfoMap & loca,int32_t * image_data_offset)516 SubsetIndexSubTable(IndexSubTable::Builder* builder,
517                     const BitmapGlyphInfoMap& loca,
518                     int32_t* image_data_offset) {
519   switch (builder->index_format()) {
520     case IndexSubTable::Format::FORMAT_1:
521     case IndexSubTable::Format::FORMAT_3:
522     case IndexSubTable::Format::FORMAT_4:
523       return ConstructIndexFormat4(builder, loca, image_data_offset);
524     case IndexSubTable::Format::FORMAT_2:
525     case IndexSubTable::Format::FORMAT_5:
526       return ConstructIndexFormat5(builder, loca, image_data_offset);
527     default:
528       assert(false);
529       break;
530   }
531   return NULL;
532 }
533 
534 }
535 
536 namespace sfntly {
537 
538 // Not thread-safe, caller to ensure object life-time.
SubsetEBLC(EblcTable::Builder * eblc,const BitmapLocaList & new_loca)539 void SubsetEBLC(EblcTable::Builder* eblc, const BitmapLocaList& new_loca) {
540   BitmapSizeTableBuilderList* size_builders = eblc->BitmapSizeBuilders();
541   if (size_builders == NULL) {
542     return;
543   }
544 
545   int32_t image_data_offset = EbdtTable::Offset::kHeaderLength;
546   for (size_t strike = 0; strike < size_builders->size(); ++strike) {
547     IndexSubTableBuilderList* index_builders =
548         (*size_builders)[strike]->IndexSubTableBuilders();
549     for (size_t index = 0; index < index_builders->size(); ++index) {
550       IndexSubTable::Builder* new_builder_raw =
551           SubsetIndexSubTable((*index_builders)[index], new_loca[strike],
552                               &image_data_offset);
553       if (NULL != new_builder_raw) {
554         (*index_builders)[index].Attach(new_builder_raw);
555       }
556     }
557   }
558 }
559 
560 // EBLC structure (from stuartg)
561 //  header
562 //  bitmapSizeTable[]
563 //    one per strike
564 //    holds strike metrics - sbitLineMetrics
565 //    holds info about indexSubTableArray
566 //  indexSubTableArray[][]
567 //    one per strike and then one per indexSubTable for that strike
568 //    holds info about the indexSubTable
569 //    the indexSubTable entries pointed to can be of different formats
570 //  indexSubTable
571 //    one per indexSubTableArray entry
572 //    tells how to get the glyphs
573 //    may hold the glyph metrics if they are uniform for all the glyphs in range
574 // Please note that the structure can also be
575 //  {indexSubTableArray[], indexSubTables[]}[]
576 //  This way is also legal and in fact how Microsoft fonts are laid out.
577 //
578 // There is nothing that says that the indexSubTableArray entries and/or the
579 // indexSubTable items need to be unique. They may be shared between strikes.
580 //
581 // EBDT structure:
582 //  header
583 //  glyphs
584 //    amorphous blob of data
585 //    different glyphs that are only able to be figured out from the EBLC table
586 //    may hold metrics - depends on the EBLC entry that pointed to them
587 
// Subsetting EBLC table (from arthurhsu)
//  Most pages use only a small fraction (hundreds or fewer) of the glyphs in a
//  given font (which can have >20K glyphs for CJK).  It is therefore safe to
//  assume that the subset font will have sparse bitmap glyphs, so we
//  reconstruct the EBLC index sub tables as format 4 or 5 here.
593 
// Result of SetupBitmapBuilders(): tells which bitmap table families must
// not be copied from the original font into the subset font.
enum BuildersToRemove {
  kRemoveNone = 0,         // Drop neither family.
  kRemoveBDAT = 1,         // Drop bdat / bloc / bhed.
  kRemoveBDATAndEBDT = 2,  // Drop both families.
  kRemoveEBDT = 3          // Drop EBDT / EBLC / EBSC.
};
600 
SetupBitmapBuilders(Font * font,Font::Builder * font_builder,const IntegerSet & glyph_ids)601 int SetupBitmapBuilders(Font* font, Font::Builder* font_builder,
602                         const IntegerSet& glyph_ids) {
603   if (!font || !font_builder) {
604     return false;
605   }
606 
607   // Check if bitmap table exists.
608   EbdtTablePtr ebdt_table = down_cast<EbdtTable*>(font->GetTable(Tag::EBDT));
609   EblcTablePtr eblc_table = down_cast<EblcTable*>(font->GetTable(Tag::EBLC));
610   bool use_ebdt = (ebdt_table != NULL && eblc_table != NULL);
611   if (!use_ebdt) {
612     ebdt_table = down_cast<EbdtTable*>(font->GetTable(Tag::bdat));
613     eblc_table = down_cast<EblcTable*>(font->GetTable(Tag::bloc));
614     if (ebdt_table == NULL || eblc_table == NULL) {
615       return kRemoveNone;
616     }
617   }
618 
619   // If the bitmap table's size is too small, skip subsetting.
620   if (ebdt_table->DataLength() + eblc_table->DataLength() <
621       BITMAP_SIZE_THRESHOLD) {
622     return use_ebdt ? kRemoveBDAT : kRemoveNone;
623   }
624 
625   // Get the builders.
626   EbdtTableBuilderPtr ebdt_table_builder = down_cast<EbdtTable::Builder*>(
627       font_builder->NewTableBuilder(use_ebdt ? Tag::EBDT : Tag::bdat,
628                                     ebdt_table->ReadFontData()));
629   EblcTableBuilderPtr eblc_table_builder = down_cast<EblcTable::Builder*>(
630       font_builder->NewTableBuilder(use_ebdt ? Tag::EBLC : Tag::bloc,
631                                     eblc_table->ReadFontData()));
632   if (ebdt_table_builder == NULL || eblc_table_builder == NULL) {
633     // Out of memory.
634     return use_ebdt ? kRemoveBDAT : kRemoveNone;
635   }
636 
637   if (!InitializeBitmapBuilder(ebdt_table_builder, eblc_table_builder,
638                                glyph_ids)) {
639     // Bitmap tables do not cover the glyphs in our subset.
640     font_builder->RemoveTableBuilder(use_ebdt ? Tag::EBLC : Tag::bloc);
641     font_builder->RemoveTableBuilder(use_ebdt ? Tag::EBDT : Tag::bdat);
642     return use_ebdt ? kRemoveBDATAndEBDT : kRemoveEBDT;
643   }
644 
645   BitmapLocaList new_loca;
646   ebdt_table_builder->GenerateLocaList(&new_loca);
647   SubsetEBLC(eblc_table_builder, new_loca);
648 
649   return use_ebdt ? kRemoveBDAT : kRemoveNone;
650 }
651 
SubsetterImpl()652 SubsetterImpl::SubsetterImpl() {
653 }
654 
~SubsetterImpl()655 SubsetterImpl::~SubsetterImpl() {
656 }
657 
LoadFont(int font_index,const unsigned char * original_font,size_t font_size)658 bool SubsetterImpl::LoadFont(int font_index,
659                              const unsigned char* original_font,
660                              size_t font_size) {
661   MemoryInputStream mis;
662   mis.Attach(original_font, font_size);
663   if (factory_ == NULL) {
664     factory_.Attach(FontFactory::GetInstance());
665   }
666 
667   FontArray font_array;
668   factory_->LoadFonts(&mis, &font_array);
669   if (font_index < 0 || (size_t)font_index >= font_array.size()) {
670     return false;
671   }
672   font_ = font_array[font_index].p_;
673   return font_ != NULL;
674 }
675 
LoadFont(const char * font_name,const unsigned char * original_font,size_t font_size)676 bool SubsetterImpl::LoadFont(const char* font_name,
677                              const unsigned char* original_font,
678                              size_t font_size) {
679   MemoryInputStream mis;
680   mis.Attach(original_font, font_size);
681   if (factory_ == NULL) {
682     factory_.Attach(FontFactory::GetInstance());
683   }
684 
685   FontArray font_array;
686   factory_->LoadFonts(&mis, &font_array);
687   font_ = FindFont(font_name, font_array);
688   if (font_ == NULL) {
689     return false;
690   }
691 
692   return true;
693 }
694 
SubsetFont(const unsigned int * glyph_ids,size_t glyph_count,unsigned char ** output_buffer)695 int SubsetterImpl::SubsetFont(const unsigned int* glyph_ids,
696                               size_t glyph_count,
697                               unsigned char** output_buffer) {
698   if (factory_ == NULL || font_ == NULL) {
699     return -1;
700   }
701 
702   // Find glyf and loca table.
703   GlyphTablePtr glyph_table =
704       down_cast<GlyphTable*>(font_->GetTable(Tag::glyf));
705   LocaTablePtr loca_table = down_cast<LocaTable*>(font_->GetTable(Tag::loca));
706   if (glyph_table == NULL || loca_table == NULL) {
707     // We are not able to subset the font.
708     return 0;
709   }
710 
711   IntegerSet glyph_id_processed;
712   if (!ResolveCompositeGlyphs(glyph_table, loca_table,
713                               glyph_ids, glyph_count, &glyph_id_processed) ||
714       glyph_id_processed.empty()) {
715     return 0;
716   }
717 
718   FontPtr new_font;
719   new_font.Attach(Subset(glyph_id_processed, glyph_table, loca_table));
720   if (new_font == NULL) {
721     return 0;
722   }
723 
724   MemoryOutputStream output_stream;
725   factory_->SerializeFont(new_font, &output_stream);
726   int length = static_cast<int>(output_stream.Size());
727   if (length > 0) {
728     *output_buffer = new unsigned char[length];
729     memcpy(*output_buffer, output_stream.Get(), length);
730   }
731 
732   return length;
733 }
734 
735 // Long comments regarding TTF tables and PDF (from stuartg)
736 //
737 // According to PDF spec 1.4 (section 5.8), the following tables must be
738 // present:
739 //  head, hhea, loca, maxp, cvt, prep, glyf, hmtx, fpgm
740 //  cmap if font is used with a simple font dict and not a CIDFont dict
741 //
742 // Other tables we need to keep for PDF rendering to support zoom in/out:
743 //  bdat, bloc, ebdt, eblc, ebsc, gasp
744 //
745 // Special table:
746 //  CFF - if you have this table then you shouldn't have a glyf table and this
747 //        is the table with all the glyphs.  Shall skip subsetting completely
748 //        since sfntly is not capable of subsetting it for now.
749 //  post - extra info here for printing on PostScript printers but maybe not
750 //         enough to outweigh the space taken by the names
751 //
752 // Tables to break apart:
753 //  name - could throw away all but one language and one platform strings/ might
754 //         throw away some of the name entries
755 //  cmap - could strip out non-needed cmap subtables
756 //       - format 4 subtable can be subsetted as well using sfntly
757 //
758 // Graphite tables:
759 //  silf, glat, gloc, feat - should be okay to strip out
760 //
761 // Tables that can be discarded:
762 //  OS/2 - everything here is for layout and description of the font that is
763 //         elsewhere (some in the PDF objects)
764 //  BASE, GDEF, GSUB, GPOS, JSTF - all used for layout
765 //  kern - old style layout
766 //  DSIG - this will be invalid after subsetting
767 //  hdmx - layout
768 //  PCLT - metadata that's not needed
769 //  vmtx - layout
770 //  vhea - layout
771 //  VDMX
772 //  VORG - not used by TT/OT - used by CFF
773 //  hsty - would be surprised to see one of these - used on the Newton
774 //  AAT tables - mort, morx, feat, acnt, bsin, just, lcar, fdsc, fmtx, prop,
775 //               Zapf, opbd, trak, fvar, gvar, avar, cvar
776 //             - these are all layout tables and once layout happens are not
777 //               needed anymore
778 //  LTSH - layout
779 
780 CALLER_ATTACH
Subset(const IntegerSet & glyph_ids,GlyphTable * glyf,LocaTable * loca)781 Font* SubsetterImpl::Subset(const IntegerSet& glyph_ids, GlyphTable* glyf,
782                             LocaTable* loca) {
783   // The const is initialized here to workaround VC bug of rendering all Tag::*
784   // as 0.  These tags represents the TTF tables that we will embed in subset
785   // font.
786   const int32_t TABLES_IN_SUBSET[] = {
787     Tag::head, Tag::hhea, Tag::loca, Tag::maxp, Tag::cvt,
788     Tag::prep, Tag::glyf, Tag::hmtx, Tag::fpgm, Tag::EBDT,
789     Tag::EBLC, Tag::EBSC, Tag::bdat, Tag::bloc, Tag::bhed,
790     Tag::cmap,  // Keep here for future tagged PDF development.
791     Tag::name,  // Keep here due to legal concerns: copyright info inside.
792   };
793 
794   // Setup font builders we need.
795   FontBuilderPtr font_builder;
796   font_builder.Attach(factory_->NewFontBuilder());
797   IntegerSet remove_tags;
798 
799   if (SetupGlyfBuilders(font_builder, glyf, loca, glyph_ids)) {
800     remove_tags.insert(Tag::glyf);
801     remove_tags.insert(Tag::loca);
802   }
803 
804   // For old Apple bitmap fonts, they have only bdats and bhed is identical
805   // to head.  As a result, we can't remove bdat tables for those fonts.
806   int setup_result = SetupBitmapBuilders(font_, font_builder, glyph_ids);
807   if (setup_result == kRemoveBDATAndEBDT || setup_result == kRemoveEBDT) {
808     remove_tags.insert(Tag::EBDT);
809     remove_tags.insert(Tag::EBLC);
810     remove_tags.insert(Tag::EBSC);
811   }
812 
813   if (setup_result == kRemoveBDAT || setup_result == kRemoveBDATAndEBDT) {
814     remove_tags.insert(Tag::bdat);
815     remove_tags.insert(Tag::bloc);
816     remove_tags.insert(Tag::bhed);
817   }
818 
819   IntegerSet allowed_tags;
820   for (size_t i = 0; i < sizeof(TABLES_IN_SUBSET) / sizeof(int32_t); ++i) {
821     allowed_tags.insert(TABLES_IN_SUBSET[i]);
822   }
823 
824   IntegerSet result;
825   std::set_difference(allowed_tags.begin(), allowed_tags.end(),
826                       remove_tags.begin(), remove_tags.end(),
827                       std::inserter(result, result.end()));
828   allowed_tags = result;
829 
830   // Setup remaining builders.
831   for (IntegerSet::iterator i = allowed_tags.begin(), e = allowed_tags.end();
832                             i != e; ++i) {
833     Table* table = font_->GetTable(*i);
834     if (table) {
835       font_builder->NewTableBuilder(*i, table->ReadFontData());
836     }
837   }
838 
839   return font_builder->Build();
840 }
841 
842 }  // namespace sfntly
843