1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/font/cpdf_cidfont.h"
8 
9 #include <algorithm>
10 #include <limits>
11 #include <vector>
12 
13 #include "build/build_config.h"
14 #include "core/fpdfapi/cmaps/fpdf_cmaps.h"
15 #include "core/fpdfapi/font/cfx_cttgsubtable.h"
16 #include "core/fpdfapi/font/cpdf_cid2unicodemap.h"
17 #include "core/fpdfapi/font/cpdf_cmap.h"
18 #include "core/fpdfapi/font/cpdf_cmapparser.h"
19 #include "core/fpdfapi/font/cpdf_fontencoding.h"
20 #include "core/fpdfapi/font/cpdf_fontglobals.h"
21 #include "core/fpdfapi/parser/cpdf_array.h"
22 #include "core/fpdfapi/parser/cpdf_dictionary.h"
23 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
24 #include "core/fxge/fx_font.h"
25 #include "third_party/base/numerics/safe_math.h"
26 #include "third_party/base/ptr_util.h"
27 #include "third_party/base/span.h"
28 #include "third_party/base/stl_util.h"
29 
30 namespace {
31 
// Code page numbers, one slot per CIDSet value. Entries are handed to
// CFX_Font::LoadSubst() indexed by |m_Charset|, and (on OS_WIN) to the
// multi-byte <-> wide-char conversion helpers indexed by the CMap coding.
// NOTE(review): the values look like Windows code page identifiers
// (936/950/932/949/1200) - confirm against the CIDSet enum ordering.
const uint16_t g_CharsetCPs[CIDSET_NUM_SETS] = {0, 936, 950, 932, 949, 1200};
33 
// Per-CID matrix record used for the Japan1 character collection when no
// font file is present (see GetCIDTransform()).
//
// |a| .. |f| are six matrix coefficients, each packed into a single byte.
// GetCharBBox() decodes every byte with CIDTransformToFloat(), multiplies the
// last two (|e|, |f|) by 1000, and builds a CFX_Matrix from the result.
//
// GetCIDTransform() returns a pointer to |a| and callers index the next five
// bytes through that pointer, so the six fields must remain adjacent and in
// this order.
const struct CIDTransform {
  uint16_t cid;
  uint8_t a;
  uint8_t b;
  uint8_t c;
  uint8_t d;
  uint8_t e;
  uint8_t f;
  // The table below must stay sorted by ascending |cid|: GetCIDTransform()
  // looks entries up with std::lower_bound.
} g_Japan1_VertCIDs[] = {
    {97, 129, 0, 0, 127, 55, 0},     {7887, 127, 0, 0, 127, 76, 89},
    {7888, 127, 0, 0, 127, 79, 94},  {7889, 0, 129, 127, 0, 17, 127},
    {7890, 0, 129, 127, 0, 17, 127}, {7891, 0, 129, 127, 0, 17, 127},
    {7892, 0, 129, 127, 0, 17, 127}, {7893, 0, 129, 127, 0, 17, 127},
    {7894, 0, 129, 127, 0, 17, 127}, {7895, 0, 129, 127, 0, 17, 127},
    {7896, 0, 129, 127, 0, 17, 127}, {7897, 0, 129, 127, 0, 17, 127},
    {7898, 0, 129, 127, 0, 17, 127}, {7899, 0, 129, 127, 0, 17, 104},
    {7900, 0, 129, 127, 0, 17, 127}, {7901, 0, 129, 127, 0, 17, 104},
    {7902, 0, 129, 127, 0, 17, 127}, {7903, 0, 129, 127, 0, 17, 127},
    {7904, 0, 129, 127, 0, 17, 127}, {7905, 0, 129, 127, 0, 17, 114},
    {7906, 0, 129, 127, 0, 17, 127}, {7907, 0, 129, 127, 0, 17, 127},
    {7908, 0, 129, 127, 0, 17, 127}, {7909, 0, 129, 127, 0, 17, 127},
    {7910, 0, 129, 127, 0, 17, 127}, {7911, 0, 129, 127, 0, 17, 127},
    {7912, 0, 129, 127, 0, 17, 127}, {7913, 0, 129, 127, 0, 17, 127},
    {7914, 0, 129, 127, 0, 17, 127}, {7915, 0, 129, 127, 0, 17, 114},
    {7916, 0, 129, 127, 0, 17, 127}, {7917, 0, 129, 127, 0, 17, 127},
    {7918, 127, 0, 0, 127, 18, 25},  {7919, 127, 0, 0, 127, 18, 25},
    {7920, 127, 0, 0, 127, 18, 25},  {7921, 127, 0, 0, 127, 18, 25},
    {7922, 127, 0, 0, 127, 18, 25},  {7923, 127, 0, 0, 127, 18, 25},
    {7924, 127, 0, 0, 127, 18, 25},  {7925, 127, 0, 0, 127, 18, 25},
    {7926, 127, 0, 0, 127, 18, 25},  {7927, 127, 0, 0, 127, 18, 25},
    {7928, 127, 0, 0, 127, 18, 25},  {7929, 127, 0, 0, 127, 18, 25},
    {7930, 127, 0, 0, 127, 18, 25},  {7931, 127, 0, 0, 127, 18, 25},
    {7932, 127, 0, 0, 127, 18, 25},  {7933, 127, 0, 0, 127, 18, 25},
    {7934, 127, 0, 0, 127, 18, 25},  {7935, 127, 0, 0, 127, 18, 25},
    {7936, 127, 0, 0, 127, 18, 25},  {7937, 127, 0, 0, 127, 18, 25},
    {7938, 127, 0, 0, 127, 18, 25},  {7939, 127, 0, 0, 127, 18, 25},
    {8720, 0, 129, 127, 0, 19, 102}, {8721, 0, 129, 127, 0, 13, 127},
    {8722, 0, 129, 127, 0, 19, 108}, {8723, 0, 129, 127, 0, 19, 102},
    {8724, 0, 129, 127, 0, 19, 102}, {8725, 0, 129, 127, 0, 19, 102},
    {8726, 0, 129, 127, 0, 19, 102}, {8727, 0, 129, 127, 0, 19, 102},
    {8728, 0, 129, 127, 0, 19, 114}, {8729, 0, 129, 127, 0, 19, 114},
    {8730, 0, 129, 127, 0, 38, 108}, {8731, 0, 129, 127, 0, 13, 108},
    {8732, 0, 129, 127, 0, 19, 108}, {8733, 0, 129, 127, 0, 19, 108},
    {8734, 0, 129, 127, 0, 19, 108}, {8735, 0, 129, 127, 0, 19, 108},
    {8736, 0, 129, 127, 0, 19, 102}, {8737, 0, 129, 127, 0, 19, 102},
    {8738, 0, 129, 127, 0, 19, 102}, {8739, 0, 129, 127, 0, 19, 102},
    {8740, 0, 129, 127, 0, 19, 102}, {8741, 0, 129, 127, 0, 19, 102},
    {8742, 0, 129, 127, 0, 19, 102}, {8743, 0, 129, 127, 0, 19, 102},
    {8744, 0, 129, 127, 0, 19, 102}, {8745, 0, 129, 127, 0, 19, 102},
    {8746, 0, 129, 127, 0, 19, 114}, {8747, 0, 129, 127, 0, 19, 114},
    {8748, 0, 129, 127, 0, 19, 102}, {8749, 0, 129, 127, 0, 19, 102},
    {8750, 0, 129, 127, 0, 19, 102}, {8751, 0, 129, 127, 0, 19, 102},
    {8752, 0, 129, 127, 0, 19, 102}, {8753, 0, 129, 127, 0, 19, 102},
    {8754, 0, 129, 127, 0, 19, 102}, {8755, 0, 129, 127, 0, 19, 102},
    {8756, 0, 129, 127, 0, 19, 102}, {8757, 0, 129, 127, 0, 19, 102},
    {8758, 0, 129, 127, 0, 19, 102}, {8759, 0, 129, 127, 0, 19, 102},
    {8760, 0, 129, 127, 0, 19, 102}, {8761, 0, 129, 127, 0, 19, 102},
    {8762, 0, 129, 127, 0, 19, 102}, {8763, 0, 129, 127, 0, 19, 102},
    {8764, 0, 129, 127, 0, 19, 102}, {8765, 0, 129, 127, 0, 19, 102},
    {8766, 0, 129, 127, 0, 19, 102}, {8767, 0, 129, 127, 0, 19, 102},
    {8768, 0, 129, 127, 0, 19, 102}, {8769, 0, 129, 127, 0, 19, 102},
    {8770, 0, 129, 127, 0, 19, 102}, {8771, 0, 129, 127, 0, 19, 102},
    {8772, 0, 129, 127, 0, 19, 102}, {8773, 0, 129, 127, 0, 19, 102},
    {8774, 0, 129, 127, 0, 19, 102}, {8775, 0, 129, 127, 0, 19, 102},
    {8776, 0, 129, 127, 0, 19, 102}, {8777, 0, 129, 127, 0, 19, 102},
    {8778, 0, 129, 127, 0, 19, 102}, {8779, 0, 129, 127, 0, 19, 114},
    {8780, 0, 129, 127, 0, 19, 108}, {8781, 0, 129, 127, 0, 19, 114},
    {8782, 0, 129, 127, 0, 13, 114}, {8783, 0, 129, 127, 0, 19, 108},
    {8784, 0, 129, 127, 0, 13, 114}, {8785, 0, 129, 127, 0, 19, 108},
    {8786, 0, 129, 127, 0, 19, 108}, {8787, 0, 129, 127, 0, 19, 108},
    {8788, 0, 129, 127, 0, 19, 108}, {8789, 0, 129, 127, 0, 19, 108},
    {8790, 0, 129, 127, 0, 19, 108}, {8791, 0, 129, 127, 0, 19, 108},
    {8792, 0, 129, 127, 0, 19, 108}, {8793, 0, 129, 127, 0, 19, 108},
    {8794, 0, 129, 127, 0, 19, 108}, {8795, 0, 129, 127, 0, 19, 108},
    {8796, 0, 129, 127, 0, 19, 108}, {8797, 0, 129, 127, 0, 19, 108},
    {8798, 0, 129, 127, 0, 19, 108}, {8799, 0, 129, 127, 0, 19, 108},
    {8800, 0, 129, 127, 0, 19, 108}, {8801, 0, 129, 127, 0, 19, 108},
    {8802, 0, 129, 127, 0, 19, 108}, {8803, 0, 129, 127, 0, 19, 108},
    {8804, 0, 129, 127, 0, 19, 108}, {8805, 0, 129, 127, 0, 19, 108},
    {8806, 0, 129, 127, 0, 19, 108}, {8807, 0, 129, 127, 0, 19, 108},
    {8808, 0, 129, 127, 0, 19, 108}, {8809, 0, 129, 127, 0, 19, 108},
    {8810, 0, 129, 127, 0, 19, 108}, {8811, 0, 129, 127, 0, 19, 114},
    {8812, 0, 129, 127, 0, 19, 102}, {8813, 0, 129, 127, 0, 19, 114},
    {8814, 0, 129, 127, 0, 76, 102}, {8815, 0, 129, 127, 0, 13, 121},
    {8816, 0, 129, 127, 0, 19, 114}, {8817, 0, 129, 127, 0, 19, 127},
    {8818, 0, 129, 127, 0, 19, 114}, {8819, 0, 129, 127, 0, 218, 108},
};
121 
// Clamp limits applied to glyph control-box coordinates in GetCharBBox()
// before they are multiplied by 1000; 2147483 * 1000 still fits in a signed
// 32-bit integer, so the scaled value cannot overflow one.
constexpr long kMinCBox = -2147483;
constexpr long kMaxCBox = 2147483;

// Largest |rect.top| to which GetCharBBox() can add top / 64 without the sum
// exceeding the 32-bit int maximum; above this the top saturates to
// std::numeric_limits<int>::max() instead.
constexpr int kMaxRectTop = 2114445437;
128 
129 #if !defined(OS_WIN)
130 
IsValidEmbeddedCharcodeFromUnicodeCharset(CIDSet charset)131 bool IsValidEmbeddedCharcodeFromUnicodeCharset(CIDSet charset) {
132   switch (charset) {
133     case CIDSET_GB1:
134     case CIDSET_CNS1:
135     case CIDSET_JAPAN1:
136     case CIDSET_KOREA1:
137       return true;
138 
139     default:
140       return false;
141   }
142 }
143 
EmbeddedUnicodeFromCharcode(const FXCMAP_CMap * pEmbedMap,CIDSet charset,uint32_t charcode)144 wchar_t EmbeddedUnicodeFromCharcode(const FXCMAP_CMap* pEmbedMap,
145                                     CIDSet charset,
146                                     uint32_t charcode) {
147   if (!IsValidEmbeddedCharcodeFromUnicodeCharset(charset))
148     return 0;
149 
150   uint16_t cid = CIDFromCharCode(pEmbedMap, charcode);
151   if (!cid)
152     return 0;
153 
154   pdfium::span<const uint16_t> map =
155       CPDF_FontGlobals::GetInstance()->GetEmbeddedToUnicode(charset);
156   return cid < map.size() ? map[cid] : 0;
157 }
158 
EmbeddedCharcodeFromUnicode(const FXCMAP_CMap * pEmbedMap,CIDSet charset,wchar_t unicode)159 uint32_t EmbeddedCharcodeFromUnicode(const FXCMAP_CMap* pEmbedMap,
160                                      CIDSet charset,
161                                      wchar_t unicode) {
162   if (!IsValidEmbeddedCharcodeFromUnicodeCharset(charset))
163     return 0;
164 
165   pdfium::span<const uint16_t> map =
166       CPDF_FontGlobals::GetInstance()->GetEmbeddedToUnicode(charset);
167   for (uint32_t i = 0; i < map.size(); ++i) {
168     if (map[i] == unicode) {
169       uint32_t charCode = CharCodeFromCID(pEmbedMap, i);
170       if (charCode)
171         return charCode;
172     }
173   }
174   return 0;
175 }
176 
177 #endif  // !defined(OS_WIN)
178 
FT_UseCIDCharmap(FXFT_FaceRec * face,int coding)179 void FT_UseCIDCharmap(FXFT_FaceRec* face, int coding) {
180   int encoding;
181   switch (coding) {
182     case CIDCODING_GB:
183       encoding = FT_ENCODING_GB2312;
184       break;
185     case CIDCODING_BIG5:
186       encoding = FT_ENCODING_BIG5;
187       break;
188     case CIDCODING_JIS:
189       encoding = FT_ENCODING_SJIS;
190       break;
191     case CIDCODING_KOREA:
192       encoding = FT_ENCODING_JOHAB;
193       break;
194     default:
195       encoding = FT_ENCODING_UNICODE;
196   }
197   int err = FXFT_Select_Charmap(face, encoding);
198   if (err)
199     err = FXFT_Select_Charmap(face, FT_ENCODING_UNICODE);
200   if (err && FXFT_Get_Face_Charmaps(face))
201     FT_Set_Charmap(face, *FXFT_Get_Face_Charmaps(face));
202 }
203 
// Returns true when |CID| lies inside the inclusive range
// [pEntry[0], pEntry[1]] that opens a metrics record.
bool IsMetricForCID(const uint32_t* pEntry, uint16_t CID) {
  return CID >= pEntry[0] && CID <= pEntry[1];
}
207 
208 }  // namespace
209 
CPDF_CIDFont(CPDF_Document * pDocument,CPDF_Dictionary * pFontDict)210 CPDF_CIDFont::CPDF_CIDFont(CPDF_Document* pDocument, CPDF_Dictionary* pFontDict)
211     : CPDF_Font(pDocument, pFontDict) {
212   for (size_t i = 0; i < FX_ArraySize(m_CharBBox); ++i)
213     m_CharBBox[i] = FX_RECT(-1, -1, -1, -1);
214 }
215 
~CPDF_CIDFont()216 CPDF_CIDFont::~CPDF_CIDFont() {}
217 
// Always true for this class.
bool CPDF_CIDFont::IsCIDFont() const {
  return true;
}
221 
// Const accessor: returns this object itself, never nullptr.
const CPDF_CIDFont* CPDF_CIDFont::AsCIDFont() const {
  return this;
}
225 
// Mutable accessor: returns this object itself, never nullptr.
CPDF_CIDFont* CPDF_CIDFont::AsCIDFont() {
  return this;
}
229 
CIDFromCharCode(uint32_t charcode) const230 uint16_t CPDF_CIDFont::CIDFromCharCode(uint32_t charcode) const {
231   return m_pCMap ? m_pCMap->CIDFromCharCode(charcode)
232                  : static_cast<uint16_t>(charcode);
233 }
234 
IsVertWriting() const235 bool CPDF_CIDFont::IsVertWriting() const {
236   return m_pCMap && m_pCMap->IsVertWriting();
237 }
238 
UnicodeFromCharCode(uint32_t charcode) const239 WideString CPDF_CIDFont::UnicodeFromCharCode(uint32_t charcode) const {
240   WideString str = CPDF_Font::UnicodeFromCharCode(charcode);
241   if (!str.IsEmpty())
242     return str;
243   wchar_t ret = GetUnicodeFromCharCode(charcode);
244   return ret ? ret : WideString();
245 }
246 
GetUnicodeFromCharCode(uint32_t charcode) const247 wchar_t CPDF_CIDFont::GetUnicodeFromCharCode(uint32_t charcode) const {
248   switch (m_pCMap->GetCoding()) {
249     case CIDCODING_UCS2:
250     case CIDCODING_UTF16:
251       return static_cast<wchar_t>(charcode);
252     case CIDCODING_CID:
253       if (!m_pCID2UnicodeMap || !m_pCID2UnicodeMap->IsLoaded())
254         return 0;
255       return m_pCID2UnicodeMap->UnicodeFromCID(static_cast<uint16_t>(charcode));
256   }
257   if (m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded() && m_pCMap->IsLoaded())
258     return m_pCID2UnicodeMap->UnicodeFromCID(CIDFromCharCode(charcode));
259 
260 #if defined(OS_WIN)
261   wchar_t unicode;
262   int charsize = 1;
263   if (charcode > 255) {
264     charcode = (charcode % 256) * 256 + (charcode / 256);
265     charsize = 2;
266   }
267   int ret = FXSYS_MultiByteToWideChar(g_CharsetCPs[m_pCMap->GetCoding()], 0,
268                                       reinterpret_cast<const char*>(&charcode),
269                                       charsize, &unicode, 1);
270   return ret == 1 ? unicode : 0;
271 #else
272   if (!m_pCMap->GetEmbedMap())
273     return 0;
274   return EmbeddedUnicodeFromCharcode(m_pCMap->GetEmbedMap(),
275                                      m_pCMap->GetCharset(), charcode);
276 #endif
277 }
278 
CharCodeFromUnicode(wchar_t unicode) const279 uint32_t CPDF_CIDFont::CharCodeFromUnicode(wchar_t unicode) const {
280   uint32_t charcode = CPDF_Font::CharCodeFromUnicode(unicode);
281   if (charcode)
282     return charcode;
283   switch (m_pCMap->GetCoding()) {
284     case CIDCODING_UNKNOWN:
285       return 0;
286     case CIDCODING_UCS2:
287     case CIDCODING_UTF16:
288       return unicode;
289     case CIDCODING_CID: {
290       if (!m_pCID2UnicodeMap || !m_pCID2UnicodeMap->IsLoaded())
291         return 0;
292       uint32_t CID = 0;
293       while (CID < 65536) {
294         wchar_t this_unicode =
295             m_pCID2UnicodeMap->UnicodeFromCID(static_cast<uint16_t>(CID));
296         if (this_unicode == unicode)
297           return CID;
298         CID++;
299       }
300       break;
301     }
302   }
303 
304   if (unicode < 0x80)
305     return static_cast<uint32_t>(unicode);
306   if (m_pCMap->GetCoding() == CIDCODING_CID)
307     return 0;
308 #if defined(OS_WIN)
309   uint8_t buffer[32];
310   int ret = FXSYS_WideCharToMultiByte(
311       g_CharsetCPs[m_pCMap->GetCoding()], 0, &unicode, 1,
312       reinterpret_cast<char*>(buffer), 4, nullptr, nullptr);
313   if (ret == 1)
314     return buffer[0];
315   if (ret == 2)
316     return buffer[0] * 256 + buffer[1];
317 #else
318   if (m_pCMap->GetEmbedMap()) {
319     return EmbeddedCharcodeFromUnicode(m_pCMap->GetEmbedMap(),
320                                        m_pCMap->GetCharset(), unicode);
321   }
322 #endif
323   return 0;
324 }
325 
// Initializes this font from |m_pFontDict|.
//
// Returns false when the dictionary has no /DescendantFonts array with
// exactly one dictionary entry, or when /Encoding is missing or is neither a
// name nor a stream. Returns true otherwise (including the "TrueType" subtype
// shortcut, which is handled entirely by LoadGB2312()).
bool CPDF_CIDFont::Load() {
  // A font dictionary whose /Subtype is "TrueType" takes the GB2312 path.
  if (m_pFontDict->GetStringFor("Subtype") == "TrueType") {
    LoadGB2312();
    return true;
  }

  // Exactly one descendant font is required.
  const CPDF_Array* pFonts = m_pFontDict->GetArrayFor("DescendantFonts");
  if (!pFonts || pFonts->size() != 1)
    return false;

  const CPDF_Dictionary* pCIDFontDict = pFonts->GetDictAt(0);
  if (!pCIDFontDict)
    return false;

  // Non-embedded Adobe CourierStd variants get special glyph lookup in
  // GlyphFromCharCode().
  m_BaseFontName = pCIDFontDict->GetStringFor("BaseFont");
  if ((m_BaseFontName.Compare("CourierStd") == 0 ||
       m_BaseFontName.Compare("CourierStd-Bold") == 0 ||
       m_BaseFontName.Compare("CourierStd-BoldOblique") == 0 ||
       m_BaseFontName.Compare("CourierStd-Oblique") == 0) &&
      !IsEmbedded()) {
    m_bAdobeCourierStd = true;
  }

  CPDF_Object* pEncoding = m_pFontDict->GetDirectObjectFor("Encoding");
  if (!pEncoding)
    return false;

  ByteString subtype = pCIDFontDict->GetStringFor("Subtype");
  m_bType1 = (subtype == "CIDFontType0");

  // /Encoding is either the name of a predefined CMap or a stream holding an
  // embedded CMap.
  // NOTE(review): |m_pCMap| is dereferenced below without a null check -
  // confirm GetPredefinedCMap() never returns null for unknown names.
  CPDF_CMapManager* manager = CPDF_FontGlobals::GetInstance()->GetCMapManager();
  if (pEncoding->IsName()) {
    ByteString cmap = pEncoding->GetString();
    m_pCMap = manager->GetPredefinedCMap(cmap);
  } else if (CPDF_Stream* pStream = pEncoding->AsStream()) {
    auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
    pAcc->LoadAllDataFiltered();
    pdfium::span<const uint8_t> span = pAcc->GetSpan();
    m_pCMap = pdfium::MakeRetain<CPDF_CMap>(span);
  } else {
    return false;
  }

  const CPDF_Dictionary* pFontDesc = pCIDFontDict->GetDictFor("FontDescriptor");
  if (pFontDesc)
    LoadFontDescriptor(pFontDesc);

  // Prefer the charset reported by the CMap; fall back to the /Ordering entry
  // of /CIDSystemInfo when the CMap does not know it.
  m_Charset = m_pCMap->GetCharset();
  if (m_Charset == CIDSET_UNKNOWN) {
    const CPDF_Dictionary* pCIDInfo = pCIDFontDict->GetDictFor("CIDSystemInfo");
    if (pCIDInfo) {
      m_Charset = CPDF_CMapParser::CharsetFromOrdering(
          pCIDInfo->GetStringFor("Ordering").AsStringView());
    }
  }
  if (m_Charset != CIDSET_UNKNOWN) {
    m_pCID2UnicodeMap = manager->GetCID2UnicodeMap(m_Charset);
  }
  // If a face is already available, pick its charmap: Unicode for
  // CIDFontType0, otherwise one that matches the CMap coding.
  if (m_Font.GetFaceRec()) {
    if (m_bType1)
      FXFT_Select_Charmap(m_Font.GetFaceRec(), FT_ENCODING_UNICODE);
    else
      FT_UseCIDCharmap(m_Font.GetFaceRec(), m_pCMap->GetCoding());
  }
  // Horizontal metrics: /DW default (1000 when absent) plus the /W array,
  // stored as 3-value records (first CID, last CID, width).
  m_DefaultWidth = pCIDFontDict->GetIntegerFor("DW", 1000);
  const CPDF_Array* pWidthArray = pCIDFontDict->GetArrayFor("W");
  if (pWidthArray)
    LoadMetricsArray(pWidthArray, &m_WidthList, 1);
  if (!IsEmbedded())
    LoadSubstFont();

  // /CIDToGIDMap is either a stream (kept in |m_pStreamAcc| and read by
  // GlyphFromCharCode()) or the name "Identity", which is honored only when a
  // font file is present.
  const CPDF_Object* pmap = pCIDFontDict->GetDirectObjectFor("CIDToGIDMap");
  if (pmap) {
    if (const CPDF_Stream* pStream = pmap->AsStream()) {
      m_pStreamAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
      m_pStreamAcc->LoadAllDataFiltered();
    } else if (m_pFontFile && pmap->GetString() == "Identity") {
      m_bCIDIsGID = true;
    }
  }

  CheckFontMetrics();
  // Vertical metrics are only loaded for vertical-writing CMaps: /W2 as
  // 5-value records (first CID, last CID, three metric values) and /DW2 as
  // the two defaults.
  if (IsVertWriting()) {
    pWidthArray = pCIDFontDict->GetArrayFor("W2");
    if (pWidthArray)
      LoadMetricsArray(pWidthArray, &m_VertMetrics, 3);
    const CPDF_Array* pDefaultArray = pCIDFontDict->GetArrayFor("DW2");
    if (pDefaultArray) {
      m_DefaultVY = pDefaultArray->GetIntegerAt(0);
      m_DefaultW1 = pDefaultArray->GetIntegerAt(1);
    }
  }
  return true;
}
420 
// Returns the bounding box of the glyph for |charcode|.
//
// Results for charcodes below 256 are cached in |m_CharBBox|; a cached entry
// whose right edge is -1 counts as "not computed yet". When no face is
// available, or the glyph cannot be loaded, the default-constructed FX_RECT
// is returned (and cached).
FX_RECT CPDF_CIDFont::GetCharBBox(uint32_t charcode) {
  if (charcode < 256 && m_CharBBox[charcode].right != -1)
    return m_CharBBox[charcode];

  FX_RECT rect;
  bool bVert = false;
  int glyph_index = GlyphFromCharCode(charcode, &bVert);
  FXFT_FaceRec* face = m_Font.GetFaceRec();
  if (face) {
    if (FXFT_Is_Face_Tricky(face)) {
      // "Tricky" faces: measure the glyph's control box in pixels and
      // rescale it by the face's current pixel size.
      int err =
          FT_Load_Glyph(face, glyph_index, FT_LOAD_IGNORE_GLOBAL_ADVANCE_WIDTH);
      if (!err) {
        FT_Glyph glyph;
        err = FT_Get_Glyph(face->glyph, &glyph);
        if (!err) {
          FT_BBox cbox;
          FT_Glyph_Get_CBox(glyph, FT_GLYPH_BBOX_PIXELS, &cbox);
          // Clamp first so the multiplications by 1000 below stay in range.
          cbox.xMin = pdfium::clamp(cbox.xMin, kMinCBox, kMaxCBox);
          cbox.xMax = pdfium::clamp(cbox.xMax, kMinCBox, kMaxCBox);
          cbox.yMin = pdfium::clamp(cbox.yMin, kMinCBox, kMaxCBox);
          cbox.yMax = pdfium::clamp(cbox.yMax, kMinCBox, kMaxCBox);
          int pixel_size_x = face->size->metrics.x_ppem;
          int pixel_size_y = face->size->metrics.y_ppem;
          if (pixel_size_x == 0 || pixel_size_y == 0) {
            // No pixel size set: use the box unscaled to avoid dividing by 0.
            rect = FX_RECT(cbox.xMin, cbox.yMax, cbox.xMax, cbox.yMin);
          } else {
            rect = FX_RECT(cbox.xMin * 1000 / pixel_size_x,
                           cbox.yMax * 1000 / pixel_size_y,
                           cbox.xMax * 1000 / pixel_size_x,
                           cbox.yMin * 1000 / pixel_size_y);
          }
          // Keep the box within the face's ascender / descender.
          rect.top = std::min(rect.top,
                              static_cast<int>(FXFT_Get_Face_Ascender(face)));
          rect.bottom = std::max(
              rect.bottom, static_cast<int>(FXFT_Get_Face_Descender(face)));
          FT_Done_Glyph(glyph);
        }
      }
    } else {
      // Regular faces: build the box from the unscaled horizontal glyph
      // metrics, each converted with TT2PDF().
      int err = FT_Load_Glyph(face, glyph_index, FT_LOAD_NO_SCALE);
      if (err == 0) {
        rect = FX_RECT(TT2PDF(FXFT_Get_Glyph_HoriBearingX(face), face),
                       TT2PDF(FXFT_Get_Glyph_HoriBearingY(face), face),
                       TT2PDF(FXFT_Get_Glyph_HoriBearingX(face) +
                                  FXFT_Get_Glyph_Width(face),
                              face),
                       TT2PDF(FXFT_Get_Glyph_HoriBearingY(face) -
                                  FXFT_Get_Glyph_Height(face),
                              face));
        // Enlarge the top by 1/64, saturating instead of overflowing.
        if (rect.top <= kMaxRectTop)
          rect.top += rect.top / 64;
        else
          rect.top = std::numeric_limits<int>::max();
      }
    }
  }
  // Japan1 fonts without a font file: apply the per-CID matrix from the
  // built-in table, unless a vertical glyph was already selected.
  if (!m_pFontFile && m_Charset == CIDSET_JAPAN1) {
    uint16_t CID = CIDFromCharCode(charcode);
    const uint8_t* pTransform = GetCIDTransform(CID);
    if (pTransform && !bVert) {
      CFX_Matrix matrix(CIDTransformToFloat(pTransform[0]),
                        CIDTransformToFloat(pTransform[1]),
                        CIDTransformToFloat(pTransform[2]),
                        CIDTransformToFloat(pTransform[3]),
                        CIDTransformToFloat(pTransform[4]) * 1000,
                        CIDTransformToFloat(pTransform[5]) * 1000);
      rect = matrix.TransformRect(CFX_FloatRect(rect)).GetOuterRect();
    }
  }
  if (charcode < 256)
    m_CharBBox[charcode] = rect;

  return rect;
}
496 
GetCharWidthF(uint32_t charcode)497 uint32_t CPDF_CIDFont::GetCharWidthF(uint32_t charcode) {
498   if (charcode < 0x80 && m_bAnsiWidthsFixed)
499     return (charcode >= 32 && charcode < 127) ? 500 : 0;
500 
501   uint16_t cid = CIDFromCharCode(charcode);
502   size_t size = m_WidthList.size();
503   const uint32_t* pList = m_WidthList.data();
504   for (size_t i = 0; i < size; i += 3) {
505     const uint32_t* pEntry = pList + i;
506     if (IsMetricForCID(pEntry, cid))
507       return pEntry[2];
508   }
509   return m_DefaultWidth;
510 }
511 
GetVertWidth(uint16_t CID) const512 short CPDF_CIDFont::GetVertWidth(uint16_t CID) const {
513   size_t vertsize = m_VertMetrics.size() / 5;
514   if (vertsize) {
515     const uint32_t* pTable = m_VertMetrics.data();
516     for (size_t i = 0; i < vertsize; i++) {
517       const uint32_t* pEntry = pTable + (i * 5);
518       if (IsMetricForCID(pEntry, CID))
519         return static_cast<short>(pEntry[2]);
520     }
521   }
522   return m_DefaultW1;
523 }
524 
GetVertOrigin(uint16_t CID,short & vx,short & vy) const525 void CPDF_CIDFont::GetVertOrigin(uint16_t CID, short& vx, short& vy) const {
526   size_t vertsize = m_VertMetrics.size() / 5;
527   if (vertsize) {
528     const uint32_t* pTable = m_VertMetrics.data();
529     for (size_t i = 0; i < vertsize; i++) {
530       const uint32_t* pEntry = pTable + (i * 5);
531       if (IsMetricForCID(pEntry, CID)) {
532         vx = static_cast<short>(pEntry[3]);
533         vy = static_cast<short>(pEntry[4]);
534         return;
535       }
536     }
537   }
538   uint32_t dwWidth = m_DefaultWidth;
539   size_t size = m_WidthList.size();
540   const uint32_t* pList = m_WidthList.data();
541   for (size_t i = 0; i < size; i += 3) {
542     const uint32_t* pEntry = pList + i;
543     if (IsMetricForCID(pEntry, CID)) {
544       dwWidth = pEntry[2];
545       break;
546     }
547   }
548   vx = static_cast<short>(dwWidth) / 2;
549   vy = m_DefaultVY;
550 }
551 
GetGlyphIndex(uint32_t unicode,bool * pVertGlyph)552 int CPDF_CIDFont::GetGlyphIndex(uint32_t unicode, bool* pVertGlyph) {
553   if (pVertGlyph)
554     *pVertGlyph = false;
555 
556   FXFT_FaceRec* face = m_Font.GetFaceRec();
557   int index = FT_Get_Char_Index(face, unicode);
558   if (unicode == 0x2502)
559     return index;
560 
561   if (!index || !IsVertWriting())
562     return index;
563 
564   if (m_pTTGSUBTable)
565     return GetVerticalGlyph(index, pVertGlyph);
566 
567   if (!m_Font.GetSubData()) {
568     unsigned long length = 0;
569     int error = FT_Load_Sfnt_Table(face, FT_MAKE_TAG('G', 'S', 'U', 'B'), 0,
570                                    nullptr, &length);
571     if (!error)
572       m_Font.SetSubData(FX_Alloc(uint8_t, length));
573   }
574   int error = FT_Load_Sfnt_Table(face, FT_MAKE_TAG('G', 'S', 'U', 'B'), 0,
575                                  m_Font.GetSubData(), nullptr);
576   if (error || !m_Font.GetSubData())
577     return index;
578 
579   m_pTTGSUBTable = pdfium::MakeUnique<CFX_CTTGSUBTable>(m_Font.GetSubData());
580   return GetVerticalGlyph(index, pVertGlyph);
581 }
582 
GetVerticalGlyph(int index,bool * pVertGlyph)583 int CPDF_CIDFont::GetVerticalGlyph(int index, bool* pVertGlyph) {
584   uint32_t vindex = m_pTTGSUBTable->GetVerticalGlyph(index);
585   if (!vindex)
586     return index;
587 
588   index = vindex;
589   if (pVertGlyph)
590     *pVertGlyph = true;
591   return index;
592 }
593 
// Maps |charcode| to a glyph index in the current face; -1 signals that no
// glyph could be determined. |*pVertGlyph| (when non-null) is reset to false
// and set to true only if GetGlyphIndex() substitutes a vertical glyph.
//
// Two main paths:
//  1. No font file, and either no CIDToGIDMap stream or a CID-to-Unicode map
//     is available: go through Unicode and the face's charmaps.
//  2. Otherwise: map the CID directly, through the face charmap, or through
//     the CIDToGIDMap stream.
int CPDF_CIDFont::GlyphFromCharCode(uint32_t charcode, bool* pVertGlyph) {
  if (pVertGlyph)
    *pVertGlyph = false;

  if (!m_pFontFile && (!m_pStreamAcc || m_pCID2UnicodeMap)) {
    uint16_t cid = CIDFromCharCode(charcode);
    wchar_t unicode = 0;
    if (m_bCIDIsGID) {
      // Identity CIDToGIDMap: the CID is the glyph index, except on macOS
      // where non-symbolic fonts still go through Unicode when possible.
#if defined(OS_MACOSX)
      if (FontStyleIsSymbolic(m_Flags))
        return cid;

      WideString uni_str = UnicodeFromCharCode(charcode);
      if (uni_str.IsEmpty())
        return cid;

      unicode = uni_str[0];
#else
      return cid;
#endif
    } else {
      // Try progressively broader sources for a Unicode value.
      if (cid && m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded())
        unicode = m_pCID2UnicodeMap->UnicodeFromCID(cid);
      if (unicode == 0)
        unicode = GetUnicodeFromCharCode(charcode);
      if (unicode == 0) {
        WideString unicode_str = UnicodeFromCharCode(charcode);
        if (!unicode_str.IsEmpty())
          unicode = unicode_str[0];
      }
    }
    FXFT_FaceRec* face = m_Font.GetFaceRec();
    if (unicode == 0) {
      // No Unicode value. Outside the Adobe CourierStd special case the
      // charcode itself is returned (-1 for charcode 0).
      if (!m_bAdobeCourierStd)
        return charcode ? static_cast<int>(charcode) : -1;

      // Adobe CourierStd: shift the charcode by 31, resolve it to an Adobe
      // glyph name in a base encoding chosen from the charmaps the face
      // offers, then look that name's Unicode value up in the face.
      charcode += 31;
      bool bMSUnicode = FT_UseTTCharmap(face, 3, 1);
      bool bMacRoman = !bMSUnicode && FT_UseTTCharmap(face, 1, 0);
      int iBaseEncoding = PDFFONT_ENCODING_STANDARD;
      if (bMSUnicode)
        iBaseEncoding = PDFFONT_ENCODING_WINANSI;
      else if (bMacRoman)
        iBaseEncoding = PDFFONT_ENCODING_MACROMAN;
      const char* name =
          GetAdobeCharName(iBaseEncoding, std::vector<ByteString>(), charcode);
      if (!name)
        return charcode ? static_cast<int>(charcode) : -1;

      int index = 0;
      uint16_t name_unicode = PDF_UnicodeFromAdobeName(name);
      if (!name_unicode)
        return charcode ? static_cast<int>(charcode) : -1;

      if (iBaseEncoding == PDFFONT_ENCODING_STANDARD)
        return FT_Get_Char_Index(face, name_unicode);

      if (iBaseEncoding == PDFFONT_ENCODING_WINANSI) {
        index = FT_Get_Char_Index(face, name_unicode);
      } else {
        ASSERT(iBaseEncoding == PDFFONT_ENCODING_MACROMAN);
        uint32_t maccode =
            FT_CharCodeFromUnicode(FT_ENCODING_APPLE_ROMAN, name_unicode);
        index = maccode ? FT_Get_Char_Index(face, maccode)
                        : FXFT_Get_Name_Index(face, name);
      }
      // 0 and 0xffff are both treated as "glyph not found".
      if (index == 0 || index == 0xffff)
        return charcode ? static_cast<int>(charcode) : -1;
      return index;
    }
    // Japan1 remapping: backslash becomes slash, and (except on macOS)
    // U+00A5 becomes U+005C.
    if (m_Charset == CIDSET_JAPAN1) {
      if (unicode == '\\') {
        unicode = '/';
#if !defined(OS_MACOSX)
      } else if (unicode == 0xa5) {
        unicode = 0x5c;
#endif
      }
    }
    if (!face)
      return unicode;

    int err = FXFT_Select_Charmap(face, FT_ENCODING_UNICODE);
    if (err) {
      // No Unicode charmap: use the first charmap whose encoding yields a
      // non-zero code for |charcode|, else fall back to the first charmap
      // with the raw charcode.
      int i;
      for (i = 0; i < FXFT_Get_Face_CharmapCount(face); i++) {
        uint32_t ret = FT_CharCodeFromUnicode(
            FXFT_Get_Charmap_Encoding(FXFT_Get_Face_Charmaps(face)[i]),
            static_cast<wchar_t>(charcode));
        if (ret == 0)
          continue;
        FT_Set_Charmap(face, FXFT_Get_Face_Charmaps(face)[i]);
        unicode = static_cast<wchar_t>(ret);
        break;
      }
      if (i == FXFT_Get_Face_CharmapCount(face) && i) {
        FT_Set_Charmap(face, FXFT_Get_Face_Charmaps(face)[0]);
        unicode = static_cast<wchar_t>(charcode);
      }
    }
    if (FXFT_Get_Face_Charmap(face)) {
      int index = GetGlyphIndex(unicode, pVertGlyph);
      return index != 0 ? index : -1;
    }
    return unicode;
  }

  if (!m_Font.GetFaceRec())
    return -1;

  uint16_t cid = CIDFromCharCode(charcode);
  if (!m_pStreamAcc) {
    // No CIDToGIDMap stream: several cases use the CID as the glyph index
    // directly; otherwise the charcode (converted to Unicode first when the
    // active charmap is Unicode) is looked up in the face.
    if (m_bType1)
      return cid;
    if (m_pFontFile && m_pCMap->IsDirectCharcodeToCIDTableIsEmpty())
      return cid;
    if (m_pCMap->GetCoding() == CIDCODING_UNKNOWN ||
        !FXFT_Get_Face_Charmap(m_Font.GetFaceRec())) {
      return cid;
    }
    if (FXFT_Get_Charmap_Encoding(FXFT_Get_Face_Charmap(m_Font.GetFaceRec())) ==
        FT_ENCODING_UNICODE) {
      WideString unicode_str = UnicodeFromCharCode(charcode);
      if (unicode_str.IsEmpty())
        return -1;

      charcode = unicode_str[0];
    }
    return GetGlyphIndex(charcode, pVertGlyph);
  }
  // CIDToGIDMap stream: two bytes per CID, high byte first. CIDs whose entry
  // would lie past the end of the stream have no glyph.
  uint32_t byte_pos = cid * 2;
  if (byte_pos + 2 > m_pStreamAcc->GetSize())
    return -1;

  const uint8_t* pdata = m_pStreamAcc->GetData() + byte_pos;
  return pdata[0] * 256 + pdata[1];
}
731 
// Delegates to the CMap's GetNextChar() with |pString| and |pOffset|.
// |m_pCMap| is dereferenced unconditionally, so it must already be set.
uint32_t CPDF_CIDFont::GetNextChar(ByteStringView pString,
                                   size_t* pOffset) const {
  return m_pCMap->GetNextChar(pString, pOffset);
}
736 
// Delegates to the CMap's GetCharSize() for |charcode|.
// |m_pCMap| is dereferenced unconditionally, so it must already be set.
int CPDF_CIDFont::GetCharSize(uint32_t charcode) const {
  return m_pCMap->GetCharSize(charcode);
}
740 
// Delegates to the CMap's CountChar() for |pString|.
// |m_pCMap| is dereferenced unconditionally, so it must already be set.
size_t CPDF_CIDFont::CountChar(ByteStringView pString) const {
  return m_pCMap->CountChar(pString);
}
744 
// Delegates to the CMap's AppendChar() with |str| and |charcode| and returns
// its result. |m_pCMap| is dereferenced unconditionally, so it must already
// be set.
int CPDF_CIDFont::AppendChar(char* str, uint32_t charcode) const {
  return m_pCMap->AppendChar(str, charcode);
}
748 
IsUnicodeCompatible() const749 bool CPDF_CIDFont::IsUnicodeCompatible() const {
750   if (m_pCID2UnicodeMap && m_pCID2UnicodeMap->IsLoaded() && m_pCMap->IsLoaded())
751     return true;
752   return m_pCMap->GetCoding() != CIDCODING_UNKNOWN;
753 }
754 
LoadSubstFont()755 void CPDF_CIDFont::LoadSubstFont() {
756   pdfium::base::CheckedNumeric<int> safeStemV(m_StemV);
757   safeStemV *= 5;
758   m_Font.LoadSubst(m_BaseFontName, !m_bType1, m_Flags,
759                    safeStemV.ValueOrDefault(FXFONT_FW_NORMAL), m_ItalicAngle,
760                    g_CharsetCPs[m_Charset], IsVertWriting());
761 }
762 
// Flattens a metrics array (such as /W or /W2) into |result| as fixed-size
// records of (first code, last code, |nElements| metric values).
//
// Two input forms are recognized:
//   first [v v v ...]  - one record per group of |nElements| values, for
//                        consecutive codes starting at |first|;
//   first last v...    - a single record covering [first, last].
//
// |width_status| tracks the parser state:
//   0 - expecting a first code;
//   1 - first code read, expecting either an array or a last code;
//   2 - range read, collecting |nElements| values.
// Parsing stops as soon as an array appears where no first code precedes it.
void CPDF_CIDFont::LoadMetricsArray(const CPDF_Array* pArray,
                                    std::vector<uint32_t>* result,
                                    int nElements) {
  int width_status = 0;
  int iCurElement = 0;
  uint32_t first_code = 0;
  uint32_t last_code = 0;
  for (size_t i = 0; i < pArray->size(); i++) {
    const CPDF_Object* pObj = pArray->GetDirectObjectAt(i);
    if (!pObj)
      continue;

    if (const CPDF_Array* pObjArray = pObj->AsArray()) {
      if (width_status != 1)
        return;
      // Skip this array if incrementing |first_code| once per element could
      // wrap around uint32_t.
      if (first_code >
          std::numeric_limits<uint32_t>::max() - pObjArray->size()) {
        width_status = 0;
        continue;
      }

      // Each group becomes a single-code record (first == last).
      for (size_t j = 0; j < pObjArray->size(); j += nElements) {
        result->push_back(first_code);
        result->push_back(first_code);
        for (int k = 0; k < nElements; k++)
          result->push_back(pObjArray->GetIntegerAt(j + k));
        first_code++;
      }
      width_status = 0;
    } else {
      if (width_status == 0) {
        first_code = pObj->GetInteger();
        width_status = 1;
      } else if (width_status == 1) {
        last_code = pObj->GetInteger();
        width_status = 2;
        iCurElement = 0;
      } else {
        // The range header is emitted together with the first value.
        if (!iCurElement) {
          result->push_back(first_code);
          result->push_back(last_code);
        }
        result->push_back(pObj->GetInteger());
        iCurElement++;
        if (iCurElement == nElements)
          width_status = 0;
      }
    }
  }
}
813 
814 // static
CIDTransformToFloat(uint8_t ch)815 float CPDF_CIDFont::CIDTransformToFloat(uint8_t ch) {
816   return (ch < 128 ? ch : ch - 255) * (1.0f / 127);
817 }
818 
LoadGB2312()819 void CPDF_CIDFont::LoadGB2312() {
820   m_BaseFontName = m_pFontDict->GetStringFor("BaseFont");
821   m_Charset = CIDSET_GB1;
822 
823   CPDF_CMapManager* manager = CPDF_FontGlobals::GetInstance()->GetCMapManager();
824   m_pCMap = manager->GetPredefinedCMap("GBK-EUC-H");
825   m_pCID2UnicodeMap = manager->GetCID2UnicodeMap(m_Charset);
826   const CPDF_Dictionary* pFontDesc = m_pFontDict->GetDictFor("FontDescriptor");
827   if (pFontDesc)
828     LoadFontDescriptor(pFontDesc);
829 
830   if (!IsEmbedded())
831     LoadSubstFont();
832   CheckFontMetrics();
833   m_bAnsiWidthsFixed = true;
834 }
835 
GetCIDTransform(uint16_t CID) const836 const uint8_t* CPDF_CIDFont::GetCIDTransform(uint16_t CID) const {
837   if (m_Charset != CIDSET_JAPAN1 || m_pFontFile)
838     return nullptr;
839 
840   const auto* pEnd = g_Japan1_VertCIDs + FX_ArraySize(g_Japan1_VertCIDs);
841   const auto* pTransform = std::lower_bound(
842       g_Japan1_VertCIDs, pEnd, CID,
843       [](const CIDTransform& entry, uint16_t cid) { return entry.cid < cid; });
844   return (pTransform < pEnd && CID == pTransform->cid) ? &pTransform->a
845                                                        : nullptr;
846 }
847