1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/font/cpdf_cmapparser.h"
8 
9 #include <vector>
10 
11 #include "core/fpdfapi/cmaps/cmap_int.h"
12 #include "core/fpdfapi/cpdf_modulemgr.h"
13 #include "core/fpdfapi/page/cpdf_pagemodule.h"
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_simple_parser.h"
17 #include "core/fxcrt/fx_extension.h"
18 #include "core/fxge/fx_freetype.h"
19 #include "third_party/base/logging.h"
20 
21 namespace {
22 
23 const char* const g_CharsetNames[CIDSET_NUM_SETS] = {nullptr,  "GB1",    "CNS1",
24                                                      "Japan1", "Korea1", "UCS"};
25 
CIDSetFromSizeT(size_t index)26 CIDSet CIDSetFromSizeT(size_t index) {
27   if (index >= CIDSET_NUM_SETS) {
28     NOTREACHED();
29     return CIDSET_UNKNOWN;
30   }
31   return static_cast<CIDSet>(index);
32 }
33 
// Returns |word| shortened by two characters via Right(), or an empty view
// when |word| has two characters or fewer.
//
// NOTE(review): the only caller passes the token that follows "/Ordering".
// If the tokenizer hands that token over with both of its delimiters (e.g.
// "(Japan1)"), Right(len - 2) presumably keeps the trailing characters
// ("apan1)") rather than the text between the delimiters, and the result
// would never match a charset name. Confirm against the tokenizer before
// changing this.
ByteStringView CMap_GetString(const ByteStringView& word) {
  const auto length = word.GetLength();
  return length > 2 ? word.Right(length - 2) : ByteStringView();
}
39 
40 }  // namespace
41 
CPDF_CMapParser(CPDF_CMap * pCMap)42 CPDF_CMapParser::CPDF_CMapParser(CPDF_CMap* pCMap)
43     : m_pCMap(pCMap), m_Status(0), m_CodeSeq(0) {}
44 
~CPDF_CMapParser()45 CPDF_CMapParser::~CPDF_CMapParser() {}
46 
ParseWord(const ByteStringView & word)47 void CPDF_CMapParser::ParseWord(const ByteStringView& word) {
48   if (word.IsEmpty()) {
49     return;
50   }
51   if (word == "begincidchar") {
52     m_Status = 1;
53     m_CodeSeq = 0;
54   } else if (word == "begincidrange") {
55     m_Status = 2;
56     m_CodeSeq = 0;
57   } else if (word == "endcidrange" || word == "endcidchar") {
58     m_Status = 0;
59   } else if (word == "/WMode") {
60     m_Status = 6;
61   } else if (word == "/Registry") {
62     m_Status = 3;
63   } else if (word == "/Ordering") {
64     m_Status = 4;
65   } else if (word == "/Supplement") {
66     m_Status = 5;
67   } else if (word == "begincodespacerange") {
68     m_Status = 7;
69     m_CodeSeq = 0;
70   } else if (word == "usecmap") {
71   } else if (m_Status == 1 || m_Status == 2) {
72     m_CodePoints[m_CodeSeq] = GetCode(word);
73     m_CodeSeq++;
74     uint32_t StartCode, EndCode;
75     uint16_t StartCID;
76     if (m_Status == 1) {
77       if (m_CodeSeq < 2) {
78         return;
79       }
80       EndCode = StartCode = m_CodePoints[0];
81       StartCID = (uint16_t)m_CodePoints[1];
82     } else {
83       if (m_CodeSeq < 3) {
84         return;
85       }
86       StartCode = m_CodePoints[0];
87       EndCode = m_CodePoints[1];
88       StartCID = (uint16_t)m_CodePoints[2];
89     }
90     if (EndCode < 0x10000) {
91       for (uint32_t code = StartCode; code <= EndCode; code++) {
92         m_pCMap->SetDirectCharcodeToCIDTable(
93             code, static_cast<uint16_t>(StartCID + code - StartCode));
94       }
95     } else {
96       m_AdditionalCharcodeToCIDMappings.push_back(
97           {StartCode, EndCode, StartCID});
98     }
99     m_CodeSeq = 0;
100   } else if (m_Status == 3) {
101     m_Status = 0;
102   } else if (m_Status == 4) {
103     m_pCMap->SetCharset(CharsetFromOrdering(CMap_GetString(word)));
104     m_Status = 0;
105   } else if (m_Status == 5) {
106     m_Status = 0;
107   } else if (m_Status == 6) {
108     m_pCMap->SetVertical(GetCode(word) != 0);
109     m_Status = 0;
110   } else if (m_Status == 7) {
111     if (word == "endcodespacerange") {
112       size_t nSegs = m_CodeRanges.size();
113       if (nSegs == 1) {
114         m_pCMap->SetCodingScheme((m_CodeRanges[0].m_CharSize == 2)
115                                      ? CPDF_CMap::TwoBytes
116                                      : CPDF_CMap::OneByte);
117       } else if (nSegs > 1) {
118         m_pCMap->SetCodingScheme(CPDF_CMap::MixedFourBytes);
119         m_pCMap->SetMixedFourByteLeadingRanges(m_CodeRanges);
120       }
121       m_Status = 0;
122     } else {
123       if (word.GetLength() == 0 || word[0] != '<') {
124         return;
125       }
126       if (m_CodeSeq % 2) {
127         CPDF_CMap::CodeRange range;
128         if (GetCodeRange(range, m_LastWord.AsStringView(), word))
129           m_CodeRanges.push_back(range);
130       }
131       m_CodeSeq++;
132     }
133   }
134   m_LastWord = word;
135 }
136 
GetCode(const ByteStringView & word) const137 uint32_t CPDF_CMapParser::GetCode(const ByteStringView& word) const {
138   if (word.IsEmpty())
139     return 0;
140 
141   pdfium::base::CheckedNumeric<uint32_t> num = 0;
142   if (word[0] == '<') {
143     for (size_t i = 1; i < word.GetLength() && std::isxdigit(word[i]); ++i) {
144       num = num * 16 + FXSYS_HexCharToInt(word[i]);
145       if (!num.IsValid())
146         return 0;
147     }
148     return num.ValueOrDie();
149   }
150 
151   for (size_t i = 0; i < word.GetLength() && std::isdigit(word[i]); ++i) {
152     num = num * 10 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(word[i]));
153     if (!num.IsValid())
154       return 0;
155   }
156   return num.ValueOrDie();
157 }
158 
GetCodeRange(CPDF_CMap::CodeRange & range,const ByteStringView & first,const ByteStringView & second) const159 bool CPDF_CMapParser::GetCodeRange(CPDF_CMap::CodeRange& range,
160                                    const ByteStringView& first,
161                                    const ByteStringView& second) const {
162   if (first.GetLength() == 0 || first[0] != '<')
163     return false;
164 
165   size_t i;
166   for (i = 1; i < first.GetLength(); ++i) {
167     if (first[i] == '>') {
168       break;
169     }
170   }
171   range.m_CharSize = (i - 1) / 2;
172   if (range.m_CharSize > 4)
173     return false;
174 
175   for (i = 0; i < range.m_CharSize; ++i) {
176     uint8_t digit1 = first[i * 2 + 1];
177     uint8_t digit2 = first[i * 2 + 2];
178     range.m_Lower[i] =
179         FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2);
180   }
181 
182   size_t size = second.GetLength();
183   for (i = 0; i < range.m_CharSize; ++i) {
184     uint8_t digit1 = (i * 2 + 1 < size) ? second[i * 2 + 1] : '0';
185     uint8_t digit2 = (i * 2 + 2 < size) ? second[i * 2 + 2] : '0';
186     range.m_Upper[i] =
187         FXSYS_HexCharToInt(digit1) * 16 + FXSYS_HexCharToInt(digit2);
188   }
189   return true;
190 }
191 
192 // static
CharsetFromOrdering(const ByteStringView & ordering)193 CIDSet CPDF_CMapParser::CharsetFromOrdering(const ByteStringView& ordering) {
194   for (size_t charset = 1; charset < FX_ArraySize(g_CharsetNames); ++charset) {
195     if (ordering == g_CharsetNames[charset])
196       return CIDSetFromSizeT(charset);
197   }
198   return CIDSET_UNKNOWN;
199 }
200