1 // Copyright 2017 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/font/cpdf_tounicodemap.h"
8 
9 #include "core/fpdfapi/cpdf_modulemgr.h"
10 #include "core/fpdfapi/font/cpdf_cid2unicodemap.h"
11 #include "core/fpdfapi/page/cpdf_pagemodule.h"
12 #include "core/fpdfapi/parser/cpdf_simple_parser.h"
13 #include "core/fxcrt/fx_extension.h"
14 #include "core/fxcrt/fx_safe_types.h"
15 #include "third_party/base/numerics/safe_conversions.h"
16 
Lookup(uint32_t charcode) const17 WideString CPDF_ToUnicodeMap::Lookup(uint32_t charcode) const {
18   auto it = m_Map.find(charcode);
19   if (it != m_Map.end()) {
20     uint32_t value = it->second;
21     wchar_t unicode = (wchar_t)(value & 0xffff);
22     if (unicode != 0xffff) {
23       return unicode;
24     }
25     const wchar_t* buf = m_MultiCharBuf.GetBuffer();
26     uint32_t buf_len = m_MultiCharBuf.GetLength();
27     if (!buf || buf_len == 0) {
28       return WideString();
29     }
30     uint32_t index = value >> 16;
31     if (index >= buf_len) {
32       return WideString();
33     }
34     uint32_t len = buf[index];
35     if (index + len < index || index + len >= buf_len) {
36       return WideString();
37     }
38     return WideString(buf + index + 1, len);
39   }
40   if (m_pBaseMap) {
41     return m_pBaseMap->UnicodeFromCID((uint16_t)charcode);
42   }
43   return WideString();
44 }
45 
ReverseLookup(wchar_t unicode) const46 uint32_t CPDF_ToUnicodeMap::ReverseLookup(wchar_t unicode) const {
47   for (const auto& pair : m_Map) {
48     if (pair.second == static_cast<uint32_t>(unicode))
49       return pair.first;
50   }
51   return 0;
52 }
53 
54 // Static.
StringToCode(const ByteStringView & str)55 uint32_t CPDF_ToUnicodeMap::StringToCode(const ByteStringView& str) {
56   int len = str.GetLength();
57   if (len == 0)
58     return 0;
59 
60   uint32_t result = 0;
61   if (str[0] == '<') {
62     for (int i = 1; i < len && std::isxdigit(str[i]); ++i)
63       result = result * 16 + FXSYS_HexCharToInt(str.CharAt(i));
64     return result;
65   }
66 
67   for (int i = 0; i < len && std::isdigit(str[i]); ++i)
68     result = result * 10 + FXSYS_DecimalCharToInt(str.CharAt(i));
69 
70   return result;
71 }
72 
StringDataAdd(WideString str)73 static WideString StringDataAdd(WideString str) {
74   WideString ret;
75   int len = str.GetLength();
76   wchar_t value = 1;
77   for (int i = len - 1; i >= 0; --i) {
78     wchar_t ch = str[i] + value;
79     if (ch < str[i]) {
80       ret.InsertAtFront(0);
81     } else {
82       ret.InsertAtFront(ch);
83       value = 0;
84     }
85   }
86   if (value)
87     ret.InsertAtFront(value);
88   return ret;
89 }
90 
91 // Static.
StringToWideString(const ByteStringView & str)92 WideString CPDF_ToUnicodeMap::StringToWideString(const ByteStringView& str) {
93   int len = str.GetLength();
94   if (len == 0)
95     return WideString();
96 
97   WideString result;
98   if (str[0] == '<') {
99     int byte_pos = 0;
100     wchar_t ch = 0;
101     for (int i = 1; i < len && std::isxdigit(str[i]); ++i) {
102       ch = ch * 16 + FXSYS_HexCharToInt(str[i]);
103       byte_pos++;
104       if (byte_pos == 4) {
105         result += ch;
106         byte_pos = 0;
107         ch = 0;
108       }
109     }
110     return result;
111   }
112   return result;
113 }
114 
CPDF_ToUnicodeMap()115 CPDF_ToUnicodeMap::CPDF_ToUnicodeMap() : m_pBaseMap(nullptr) {}
116 
~CPDF_ToUnicodeMap()117 CPDF_ToUnicodeMap::~CPDF_ToUnicodeMap() {}
118 
GetUnicode()119 uint32_t CPDF_ToUnicodeMap::GetUnicode() {
120   FX_SAFE_UINT32 uni = m_MultiCharBuf.GetLength();
121   uni = uni * 0x10000 + 0xffff;
122   return uni.ValueOrDefault(0);
123 }
124 
Load(CPDF_Stream * pStream)125 void CPDF_ToUnicodeMap::Load(CPDF_Stream* pStream) {
126   CIDSet cid_set = CIDSET_UNKNOWN;
127   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
128   pAcc->LoadAllDataFiltered();
129   CPDF_SimpleParser parser(pAcc->GetData(), pAcc->GetSize());
130   while (1) {
131     ByteStringView word = parser.GetWord();
132     if (word.IsEmpty()) {
133       break;
134     }
135     if (word == "beginbfchar") {
136       while (1) {
137         word = parser.GetWord();
138         if (word.IsEmpty() || word == "endbfchar") {
139           break;
140         }
141         uint32_t srccode = StringToCode(word);
142         word = parser.GetWord();
143         WideString destcode = StringToWideString(word);
144         int len = destcode.GetLength();
145         if (len == 0) {
146           continue;
147         }
148         if (len == 1) {
149           m_Map[srccode] = destcode[0];
150         } else {
151           m_Map[srccode] = GetUnicode();
152           m_MultiCharBuf.AppendChar(destcode.GetLength());
153           m_MultiCharBuf << destcode;
154         }
155       }
156     } else if (word == "beginbfrange") {
157       while (1) {
158         ByteString low, high;
159         low = parser.GetWord();
160         if (low.IsEmpty() || low == "endbfrange") {
161           break;
162         }
163         high = parser.GetWord();
164         uint32_t lowcode = StringToCode(low.AsStringView());
165         uint32_t highcode =
166             (lowcode & 0xffffff00) | (StringToCode(high.AsStringView()) & 0xff);
167         if (highcode == (uint32_t)-1) {
168           break;
169         }
170         ByteString start(parser.GetWord());
171         if (start == "[") {
172           for (uint32_t code = lowcode; code <= highcode; code++) {
173             ByteString dest(parser.GetWord());
174             WideString destcode = StringToWideString(dest.AsStringView());
175             int len = destcode.GetLength();
176             if (len == 0) {
177               continue;
178             }
179             if (len == 1) {
180               m_Map[code] = destcode[0];
181             } else {
182               m_Map[code] = GetUnicode();
183               m_MultiCharBuf.AppendChar(destcode.GetLength());
184               m_MultiCharBuf << destcode;
185             }
186           }
187           parser.GetWord();
188         } else {
189           WideString destcode = StringToWideString(start.AsStringView());
190           int len = destcode.GetLength();
191           uint32_t value = 0;
192           if (len == 1) {
193             value = StringToCode(start.AsStringView());
194             for (uint32_t code = lowcode; code <= highcode; code++) {
195               m_Map[code] = value++;
196             }
197           } else {
198             for (uint32_t code = lowcode; code <= highcode; code++) {
199               WideString retcode;
200               if (code == lowcode) {
201                 retcode = destcode;
202               } else {
203                 retcode = StringDataAdd(destcode);
204               }
205               m_Map[code] = GetUnicode();
206               m_MultiCharBuf.AppendChar(retcode.GetLength());
207               m_MultiCharBuf << retcode;
208               destcode = retcode;
209             }
210           }
211         }
212       }
213     } else if (word == "/Adobe-Korea1-UCS2") {
214       cid_set = CIDSET_KOREA1;
215     } else if (word == "/Adobe-Japan1-UCS2") {
216       cid_set = CIDSET_JAPAN1;
217     } else if (word == "/Adobe-CNS1-UCS2") {
218       cid_set = CIDSET_CNS1;
219     } else if (word == "/Adobe-GB1-UCS2") {
220       cid_set = CIDSET_GB1;
221     }
222   }
223   if (cid_set) {
224     m_pBaseMap = CPDF_ModuleMgr::Get()
225                      ->GetPageModule()
226                      ->GetFontGlobals()
227                      ->GetCMapManager()
228                      ->GetCID2UnicodeMap(cid_set, false);
229   } else {
230     m_pBaseMap = nullptr;
231   }
232 }
233