1 // Copyright 2017 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/cfx_seekablestreamproxy.h"
8 
9 #if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
10 #include <io.h>
11 #endif  // _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
12 
13 #include <algorithm>
14 #include <limits>
15 #include <memory>
16 #include <utility>
17 #include <vector>
18 
19 #include "core/fxcrt/cfx_memorystream.h"
20 #include "core/fxcrt/fx_codepage.h"
21 #include "core/fxcrt/fx_extension.h"
22 #include "third_party/base/ptr_util.h"
23 #include "third_party/base/stl_util.h"
24 
25 namespace {
26 
// Decodes UTF-8 bytes from |pSrc| (|srcLen| bytes) into wide characters
// written to |pDst|, which has room for |dstLen| characters.
//
// Decoding stops when the source is exhausted, when |dstLen| characters have
// been produced, or at the first byte that cannot be part of a sequence (a
// continuation byte with no pending lead byte, or 0xFE/0xFF). A lead byte
// that is interrupted by an ASCII byte or by another lead byte is dropped.
//
// Returns {src bytes consumed, dst characters produced}. The consumed count
// always ends right after the last character that was produced, so:
//  - bytes of a dropped (interrupted) sequence that precede a produced
//    character ARE counted, and the caller will not decode them again;
//  - a trailing incomplete sequence, or the byte that stopped decoding, is
//    NOT counted, so the caller can retry from there.
//
// Code points that do not fit in wchar_t (e.g. above 0xFFFF where wchar_t is
// 16 bits wide) are truncated by the narrowing store.
std::pair<size_t, size_t> UTF8Decode(const char* pSrc,
                                     size_t srcLen,
                                     wchar_t* pDst,
                                     size_t dstLen) {
  ASSERT(pDst && dstLen > 0);

  uint32_t dwCode = 0;
  int32_t iPending = 0;  // Continuation bytes still expected.
  size_t iSrcNum = 0;
  size_t iDstNum = 0;
  // Checking the capacity in the loop condition guarantees nothing is
  // written when |dstLen| is 0, even if the ASSERT above is compiled out.
  for (size_t iIndex = 0; iIndex < srcLen && iDstNum < dstLen; ++iIndex) {
    const uint8_t byte = static_cast<uint8_t>(pSrc[iIndex]);
    bool bComplete = false;
    if (byte < 0x80) {
      // ASCII. Any sequence that was in flight is abandoned.
      iPending = 0;
      dwCode = byte;
      bComplete = true;
    } else if (byte < 0xc0) {
      // Continuation byte: only valid while a lead byte is pending.
      if (iPending < 1)
        break;

      --iPending;
      dwCode |= static_cast<uint32_t>(byte & 0x3f) << (iPending * 6);
      bComplete = (iPending == 0);
    } else if (byte < 0xe0) {
      iPending = 1;
      dwCode = static_cast<uint32_t>(byte & 0x1f) << 6;
    } else if (byte < 0xf0) {
      iPending = 2;
      dwCode = static_cast<uint32_t>(byte & 0x0f) << 12;
    } else if (byte < 0xf8) {
      iPending = 3;
      dwCode = static_cast<uint32_t>(byte & 0x07) << 18;
    } else if (byte < 0xfc) {
      iPending = 4;
      dwCode = static_cast<uint32_t>(byte & 0x03) << 24;
    } else if (byte < 0xfe) {
      iPending = 5;
      dwCode = static_cast<uint32_t>(byte & 0x01) << 30;
    } else {
      break;
    }

    if (bComplete) {
      pDst[iDstNum++] = static_cast<wchar_t>(dwCode);
      // Everything up to and including this byte has been dealt with. Using
      // the index (rather than adding the sequence length) also accounts for
      // bytes of abandoned sequences that were skipped on the way here.
      iSrcNum = iIndex + 1;
    }
  }
  return {iSrcNum, iDstNum};
}
93 
// Widens |iLength| 16-bit code units stored at the start of |pBuffer| into
// |iLength| wchar_t values, in place. |pBuffer| must be large enough to hold
// |iLength| wchar_t elements. Only meaningful where wchar_t is wider than 16
// bits (asserted below).
void UTF16ToWChar(void* pBuffer, size_t iLength) {
  ASSERT(pBuffer);
  ASSERT(iLength > 0);
  ASSERT(sizeof(wchar_t) > 2);

  uint16_t* pSrc = static_cast<uint16_t*>(pBuffer);
  wchar_t* pDst = static_cast<wchar_t*>(pBuffer);

  // Source and destination overlap and each destination element is wider
  // than its source element, so the copy must run back to front. Going front
  // to back would overwrite pSrc[i + 1] while storing pDst[i].
  for (size_t i = iLength; i > 0; --i)
    pDst[i - 1] = static_cast<wchar_t>(pSrc[i - 1]);
}
104 
// Swaps the two bytes of the low 16 bits of each of the |iLength| elements
// of |pStr|, in place. Used to turn UTF-16 code units read in the opposite
// byte order into their real values. Any bits above the low 16 are discarded,
// so the result is always a plain 16-bit code unit regardless of how wide
// wchar_t is.
void SwapByteOrder(wchar_t* pStr, size_t iLength) {
  ASSERT(pStr);

  for (size_t i = 0; i < iLength; ++i) {
    const uint16_t wch = static_cast<uint16_t>(pStr[i]);
    // The shifts are evaluated as int; narrowing back to uint16_t drops the
    // bits shifted past bit 15. Both bytes of the swapped unit are kept.
    const uint16_t swapped = static_cast<uint16_t>((wch >> 8) | (wch << 8));
    pStr[i] = static_cast<wchar_t>(swapped);
  }
}
127 
128 }  // namespace
129 
// Byte-order-mark signatures, expressed the way the constructor below sees
// them: the first stream bytes are read into the lowest-addressed bytes of a
// zeroed uint32_t, so on a little-endian host the first byte of the stream
// ends up in the least significant byte of the value.

// Selects the first three stream bytes (length of the UTF-8 BOM).
constexpr uint32_t BOM_MASK = 0x00FFFFFF;
// UTF-8 BOM, stream bytes EF BB BF.
constexpr uint32_t BOM_UTF8 = 0x00BFBBEF;
// Selects the first two stream bytes (length of a UTF-16 BOM).
constexpr uint32_t BOM_UTF16_MASK = 0x0000FFFF;
// UTF-16 big-endian BOM, stream bytes FE FF.
constexpr uint32_t BOM_UTF16_BE = 0x0000FFFE;
// UTF-16 little-endian BOM, stream bytes FF FE.
constexpr uint32_t BOM_UTF16_LE = 0x0000FEFF;
135 
CFX_SeekableStreamProxy(const RetainPtr<IFX_SeekableStream> & stream,bool isWriteStream)136 CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(
137     const RetainPtr<IFX_SeekableStream>& stream,
138     bool isWriteStream)
139     : m_IsWriteStream(isWriteStream),
140       m_wCodePage(FX_CODEPAGE_DefANSI),
141       m_wBOMLength(0),
142       m_iPosition(0),
143       m_pStream(stream) {
144   ASSERT(m_pStream);
145 
146   if (isWriteStream) {
147     m_iPosition = m_pStream->GetSize();
148     return;
149   }
150 
151   Seek(From::Begin, 0);
152 
153   uint32_t bom = 0;
154   ReadData(reinterpret_cast<uint8_t*>(&bom), 3);
155 
156   bom &= BOM_MASK;
157   if (bom == BOM_UTF8) {
158     m_wBOMLength = 3;
159     m_wCodePage = FX_CODEPAGE_UTF8;
160   } else {
161     bom &= BOM_UTF16_MASK;
162     if (bom == BOM_UTF16_BE) {
163       m_wBOMLength = 2;
164       m_wCodePage = FX_CODEPAGE_UTF16BE;
165     } else if (bom == BOM_UTF16_LE) {
166       m_wBOMLength = 2;
167       m_wCodePage = FX_CODEPAGE_UTF16LE;
168     } else {
169       m_wBOMLength = 0;
170       m_wCodePage = FXSYS_GetACP();
171     }
172   }
173 
174   Seek(From::Begin, static_cast<FX_FILESIZE>(m_wBOMLength));
175 }
176 
// Convenience constructor for reading text from a caller-supplied memory
// buffer: wraps |data| / |size| in a CFX_MemoryStream and delegates to the
// stream constructor with isWriteStream == false, i.e. in read mode with BOM
// detection.
// NOTE(review): the `false` passed to CFX_MemoryStream presumably means the
// stream does not take ownership of |data| -- confirm against
// CFX_MemoryStream's constructor.
CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(uint8_t* data, size_t size)
    : CFX_SeekableStreamProxy(
          pdfium::MakeRetain<CFX_MemoryStream>(data, size, false),
          false) {}
181 
~CFX_SeekableStreamProxy()182 CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() {}
183 
Seek(From eSeek,FX_FILESIZE iOffset)184 void CFX_SeekableStreamProxy::Seek(From eSeek, FX_FILESIZE iOffset) {
185   switch (eSeek) {
186     case From::Begin:
187       m_iPosition = iOffset;
188       break;
189     case From::Current: {
190       pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
191       new_pos += iOffset;
192       m_iPosition =
193           new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max());
194     } break;
195   }
196   m_iPosition =
197       pdfium::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetLength());
198 }
199 
SetCodePage(uint16_t wCodePage)200 void CFX_SeekableStreamProxy::SetCodePage(uint16_t wCodePage) {
201   if (m_wBOMLength > 0)
202     return;
203   m_wCodePage = wCodePage;
204 }
205 
ReadData(uint8_t * pBuffer,size_t iBufferSize)206 size_t CFX_SeekableStreamProxy::ReadData(uint8_t* pBuffer, size_t iBufferSize) {
207   ASSERT(pBuffer && iBufferSize > 0);
208 
209   if (m_IsWriteStream)
210     return 0;
211 
212   iBufferSize =
213       std::min(iBufferSize, static_cast<size_t>(GetLength() - m_iPosition));
214   if (iBufferSize <= 0)
215     return 0;
216 
217   if (!m_pStream->ReadBlock(pBuffer, m_iPosition, iBufferSize))
218     return 0;
219 
220   pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
221   new_pos += iBufferSize;
222   m_iPosition = new_pos.ValueOrDefault(m_iPosition);
223   return new_pos.IsValid() ? iBufferSize : 0;
224 }
225 
ReadString(wchar_t * pStr,size_t iMaxLength,bool * bEOS)226 size_t CFX_SeekableStreamProxy::ReadString(wchar_t* pStr,
227                                            size_t iMaxLength,
228                                            bool* bEOS) {
229   if (!pStr || iMaxLength == 0)
230     return 0;
231 
232   if (m_IsWriteStream)
233     return 0;
234 
235   if (m_wCodePage == FX_CODEPAGE_UTF16LE ||
236       m_wCodePage == FX_CODEPAGE_UTF16BE) {
237     size_t iBytes = iMaxLength * 2;
238     size_t iLen = ReadData(reinterpret_cast<uint8_t*>(pStr), iBytes);
239     iMaxLength = iLen / 2;
240     if (sizeof(wchar_t) > 2 && iMaxLength > 0)
241       UTF16ToWChar(pStr, iMaxLength);
242 
243     if (m_wCodePage == FX_CODEPAGE_UTF16BE)
244       SwapByteOrder(pStr, iMaxLength);
245 
246   } else {
247     FX_FILESIZE pos = GetPosition();
248     size_t iBytes =
249         std::min(iMaxLength, static_cast<size_t>(GetLength() - pos));
250 
251     if (iBytes > 0) {
252       std::vector<uint8_t> buf(iBytes);
253 
254       size_t iLen = ReadData(buf.data(), iBytes);
255       if (m_wCodePage != FX_CODEPAGE_UTF8)
256         return 0;
257 
258       size_t iSrc = 0;
259       std::tie(iSrc, iMaxLength) = UTF8Decode(
260           reinterpret_cast<const char*>(buf.data()), iLen, pStr, iMaxLength);
261       Seek(From::Current, iSrc - iLen);
262     } else {
263       iMaxLength = 0;
264     }
265   }
266 
267   *bEOS = IsEOF();
268   return iMaxLength;
269 }
270 
WriteString(const WideStringView & str)271 void CFX_SeekableStreamProxy::WriteString(const WideStringView& str) {
272   if (!m_IsWriteStream || str.GetLength() == 0 ||
273       m_wCodePage != FX_CODEPAGE_UTF8) {
274     return;
275   }
276   if (!m_pStream->WriteBlock(str.unterminated_c_str(), m_iPosition,
277                              str.GetLength() * sizeof(wchar_t))) {
278     return;
279   }
280 
281   pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
282   new_pos += str.GetLength() * sizeof(wchar_t);
283   m_iPosition = new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max());
284   m_iPosition =
285       pdfium::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetLength());
286 }
287