1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/widestring.h"
8 
9 #include <stddef.h>
10 
11 #include <algorithm>
12 #include <cctype>
13 #include <cwctype>
14 
15 #include "core/fxcrt/fx_codepage.h"
16 #include "core/fxcrt/fx_extension.h"
17 #include "core/fxcrt/fx_safe_types.h"
18 #include "core/fxcrt/string_pool_template.h"
19 #include "third_party/base/numerics/safe_math.h"
20 #include "third_party/base/stl_util.h"
21 
// Explicit instantiations of the string templates for wchar_t / WideString,
// plus the std::hash specialization for WideString.
template class fxcrt::StringDataTemplate<wchar_t>;
template class fxcrt::StringViewTemplate<wchar_t>;
template class fxcrt::StringPoolTemplate<WideString>;
template struct std::hash<WideString>;
26 
// Length-modifier markers recorded by GuessSizeForVSWPrintf() while parsing a
// conversion specification ('h' -> FORCE_ANSI, 'l' -> FORCE_UNICODE,
// "I64" -> FORCE_INT64). They are OR'ed with the conversion character so one
// switch can dispatch on the modifier/conversion pair.
#define FORCE_ANSI 0x10000
#define FORCE_UNICODE 0x20000
#define FORCE_INT64 0x40000
30 
31 namespace {
32 
// Characters removed by the argument-less Trim() and TrimLeft(): tab, line
// feed, vertical tab, form feed, carriage return and space.
constexpr wchar_t kWideTrimChars[] = L"\x09\x0a\x0b\x0c\x0d\x20";
34 
// Finds the first occurrence of |needle| (|needle_len| characters) within the
// first |haystack_len| characters of |haystack|. Neither buffer has to be
// NUL-terminated. Returns a pointer to the start of the match, or nullptr when
// there is no match, the needle is empty, or a length is not positive.
const wchar_t* FX_wcsstr(const wchar_t* haystack,
                         int haystack_len,
                         const wchar_t* needle,
                         int needle_len) {
  // Rejecting non-positive needle lengths also guarantees |haystack_len| is
  // positive past this point, so both ranges given to std::search() are
  // well-formed. (A negative |needle_len| used to slip through and walk past
  // the end of the haystack.)
  if (needle_len <= 0 || needle_len > haystack_len)
    return nullptr;

  const wchar_t* const haystack_end = haystack + haystack_len;
  const wchar_t* const match =
      std::search(haystack, haystack_end, needle, needle + needle_len);
  return match != haystack_end ? match : nullptr;
}
58 
GuessSizeForVSWPrintf(const wchar_t * pFormat,va_list argList)59 Optional<size_t> GuessSizeForVSWPrintf(const wchar_t* pFormat,
60                                        va_list argList) {
61   size_t nMaxLen = 0;
62   for (const wchar_t* pStr = pFormat; *pStr != 0; pStr++) {
63     if (*pStr != '%' || *(pStr = pStr + 1) == '%') {
64       ++nMaxLen;
65       continue;
66     }
67     int nItemLen = 0;
68     int nWidth = 0;
69     for (; *pStr != 0; pStr++) {
70       if (*pStr == '#') {
71         nMaxLen += 2;
72       } else if (*pStr == '*') {
73         nWidth = va_arg(argList, int);
74       } else if (*pStr != '-' && *pStr != '+' && *pStr != '0' && *pStr != ' ') {
75         break;
76       }
77     }
78     if (nWidth == 0) {
79       nWidth = FXSYS_wtoi(pStr);
80       while (FXSYS_IsDecimalDigit(*pStr))
81         ++pStr;
82     }
83     if (nWidth < 0 || nWidth > 128 * 1024)
84       return pdfium::nullopt;
85     int nPrecision = 0;
86     if (*pStr == '.') {
87       pStr++;
88       if (*pStr == '*') {
89         nPrecision = va_arg(argList, int);
90         pStr++;
91       } else {
92         nPrecision = FXSYS_wtoi(pStr);
93         while (FXSYS_IsDecimalDigit(*pStr))
94           ++pStr;
95       }
96     }
97     if (nPrecision < 0 || nPrecision > 128 * 1024)
98       return pdfium::nullopt;
99     int nModifier = 0;
100     if (*pStr == L'I' && *(pStr + 1) == L'6' && *(pStr + 2) == L'4') {
101       pStr += 3;
102       nModifier = FORCE_INT64;
103     } else {
104       switch (*pStr) {
105         case 'h':
106           nModifier = FORCE_ANSI;
107           pStr++;
108           break;
109         case 'l':
110           nModifier = FORCE_UNICODE;
111           pStr++;
112           break;
113         case 'F':
114         case 'N':
115         case 'L':
116           pStr++;
117           break;
118       }
119     }
120     switch (*pStr | nModifier) {
121       case 'c':
122       case 'C':
123         nItemLen = 2;
124         va_arg(argList, int);
125         break;
126       case 'c' | FORCE_ANSI:
127       case 'C' | FORCE_ANSI:
128         nItemLen = 2;
129         va_arg(argList, int);
130         break;
131       case 'c' | FORCE_UNICODE:
132       case 'C' | FORCE_UNICODE:
133         nItemLen = 2;
134         va_arg(argList, int);
135         break;
136       case 's': {
137         const wchar_t* pstrNextArg = va_arg(argList, const wchar_t*);
138         if (pstrNextArg) {
139           nItemLen = wcslen(pstrNextArg);
140           if (nItemLen < 1) {
141             nItemLen = 1;
142           }
143         } else {
144           nItemLen = 6;
145         }
146       } break;
147       case 'S': {
148         const char* pstrNextArg = va_arg(argList, const char*);
149         if (pstrNextArg) {
150           nItemLen = strlen(pstrNextArg);
151           if (nItemLen < 1) {
152             nItemLen = 1;
153           }
154         } else {
155           nItemLen = 6;
156         }
157       } break;
158       case 's' | FORCE_ANSI:
159       case 'S' | FORCE_ANSI: {
160         const char* pstrNextArg = va_arg(argList, const char*);
161         if (pstrNextArg) {
162           nItemLen = strlen(pstrNextArg);
163           if (nItemLen < 1) {
164             nItemLen = 1;
165           }
166         } else {
167           nItemLen = 6;
168         }
169       } break;
170       case 's' | FORCE_UNICODE:
171       case 'S' | FORCE_UNICODE: {
172         const wchar_t* pstrNextArg = va_arg(argList, wchar_t*);
173         if (pstrNextArg) {
174           nItemLen = wcslen(pstrNextArg);
175           if (nItemLen < 1) {
176             nItemLen = 1;
177           }
178         } else {
179           nItemLen = 6;
180         }
181       } break;
182     }
183     if (nItemLen != 0) {
184       if (nPrecision != 0 && nItemLen > nPrecision) {
185         nItemLen = nPrecision;
186       }
187       if (nItemLen < nWidth) {
188         nItemLen = nWidth;
189       }
190     } else {
191       switch (*pStr) {
192         case 'd':
193         case 'i':
194         case 'u':
195         case 'x':
196         case 'X':
197         case 'o':
198           if (nModifier & FORCE_INT64) {
199             va_arg(argList, int64_t);
200           } else {
201             va_arg(argList, int);
202           }
203           nItemLen = 32;
204           if (nItemLen < nWidth + nPrecision) {
205             nItemLen = nWidth + nPrecision;
206           }
207           break;
208         case 'a':
209         case 'A':
210         case 'e':
211         case 'E':
212         case 'g':
213         case 'G':
214           va_arg(argList, double);
215           nItemLen = 128;
216           if (nItemLen < nWidth + nPrecision) {
217             nItemLen = nWidth + nPrecision;
218           }
219           break;
220         case 'f':
221           if (nWidth + nPrecision > 100) {
222             nItemLen = nPrecision + nWidth + 128;
223           } else {
224             double f;
225             char pszTemp[256];
226             f = va_arg(argList, double);
227             FXSYS_snprintf(pszTemp, sizeof(pszTemp), "%*.*f", nWidth,
228                            nPrecision + 6, f);
229             nItemLen = strlen(pszTemp);
230           }
231           break;
232         case 'p':
233           va_arg(argList, void*);
234           nItemLen = 32;
235           if (nItemLen < nWidth + nPrecision) {
236             nItemLen = nWidth + nPrecision;
237           }
238           break;
239         case 'n':
240           va_arg(argList, int*);
241           break;
242       }
243     }
244     nMaxLen += nItemLen;
245   }
246   nMaxLen += 32;  // Fudge factor.
247   return nMaxLen;
248 }
249 
250 // Returns string unless we ran out of space.
TryVSWPrintf(size_t size,const wchar_t * pFormat,va_list argList)251 Optional<WideString> TryVSWPrintf(size_t size,
252                                   const wchar_t* pFormat,
253                                   va_list argList) {
254   if (!size)
255     return {};
256 
257   WideString str;
258   {
259     // Span's lifetime must end before ReleaseBuffer() below.
260     pdfium::span<wchar_t> buffer = str.GetBuffer(size);
261 
262     // In the following two calls, there's always space in the WideString
263     // for a terminating NUL that's not included in the span.
264     // For vswprintf(), MSAN won't untaint the buffer on a truncated write's
265     // -1 return code even though the buffer is written. Probably just as well
266     // not to trust the vendor's implementation to write anything anyways.
267     // See https://crbug.com/705912.
268     memset(buffer.data(), 0, (size + 1) * sizeof(wchar_t));
269     int ret = vswprintf(buffer.data(), size + 1, pFormat, argList);
270 
271     bool bSufficientBuffer = ret >= 0 || buffer[size - 1] == 0;
272     if (!bSufficientBuffer)
273       return {};
274   }
275   str.ReleaseBuffer(str.GetStringLength());
276   return {str};
277 }
278 
279 }  // namespace
280 
281 namespace fxcrt {
282 
283 static_assert(sizeof(WideString) <= sizeof(wchar_t*),
284               "Strings must not require more space than pointers");
285 
286 // static
FormatV(const wchar_t * format,va_list argList)287 WideString WideString::FormatV(const wchar_t* format, va_list argList) {
288   va_list argListCopy;
289   va_copy(argListCopy, argList);
290   int maxLen = vswprintf(nullptr, 0, format, argListCopy);
291   va_end(argListCopy);
292 
293   if (maxLen <= 0) {
294     va_copy(argListCopy, argList);
295     auto guess = GuessSizeForVSWPrintf(format, argListCopy);
296     va_end(argListCopy);
297 
298     if (!guess.has_value())
299       return WideString();
300     maxLen = pdfium::base::checked_cast<int>(guess.value());
301   }
302 
303   while (maxLen < 32 * 1024) {
304     va_copy(argListCopy, argList);
305     Optional<WideString> ret =
306         TryVSWPrintf(static_cast<size_t>(maxLen), format, argListCopy);
307     va_end(argListCopy);
308 
309     if (ret)
310       return *ret;
311     maxLen *= 2;
312   }
313   return WideString();
314 }
315 
316 // static
Format(const wchar_t * pFormat,...)317 WideString WideString::Format(const wchar_t* pFormat, ...) {
318   va_list argList;
319   va_start(argList, pFormat);
320   WideString ret = FormatV(pFormat, argList);
321   va_end(argList);
322   return ret;
323 }
324 
WideString()325 WideString::WideString() {}
326 
// Copy-constructs by sharing |other|'s reference-counted string data.
WideString::WideString(const WideString& other) = default;
328 
WideString(WideString && other)329 WideString::WideString(WideString&& other) noexcept {
330   m_pData.Swap(other.m_pData);
331 }
332 
WideString(const wchar_t * pStr,size_t nLen)333 WideString::WideString(const wchar_t* pStr, size_t nLen) {
334   if (nLen)
335     m_pData.Reset(StringData::Create(pStr, nLen));
336 }
337 
WideString(wchar_t ch)338 WideString::WideString(wchar_t ch) {
339   m_pData.Reset(StringData::Create(1));
340   m_pData->m_String[0] = ch;
341 }
342 
WideString(const wchar_t * ptr)343 WideString::WideString(const wchar_t* ptr)
344     : WideString(ptr, ptr ? wcslen(ptr) : 0) {}
345 
WideString(WideStringView stringSrc)346 WideString::WideString(WideStringView stringSrc) {
347   if (!stringSrc.IsEmpty()) {
348     m_pData.Reset(StringData::Create(stringSrc.unterminated_c_str(),
349                                      stringSrc.GetLength()));
350   }
351 }
352 
WideString(WideStringView str1,WideStringView str2)353 WideString::WideString(WideStringView str1, WideStringView str2) {
354   FX_SAFE_SIZE_T nSafeLen = str1.GetLength();
355   nSafeLen += str2.GetLength();
356 
357   size_t nNewLen = nSafeLen.ValueOrDie();
358   if (nNewLen == 0)
359     return;
360 
361   m_pData.Reset(StringData::Create(nNewLen));
362   m_pData->CopyContents(str1.unterminated_c_str(), str1.GetLength());
363   m_pData->CopyContentsAt(str1.GetLength(), str2.unterminated_c_str(),
364                           str2.GetLength());
365 }
366 
WideString(const std::initializer_list<WideStringView> & list)367 WideString::WideString(const std::initializer_list<WideStringView>& list) {
368   FX_SAFE_SIZE_T nSafeLen = 0;
369   for (const auto& item : list)
370     nSafeLen += item.GetLength();
371 
372   size_t nNewLen = nSafeLen.ValueOrDie();
373   if (nNewLen == 0)
374     return;
375 
376   m_pData.Reset(StringData::Create(nNewLen));
377 
378   size_t nOffset = 0;
379   for (const auto& item : list) {
380     m_pData->CopyContentsAt(nOffset, item.unterminated_c_str(),
381                             item.GetLength());
382     nOffset += item.GetLength();
383   }
384 }
385 
~WideString()386 WideString::~WideString() {}
387 
operator =(const wchar_t * str)388 WideString& WideString::operator=(const wchar_t* str) {
389   if (!str || !str[0])
390     clear();
391   else
392     AssignCopy(str, wcslen(str));
393 
394   return *this;
395 }
396 
operator =(WideStringView str)397 WideString& WideString::operator=(WideStringView str) {
398   if (str.IsEmpty())
399     clear();
400   else
401     AssignCopy(str.unterminated_c_str(), str.GetLength());
402 
403   return *this;
404 }
405 
// Copy-assigns by sharing |that|'s string data. Nothing happens when both
// strings already refer to the same data.
WideString& WideString::operator=(const WideString& that) {
  if (m_pData == that.m_pData)
    return *this;

  m_pData = that.m_pData;
  return *this;
}
412 
operator =(WideString && that)413 WideString& WideString::operator=(WideString&& that) {
414   if (m_pData != that.m_pData)
415     m_pData = std::move(that.m_pData);
416 
417   return *this;
418 }
419 
operator +=(const wchar_t * str)420 WideString& WideString::operator+=(const wchar_t* str) {
421   if (str)
422     Concat(str, wcslen(str));
423 
424   return *this;
425 }
426 
operator +=(wchar_t ch)427 WideString& WideString::operator+=(wchar_t ch) {
428   Concat(&ch, 1);
429   return *this;
430 }
431 
// Appends the contents of |str|; a string without data is ignored.
WideString& WideString::operator+=(const WideString& str) {
  if (!str.m_pData)
    return *this;

  Concat(str.m_pData->m_String, str.m_pData->m_nDataLength);
  return *this;
}
438 
operator +=(WideStringView str)439 WideString& WideString::operator+=(WideStringView str) {
440   if (!str.IsEmpty())
441     Concat(str.unterminated_c_str(), str.GetLength());
442 
443   return *this;
444 }
445 
operator ==(const wchar_t * ptr) const446 bool WideString::operator==(const wchar_t* ptr) const {
447   if (!m_pData)
448     return !ptr || !ptr[0];
449 
450   if (!ptr)
451     return m_pData->m_nDataLength == 0;
452 
453   return wcslen(ptr) == m_pData->m_nDataLength &&
454          wmemcmp(ptr, m_pData->m_String, m_pData->m_nDataLength) == 0;
455 }
456 
operator ==(WideStringView str) const457 bool WideString::operator==(WideStringView str) const {
458   if (!m_pData)
459     return str.IsEmpty();
460 
461   return m_pData->m_nDataLength == str.GetLength() &&
462          wmemcmp(m_pData->m_String, str.unterminated_c_str(),
463                  str.GetLength()) == 0;
464 }
465 
operator ==(const WideString & other) const466 bool WideString::operator==(const WideString& other) const {
467   if (m_pData == other.m_pData)
468     return true;
469 
470   if (IsEmpty())
471     return other.IsEmpty();
472 
473   if (other.IsEmpty())
474     return false;
475 
476   return other.m_pData->m_nDataLength == m_pData->m_nDataLength &&
477          wmemcmp(other.m_pData->m_String, m_pData->m_String,
478                  m_pData->m_nDataLength) == 0;
479 }
480 
operator <(const wchar_t * ptr) const481 bool WideString::operator<(const wchar_t* ptr) const {
482   return Compare(ptr) < 0;
483 }
484 
operator <(WideStringView str) const485 bool WideString::operator<(WideStringView str) const {
486   if (!m_pData && !str.unterminated_c_str())
487     return false;
488   if (c_str() == str.unterminated_c_str())
489     return false;
490 
491   size_t len = GetLength();
492   size_t other_len = str.GetLength();
493   int result =
494       wmemcmp(c_str(), str.unterminated_c_str(), std::min(len, other_len));
495   return result < 0 || (result == 0 && len < other_len);
496 }
497 
operator <(const WideString & other) const498 bool WideString::operator<(const WideString& other) const {
499   return Compare(other) < 0;
500 }
501 
// Replaces the contents with a copy of the |nSrcLen| characters at |pSrcData|.
// NOTE(review): AllocBeforeWrite(0) can leave m_pData null, which would be
// dereferenced below; the callers visible in this file only pass non-empty
// input - confirm for any other callers.
void WideString::AssignCopy(const wchar_t* pSrcData, size_t nSrcLen) {
  // Make sure a writable buffer of sufficient size is in place.
  AllocBeforeWrite(nSrcLen);
  m_pData->CopyContents(pSrcData, nSrcLen);
  m_pData->m_nDataLength = nSrcLen;
}
507 
ReallocBeforeWrite(size_t nNewLength)508 void WideString::ReallocBeforeWrite(size_t nNewLength) {
509   if (m_pData && m_pData->CanOperateInPlace(nNewLength))
510     return;
511 
512   if (nNewLength == 0) {
513     clear();
514     return;
515   }
516 
517   RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
518   if (m_pData) {
519     size_t nCopyLength = std::min(m_pData->m_nDataLength, nNewLength);
520     pNewData->CopyContents(m_pData->m_String, nCopyLength);
521     pNewData->m_nDataLength = nCopyLength;
522   } else {
523     pNewData->m_nDataLength = 0;
524   }
525   pNewData->m_String[pNewData->m_nDataLength] = 0;
526   m_pData.Swap(pNewData);
527 }
528 
AllocBeforeWrite(size_t nNewLength)529 void WideString::AllocBeforeWrite(size_t nNewLength) {
530   if (m_pData && m_pData->CanOperateInPlace(nNewLength))
531     return;
532 
533   if (nNewLength == 0) {
534     clear();
535     return;
536   }
537 
538   m_pData.Reset(StringData::Create(nNewLength));
539 }
540 
ReleaseBuffer(size_t nNewLength)541 void WideString::ReleaseBuffer(size_t nNewLength) {
542   if (!m_pData)
543     return;
544 
545   nNewLength = std::min(nNewLength, m_pData->m_nAllocLength);
546   if (nNewLength == 0) {
547     clear();
548     return;
549   }
550 
551   ASSERT(m_pData->m_nRefs == 1);
552   m_pData->m_nDataLength = nNewLength;
553   m_pData->m_String[nNewLength] = 0;
554   if (m_pData->m_nAllocLength - nNewLength >= 32) {
555     // Over arbitrary threshold, so pay the price to relocate.  Force copy to
556     // always occur by holding a second reference to the string.
557     WideString preserve(*this);
558     ReallocBeforeWrite(nNewLength);
559   }
560 }
561 
Reserve(size_t len)562 void WideString::Reserve(size_t len) {
563   GetBuffer(len);
564 }
565 
GetBuffer(size_t nMinBufLength)566 pdfium::span<wchar_t> WideString::GetBuffer(size_t nMinBufLength) {
567   if (!m_pData) {
568     if (nMinBufLength == 0)
569       return pdfium::span<wchar_t>();
570 
571     m_pData.Reset(StringData::Create(nMinBufLength));
572     m_pData->m_nDataLength = 0;
573     m_pData->m_String[0] = 0;
574     return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
575   }
576 
577   if (m_pData->CanOperateInPlace(nMinBufLength))
578     return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
579 
580   nMinBufLength = std::max(nMinBufLength, m_pData->m_nDataLength);
581   if (nMinBufLength == 0)
582     return pdfium::span<wchar_t>();
583 
584   RetainPtr<StringData> pNewData(StringData::Create(nMinBufLength));
585   pNewData->CopyContents(*m_pData);
586   pNewData->m_nDataLength = m_pData->m_nDataLength;
587   m_pData.Swap(pNewData);
588   return pdfium::span<wchar_t>(m_pData->m_String, m_pData->m_nAllocLength);
589 }
590 
Delete(size_t index,size_t count)591 size_t WideString::Delete(size_t index, size_t count) {
592   if (!m_pData)
593     return 0;
594 
595   size_t old_length = m_pData->m_nDataLength;
596   if (count == 0 || index != pdfium::clamp<size_t>(index, 0, old_length))
597     return old_length;
598 
599   size_t removal_length = index + count;
600   if (removal_length > old_length)
601     return old_length;
602 
603   ReallocBeforeWrite(old_length);
604   size_t chars_to_copy = old_length - removal_length + 1;
605   wmemmove(m_pData->m_String + index, m_pData->m_String + removal_length,
606            chars_to_copy);
607   m_pData->m_nDataLength = old_length - count;
608   return m_pData->m_nDataLength;
609 }
610 
Concat(const wchar_t * pSrcData,size_t nSrcLen)611 void WideString::Concat(const wchar_t* pSrcData, size_t nSrcLen) {
612   if (!pSrcData || nSrcLen == 0)
613     return;
614 
615   if (!m_pData) {
616     m_pData.Reset(StringData::Create(pSrcData, nSrcLen));
617     return;
618   }
619 
620   if (m_pData->CanOperateInPlace(m_pData->m_nDataLength + nSrcLen)) {
621     m_pData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
622     m_pData->m_nDataLength += nSrcLen;
623     return;
624   }
625 
626   size_t nConcatLen = std::max(m_pData->m_nDataLength / 2, nSrcLen);
627   RetainPtr<StringData> pNewData(
628       StringData::Create(m_pData->m_nDataLength + nConcatLen));
629   pNewData->CopyContents(*m_pData);
630   pNewData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
631   pNewData->m_nDataLength = m_pData->m_nDataLength + nSrcLen;
632   m_pData.Swap(pNewData);
633 }
634 
ReferenceCountForTesting() const635 intptr_t WideString::ReferenceCountForTesting() const {
636   return m_pData ? m_pData->m_nRefs : 0;
637 }
638 
ToASCII() const639 ByteString WideString::ToASCII() const {
640   ByteString result;
641   result.Reserve(GetLength());
642   for (wchar_t wc : *this)
643     result.InsertAtBack(static_cast<char>(wc & 0x7f));
644   return result;
645 }
646 
ToLatin1() const647 ByteString WideString::ToLatin1() const {
648   ByteString result;
649   result.Reserve(GetLength());
650   for (wchar_t wc : *this)
651     result.InsertAtBack(static_cast<char>(wc & 0xff));
652   return result;
653 }
654 
ToDefANSI() const655 ByteString WideString::ToDefANSI() const {
656   int src_len = GetLength();
657   int dest_len = FXSYS_WideCharToMultiByte(
658       FX_CODEPAGE_DefANSI, 0, c_str(), src_len, nullptr, 0, nullptr, nullptr);
659   if (!dest_len)
660     return ByteString();
661 
662   ByteString bstr;
663   {
664     // Span's lifetime must end before ReleaseBuffer() below.
665     pdfium::span<char> dest_buf = bstr.GetBuffer(dest_len);
666     FXSYS_WideCharToMultiByte(FX_CODEPAGE_DefANSI, 0, c_str(), src_len,
667                               dest_buf.data(), dest_len, nullptr, nullptr);
668   }
669   bstr.ReleaseBuffer(dest_len);
670   return bstr;
671 }
672 
ToUTF8() const673 ByteString WideString::ToUTF8() const {
674   return FX_UTF8Encode(AsStringView());
675 }
676 
ToUTF16LE() const677 ByteString WideString::ToUTF16LE() const {
678   if (!m_pData)
679     return ByteString("\0\0", 2);
680 
681   ByteString result;
682   int len = m_pData->m_nDataLength;
683   {
684     // Span's lifetime must end before ReleaseBuffer() below.
685     pdfium::span<char> buffer = result.GetBuffer(len * 2 + 2);
686     for (int i = 0; i < len; i++) {
687       buffer[i * 2] = m_pData->m_String[i] & 0xff;
688       buffer[i * 2 + 1] = m_pData->m_String[i] >> 8;
689     }
690     buffer[len * 2] = 0;
691     buffer[len * 2 + 1] = 0;
692   }
693   result.ReleaseBuffer(len * 2 + 2);
694   return result;
695 }
696 
Substr(size_t first,size_t count) const697 WideString WideString::Substr(size_t first, size_t count) const {
698   if (!m_pData)
699     return WideString();
700 
701   if (!IsValidIndex(first))
702     return WideString();
703 
704   if (count == 0 || !IsValidLength(count))
705     return WideString();
706 
707   if (!IsValidIndex(first + count - 1))
708     return WideString();
709 
710   if (first == 0 && count == GetLength())
711     return *this;
712 
713   WideString dest;
714   AllocCopy(dest, count, first);
715   return dest;
716 }
717 
First(size_t count) const718 WideString WideString::First(size_t count) const {
719   if (count == 0 || !IsValidLength(count))
720     return WideString();
721   return Substr(0, count);
722 }
723 
Last(size_t count) const724 WideString WideString::Last(size_t count) const {
725   if (count == 0 || !IsValidLength(count))
726     return WideString();
727   return Substr(GetLength() - count, count);
728 }
729 
AllocCopy(WideString & dest,size_t nCopyLen,size_t nCopyIndex) const730 void WideString::AllocCopy(WideString& dest,
731                            size_t nCopyLen,
732                            size_t nCopyIndex) const {
733   if (nCopyLen == 0)
734     return;
735 
736   RetainPtr<StringData> pNewData(
737       StringData::Create(m_pData->m_String + nCopyIndex, nCopyLen));
738   dest.m_pData.Swap(pNewData);
739 }
740 
Insert(size_t index,wchar_t ch)741 size_t WideString::Insert(size_t index, wchar_t ch) {
742   const size_t cur_length = GetLength();
743   if (!IsValidLength(index))
744     return cur_length;
745 
746   const size_t new_length = cur_length + 1;
747   ReallocBeforeWrite(new_length);
748   wmemmove(m_pData->m_String + index + 1, m_pData->m_String + index,
749            new_length - index);
750   m_pData->m_String[index] = ch;
751   m_pData->m_nDataLength = new_length;
752   return new_length;
753 }
754 
Find(wchar_t ch,size_t start) const755 Optional<size_t> WideString::Find(wchar_t ch, size_t start) const {
756   if (!m_pData)
757     return pdfium::nullopt;
758 
759   if (!IsValidIndex(start))
760     return pdfium::nullopt;
761 
762   const wchar_t* pStr =
763       wmemchr(m_pData->m_String + start, ch, m_pData->m_nDataLength - start);
764   return pStr ? Optional<size_t>(static_cast<size_t>(pStr - m_pData->m_String))
765               : pdfium::nullopt;
766 }
767 
Find(WideStringView subStr,size_t start) const768 Optional<size_t> WideString::Find(WideStringView subStr, size_t start) const {
769   if (!m_pData)
770     return pdfium::nullopt;
771 
772   if (!IsValidIndex(start))
773     return pdfium::nullopt;
774 
775   const wchar_t* pStr =
776       FX_wcsstr(m_pData->m_String + start, m_pData->m_nDataLength - start,
777                 subStr.unterminated_c_str(), subStr.GetLength());
778   return pStr ? Optional<size_t>(static_cast<size_t>(pStr - m_pData->m_String))
779               : pdfium::nullopt;
780 }
781 
ReverseFind(wchar_t ch) const782 Optional<size_t> WideString::ReverseFind(wchar_t ch) const {
783   if (!m_pData)
784     return pdfium::nullopt;
785 
786   size_t nLength = m_pData->m_nDataLength;
787   while (nLength--) {
788     if (m_pData->m_String[nLength] == ch)
789       return nLength;
790   }
791   return pdfium::nullopt;
792 }
793 
MakeLower()794 void WideString::MakeLower() {
795   if (!m_pData)
796     return;
797 
798   ReallocBeforeWrite(m_pData->m_nDataLength);
799   FXSYS_wcslwr(m_pData->m_String);
800 }
801 
MakeUpper()802 void WideString::MakeUpper() {
803   if (!m_pData)
804     return;
805 
806   ReallocBeforeWrite(m_pData->m_nDataLength);
807   FXSYS_wcsupr(m_pData->m_String);
808 }
809 
Remove(wchar_t chRemove)810 size_t WideString::Remove(wchar_t chRemove) {
811   if (!m_pData || m_pData->m_nDataLength == 0)
812     return 0;
813 
814   wchar_t* pstrSource = m_pData->m_String;
815   wchar_t* pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
816   while (pstrSource < pstrEnd) {
817     if (*pstrSource == chRemove)
818       break;
819     pstrSource++;
820   }
821   if (pstrSource == pstrEnd)
822     return 0;
823 
824   ptrdiff_t copied = pstrSource - m_pData->m_String;
825   ReallocBeforeWrite(m_pData->m_nDataLength);
826   pstrSource = m_pData->m_String + copied;
827   pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
828 
829   wchar_t* pstrDest = pstrSource;
830   while (pstrSource < pstrEnd) {
831     if (*pstrSource != chRemove) {
832       *pstrDest = *pstrSource;
833       pstrDest++;
834     }
835     pstrSource++;
836   }
837 
838   *pstrDest = 0;
839   size_t count = static_cast<size_t>(pstrSource - pstrDest);
840   m_pData->m_nDataLength -= count;
841   return count;
842 }
843 
Replace(WideStringView pOld,WideStringView pNew)844 size_t WideString::Replace(WideStringView pOld, WideStringView pNew) {
845   if (!m_pData || pOld.IsEmpty())
846     return 0;
847 
848   size_t nSourceLen = pOld.GetLength();
849   size_t nReplacementLen = pNew.GetLength();
850   size_t count = 0;
851   const wchar_t* pStart = m_pData->m_String;
852   wchar_t* pEnd = m_pData->m_String + m_pData->m_nDataLength;
853   while (1) {
854     const wchar_t* pTarget =
855         FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
856                   pOld.unterminated_c_str(), nSourceLen);
857     if (!pTarget)
858       break;
859 
860     count++;
861     pStart = pTarget + nSourceLen;
862   }
863   if (count == 0)
864     return 0;
865 
866   size_t nNewLength =
867       m_pData->m_nDataLength + (nReplacementLen - nSourceLen) * count;
868 
869   if (nNewLength == 0) {
870     clear();
871     return count;
872   }
873 
874   RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
875   pStart = m_pData->m_String;
876   wchar_t* pDest = pNewData->m_String;
877   for (size_t i = 0; i < count; i++) {
878     const wchar_t* pTarget =
879         FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
880                   pOld.unterminated_c_str(), nSourceLen);
881     wmemcpy(pDest, pStart, pTarget - pStart);
882     pDest += pTarget - pStart;
883     wmemcpy(pDest, pNew.unterminated_c_str(), pNew.GetLength());
884     pDest += pNew.GetLength();
885     pStart = pTarget + nSourceLen;
886   }
887   wmemcpy(pDest, pStart, pEnd - pStart);
888   m_pData.Swap(pNewData);
889   return count;
890 }
891 
892 // static
FromASCII(ByteStringView bstr)893 WideString WideString::FromASCII(ByteStringView bstr) {
894   WideString result;
895   result.Reserve(bstr.GetLength());
896   for (char c : bstr)
897     result.InsertAtBack(static_cast<wchar_t>(c & 0x7f));
898   return result;
899 }
900 
901 // static
FromLatin1(ByteStringView bstr)902 WideString WideString::FromLatin1(ByteStringView bstr) {
903   WideString result;
904   result.Reserve(bstr.GetLength());
905   for (char c : bstr)
906     result.InsertAtBack(static_cast<wchar_t>(c & 0xff));
907   return result;
908 }
909 
910 // static
FromDefANSI(ByteStringView bstr)911 WideString WideString::FromDefANSI(ByteStringView bstr) {
912   int src_len = bstr.GetLength();
913   int dest_len = FXSYS_MultiByteToWideChar(
914       FX_CODEPAGE_DefANSI, 0, bstr.unterminated_c_str(), src_len, nullptr, 0);
915   if (!dest_len)
916     return WideString();
917 
918   WideString wstr;
919   {
920     // Span's lifetime must end before ReleaseBuffer() below.
921     pdfium::span<wchar_t> dest_buf = wstr.GetBuffer(dest_len);
922     FXSYS_MultiByteToWideChar(FX_CODEPAGE_DefANSI, 0, bstr.unterminated_c_str(),
923                               src_len, dest_buf.data(), dest_len);
924   }
925   wstr.ReleaseBuffer(dest_len);
926   return wstr;
927 }
928 
929 // static
FromUTF8(ByteStringView str)930 WideString WideString::FromUTF8(ByteStringView str) {
931   return FX_UTF8Decode(str);
932 }
933 
934 // static
FromUTF16LE(const unsigned short * wstr,size_t wlen)935 WideString WideString::FromUTF16LE(const unsigned short* wstr, size_t wlen) {
936   if (!wstr || wlen == 0)
937     return WideString();
938 
939   WideString result;
940   {
941     // Span's lifetime must end before ReleaseBuffer() below.
942     pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
943     for (size_t i = 0; i < wlen; i++)
944       buf[i] = wstr[i];
945   }
946   result.ReleaseBuffer(wlen);
947   return result;
948 }
949 
FromUTF16BE(const unsigned short * wstr,size_t wlen)950 WideString WideString::FromUTF16BE(const unsigned short* wstr, size_t wlen) {
951   if (!wstr || wlen == 0)
952     return WideString();
953 
954   WideString result;
955   {
956     // Span's lifetime must end before ReleaseBuffer() below.
957     pdfium::span<wchar_t> buf = result.GetBuffer(wlen);
958     for (size_t i = 0; i < wlen; i++) {
959       auto wch = wstr[i];
960       wch = (wch >> 8) | (wch << 8);
961       buf[i] = wch;
962     }
963   }
964   result.ReleaseBuffer(wlen);
965   return result;
966 }
967 
SetAt(size_t index,wchar_t c)968 void WideString::SetAt(size_t index, wchar_t c) {
969   ASSERT(IsValidIndex(index));
970   ReallocBeforeWrite(m_pData->m_nDataLength);
971   m_pData->m_String[index] = c;
972 }
973 
Compare(const wchar_t * str) const974 int WideString::Compare(const wchar_t* str) const {
975   if (m_pData)
976     return str ? wcscmp(m_pData->m_String, str) : 1;
977   return (!str || str[0] == 0) ? 0 : -1;
978 }
979 
Compare(const WideString & str) const980 int WideString::Compare(const WideString& str) const {
981   if (!m_pData)
982     return str.m_pData ? -1 : 0;
983   if (!str.m_pData)
984     return 1;
985 
986   size_t this_len = m_pData->m_nDataLength;
987   size_t that_len = str.m_pData->m_nDataLength;
988   size_t min_len = std::min(this_len, that_len);
989   int result = wmemcmp(m_pData->m_String, str.m_pData->m_String, min_len);
990   if (result != 0)
991     return result;
992   if (this_len == that_len)
993     return 0;
994   return this_len < that_len ? -1 : 1;
995 }
996 
CompareNoCase(const wchar_t * str) const997 int WideString::CompareNoCase(const wchar_t* str) const {
998   if (m_pData)
999     return str ? FXSYS_wcsicmp(m_pData->m_String, str) : 1;
1000   return (!str || str[0] == 0) ? 0 : -1;
1001 }
1002 
WStringLength(const unsigned short * str)1003 size_t WideString::WStringLength(const unsigned short* str) {
1004   size_t len = 0;
1005   if (str)
1006     while (str[len])
1007       len++;
1008   return len;
1009 }
1010 
Trim()1011 void WideString::Trim() {
1012   TrimRight(kWideTrimChars);
1013   TrimLeft(kWideTrimChars);
1014 }
1015 
Trim(wchar_t target)1016 void WideString::Trim(wchar_t target) {
1017   wchar_t str[2] = {target, 0};
1018   TrimRight(str);
1019   TrimLeft(str);
1020 }
1021 
Trim(WideStringView targets)1022 void WideString::Trim(WideStringView targets) {
1023   TrimRight(targets);
1024   TrimLeft(targets);
1025 }
1026 
TrimLeft()1027 void WideString::TrimLeft() {
1028   TrimLeft(kWideTrimChars);
1029 }
1030 
TrimLeft(wchar_t target)1031 void WideString::TrimLeft(wchar_t target) {
1032   wchar_t str[2] = {target, 0};
1033   TrimLeft(str);
1034 }
1035 
TrimLeft(WideStringView targets)1036 void WideString::TrimLeft(WideStringView targets) {
1037   if (!m_pData || targets.IsEmpty())
1038     return;
1039 
1040   size_t len = GetLength();
1041   if (len == 0)
1042     return;
1043 
1044   size_t pos = 0;
1045   while (pos < len) {
1046     size_t i = 0;
1047     while (i < targets.GetLength() &&
1048            targets.CharAt(i) != m_pData->m_String[pos]) {
1049       i++;
1050     }
1051     if (i == targets.GetLength())
1052       break;
1053     pos++;
1054   }
1055   if (!pos)
1056     return;
1057 
1058   ReallocBeforeWrite(len);
1059   size_t nDataLength = len - pos;
1060   memmove(m_pData->m_String, m_pData->m_String + pos,
1061           (nDataLength + 1) * sizeof(wchar_t));
1062   m_pData->m_nDataLength = nDataLength;
1063 }
1064 
TrimRight()1065 void WideString::TrimRight() {
1066   TrimRight(kWideTrimChars);
1067 }
1068 
TrimRight(wchar_t target)1069 void WideString::TrimRight(wchar_t target) {
1070   wchar_t str[2] = {target, 0};
1071   TrimRight(str);
1072 }
1073 
TrimRight(WideStringView targets)1074 void WideString::TrimRight(WideStringView targets) {
1075   if (IsEmpty() || targets.IsEmpty())
1076     return;
1077 
1078   size_t pos = GetLength();
1079   while (pos && targets.Contains(m_pData->m_String[pos - 1]))
1080     pos--;
1081 
1082   if (pos < m_pData->m_nDataLength) {
1083     ReallocBeforeWrite(m_pData->m_nDataLength);
1084     m_pData->m_String[pos] = 0;
1085     m_pData->m_nDataLength = pos;
1086   }
1087 }
1088 
GetInteger() const1089 int WideString::GetInteger() const {
1090   return m_pData ? FXSYS_wtoi(m_pData->m_String) : 0;
1091 }
1092 
operator <<(std::wostream & os,const WideString & str)1093 std::wostream& operator<<(std::wostream& os, const WideString& str) {
1094   return os.write(str.c_str(), str.GetLength());
1095 }
1096 
operator <<(std::ostream & os,const WideString & str)1097 std::ostream& operator<<(std::ostream& os, const WideString& str) {
1098   os << str.ToUTF8();
1099   return os;
1100 }
1101 
operator <<(std::wostream & os,WideStringView str)1102 std::wostream& operator<<(std::wostream& os, WideStringView str) {
1103   return os.write(str.unterminated_c_str(), str.GetLength());
1104 }
1105 
operator <<(std::ostream & os,WideStringView str)1106 std::ostream& operator<<(std::ostream& os, WideStringView str) {
1107   os << FX_UTF8Encode(str);
1108   return os;
1109 }
1110 
1111 }  // namespace fxcrt
1112 
FX_HashCode_GetW(WideStringView str,bool bIgnoreCase)1113 uint32_t FX_HashCode_GetW(WideStringView str, bool bIgnoreCase) {
1114   uint32_t dwHashCode = 0;
1115   if (bIgnoreCase) {
1116     for (wchar_t c : str)  // match FXSYS_towlower() arg type.
1117       dwHashCode = 1313 * dwHashCode + FXSYS_towlower(c);
1118   } else {
1119     for (WideStringView::UnsignedType c : str)
1120       dwHashCode = 1313 * dwHashCode + c;
1121   }
1122   return dwHashCode;
1123 }
1124