1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/widestring.h"
8 
9 #include <stddef.h>
10 
11 #include <algorithm>
12 #include <cctype>
13 #include <cwctype>
14 
15 #include "core/fxcrt/cfx_utf8decoder.h"
16 #include "core/fxcrt/fx_codepage.h"
17 #include "core/fxcrt/fx_extension.h"
18 #include "core/fxcrt/fx_safe_types.h"
19 #include "core/fxcrt/string_pool_template.h"
20 #include "third_party/base/numerics/safe_math.h"
21 #include "third_party/base/stl_util.h"
22 
// Explicit instantiations of the string templates for wide characters and for
// WideString itself.
template class fxcrt::StringDataTemplate<wchar_t>;
template class fxcrt::StringViewTemplate<wchar_t>;
template class fxcrt::StringPoolTemplate<WideString>;
template struct std::hash<WideString>;

// Length-modifier flags used by GuessSizeForVSWPrintf(). A flag is OR'ed with
// the conversion character to form the switch labels there.
#define FORCE_ANSI 0x10000
#define FORCE_UNICODE 0x20000
#define FORCE_INT64 0x40000
31 
32 namespace {
33 
// Characters removed by the argument-less Trim(), TrimLeft() and TrimRight():
// tab, line feed, vertical tab, form feed, carriage return and space.
constexpr wchar_t kWideTrimChars[] = L"\x09\x0a\x0b\x0c\x0d\x20";
35 
// Finds the first occurrence of |needle| (|needle_len| characters) within the
// first |haystack_len| characters of |haystack|. Neither buffer has to be
// NUL-terminated. Returns a pointer to the start of the match inside
// |haystack|, or nullptr when there is no match or |needle| is empty.
//
// The lengths are size_t so that the size_t lengths handed in by the callers
// in this file are not narrowed to int.
const wchar_t* FX_wcsstr(const wchar_t* haystack,
                         size_t haystack_len,
                         const wchar_t* needle,
                         size_t needle_len) {
  if (needle_len == 0 || needle_len > haystack_len)
    return nullptr;

  // Last position at which a full-length match can still begin.
  const wchar_t* const last_start = haystack + (haystack_len - needle_len);
  for (const wchar_t* candidate = haystack; candidate <= last_start;
       ++candidate) {
    if (wmemcmp(candidate, needle, needle_len) == 0)
      return candidate;
  }
  return nullptr;
}
59 
GuessSizeForVSWPrintf(const wchar_t * pFormat,va_list argList)60 Optional<size_t> GuessSizeForVSWPrintf(const wchar_t* pFormat,
61                                        va_list argList) {
62   size_t nMaxLen = 0;
63   for (const wchar_t* pStr = pFormat; *pStr != 0; pStr++) {
64     if (*pStr != '%' || *(pStr = pStr + 1) == '%') {
65       ++nMaxLen;
66       continue;
67     }
68     int nItemLen = 0;
69     int nWidth = 0;
70     for (; *pStr != 0; pStr++) {
71       if (*pStr == '#') {
72         nMaxLen += 2;
73       } else if (*pStr == '*') {
74         nWidth = va_arg(argList, int);
75       } else if (*pStr != '-' && *pStr != '+' && *pStr != '0' && *pStr != ' ') {
76         break;
77       }
78     }
79     if (nWidth == 0) {
80       nWidth = FXSYS_wtoi(pStr);
81       while (std::iswdigit(*pStr))
82         ++pStr;
83     }
84     if (nWidth < 0 || nWidth > 128 * 1024)
85       return Optional<size_t>();
86     int nPrecision = 0;
87     if (*pStr == '.') {
88       pStr++;
89       if (*pStr == '*') {
90         nPrecision = va_arg(argList, int);
91         pStr++;
92       } else {
93         nPrecision = FXSYS_wtoi(pStr);
94         while (std::iswdigit(*pStr))
95           ++pStr;
96       }
97     }
98     if (nPrecision < 0 || nPrecision > 128 * 1024)
99       return Optional<size_t>();
100     int nModifier = 0;
101     if (*pStr == L'I' && *(pStr + 1) == L'6' && *(pStr + 2) == L'4') {
102       pStr += 3;
103       nModifier = FORCE_INT64;
104     } else {
105       switch (*pStr) {
106         case 'h':
107           nModifier = FORCE_ANSI;
108           pStr++;
109           break;
110         case 'l':
111           nModifier = FORCE_UNICODE;
112           pStr++;
113           break;
114         case 'F':
115         case 'N':
116         case 'L':
117           pStr++;
118           break;
119       }
120     }
121     switch (*pStr | nModifier) {
122       case 'c':
123       case 'C':
124         nItemLen = 2;
125         va_arg(argList, int);
126         break;
127       case 'c' | FORCE_ANSI:
128       case 'C' | FORCE_ANSI:
129         nItemLen = 2;
130         va_arg(argList, int);
131         break;
132       case 'c' | FORCE_UNICODE:
133       case 'C' | FORCE_UNICODE:
134         nItemLen = 2;
135         va_arg(argList, int);
136         break;
137       case 's': {
138         const wchar_t* pstrNextArg = va_arg(argList, const wchar_t*);
139         if (pstrNextArg) {
140           nItemLen = wcslen(pstrNextArg);
141           if (nItemLen < 1) {
142             nItemLen = 1;
143           }
144         } else {
145           nItemLen = 6;
146         }
147       } break;
148       case 'S': {
149         const char* pstrNextArg = va_arg(argList, const char*);
150         if (pstrNextArg) {
151           nItemLen = strlen(pstrNextArg);
152           if (nItemLen < 1) {
153             nItemLen = 1;
154           }
155         } else {
156           nItemLen = 6;
157         }
158       } break;
159       case 's' | FORCE_ANSI:
160       case 'S' | FORCE_ANSI: {
161         const char* pstrNextArg = va_arg(argList, const char*);
162         if (pstrNextArg) {
163           nItemLen = strlen(pstrNextArg);
164           if (nItemLen < 1) {
165             nItemLen = 1;
166           }
167         } else {
168           nItemLen = 6;
169         }
170       } break;
171       case 's' | FORCE_UNICODE:
172       case 'S' | FORCE_UNICODE: {
173         const wchar_t* pstrNextArg = va_arg(argList, wchar_t*);
174         if (pstrNextArg) {
175           nItemLen = wcslen(pstrNextArg);
176           if (nItemLen < 1) {
177             nItemLen = 1;
178           }
179         } else {
180           nItemLen = 6;
181         }
182       } break;
183     }
184     if (nItemLen != 0) {
185       if (nPrecision != 0 && nItemLen > nPrecision) {
186         nItemLen = nPrecision;
187       }
188       if (nItemLen < nWidth) {
189         nItemLen = nWidth;
190       }
191     } else {
192       switch (*pStr) {
193         case 'd':
194         case 'i':
195         case 'u':
196         case 'x':
197         case 'X':
198         case 'o':
199           if (nModifier & FORCE_INT64) {
200             va_arg(argList, int64_t);
201           } else {
202             va_arg(argList, int);
203           }
204           nItemLen = 32;
205           if (nItemLen < nWidth + nPrecision) {
206             nItemLen = nWidth + nPrecision;
207           }
208           break;
209         case 'a':
210         case 'A':
211         case 'e':
212         case 'E':
213         case 'g':
214         case 'G':
215           va_arg(argList, double);
216           nItemLen = 128;
217           if (nItemLen < nWidth + nPrecision) {
218             nItemLen = nWidth + nPrecision;
219           }
220           break;
221         case 'f':
222           if (nWidth + nPrecision > 100) {
223             nItemLen = nPrecision + nWidth + 128;
224           } else {
225             double f;
226             char pszTemp[256];
227             f = va_arg(argList, double);
228             FXSYS_snprintf(pszTemp, sizeof(pszTemp), "%*.*f", nWidth,
229                            nPrecision + 6, f);
230             nItemLen = strlen(pszTemp);
231           }
232           break;
233         case 'p':
234           va_arg(argList, void*);
235           nItemLen = 32;
236           if (nItemLen < nWidth + nPrecision) {
237             nItemLen = nWidth + nPrecision;
238           }
239           break;
240         case 'n':
241           va_arg(argList, int*);
242           break;
243       }
244     }
245     nMaxLen += nItemLen;
246   }
247   nMaxLen += 32;  // Fudge factor.
248   return Optional<size_t>(nMaxLen);
249 }
250 
251 // Returns string unless we ran out of space.
TryVSWPrintf(size_t size,const wchar_t * pFormat,va_list argList)252 Optional<WideString> TryVSWPrintf(size_t size,
253                                   const wchar_t* pFormat,
254                                   va_list argList) {
255   WideString str;
256   wchar_t* buffer = str.GetBuffer(size);
257 
258   // In the following two calls, there's always space in the buffer for
259   // a terminating NUL that's not included in nMaxLen.
260   // For vswprintf(), MSAN won't untaint the buffer on a truncated write's
261   // -1 return code even though the buffer is written. Probably just as well
262   // not to trust the vendor's implementation to write anything anyways.
263   // See https://crbug.com/705912.
264   memset(buffer, 0, (size + 1) * sizeof(wchar_t));
265   int ret = vswprintf(buffer, size + 1, pFormat, argList);
266 
267   bool bSufficientBuffer = ret >= 0 || buffer[size - 1] == 0;
268   if (!bSufficientBuffer)
269     return {};
270 
271   str.ReleaseBuffer(str.GetStringLength());
272   return {str};
273 }
274 
275 #ifndef NDEBUG
IsValidWideCodePage(uint16_t codepage)276 bool IsValidWideCodePage(uint16_t codepage) {
277   switch (codepage) {
278     case FX_CODEPAGE_DefANSI:
279     case FX_CODEPAGE_ShiftJIS:
280     case FX_CODEPAGE_ChineseSimplified:
281     case FX_CODEPAGE_Hangul:
282     case FX_CODEPAGE_ChineseTraditional:
283       return true;
284     default:
285       return false;
286   }
287 }
288 #endif
289 
GetWideString(uint16_t codepage,const ByteStringView & bstr)290 WideString GetWideString(uint16_t codepage, const ByteStringView& bstr) {
291 #ifndef NDEBUG
292   ASSERT(IsValidWideCodePage(codepage));
293 #endif
294 
295   int src_len = bstr.GetLength();
296   int dest_len = FXSYS_MultiByteToWideChar(
297       codepage, 0, bstr.unterminated_c_str(), src_len, nullptr, 0);
298   if (!dest_len)
299     return WideString();
300 
301   WideString wstr;
302   wchar_t* dest_buf = wstr.GetBuffer(dest_len);
303   FXSYS_MultiByteToWideChar(codepage, 0, bstr.unterminated_c_str(), src_len,
304                             dest_buf, dest_len);
305   wstr.ReleaseBuffer(dest_len);
306   return wstr;
307 }
308 
309 }  // namespace
310 
311 namespace fxcrt {
312 
// Compile-time check that a WideString occupies no more storage than a raw
// character pointer.
static_assert(sizeof(WideString) <= sizeof(wchar_t*),
              "Strings must not require more space than pointers");
315 
316 // static
FormatV(const wchar_t * format,va_list argList)317 WideString WideString::FormatV(const wchar_t* format, va_list argList) {
318   va_list argListCopy;
319   va_copy(argListCopy, argList);
320   int maxLen = vswprintf(nullptr, 0, format, argListCopy);
321   va_end(argListCopy);
322 
323   if (maxLen <= 0) {
324     va_copy(argListCopy, argList);
325     auto guess = GuessSizeForVSWPrintf(format, argListCopy);
326     va_end(argListCopy);
327 
328     if (!guess.has_value())
329       return L"";
330     maxLen = pdfium::base::checked_cast<int>(guess.value());
331   }
332 
333   while (maxLen < 32 * 1024) {
334     va_copy(argListCopy, argList);
335     Optional<WideString> ret =
336         TryVSWPrintf(static_cast<size_t>(maxLen), format, argListCopy);
337     va_end(argListCopy);
338 
339     if (ret)
340       return *ret;
341     maxLen *= 2;
342   }
343   return L"";
344 }
345 
346 // static
Format(const wchar_t * pFormat,...)347 WideString WideString::Format(const wchar_t* pFormat, ...) {
348   va_list argList;
349   va_start(argList, pFormat);
350   WideString ret = FormatV(pFormat, argList);
351   va_end(argList);
352   return ret;
353 }
354 
WideString()355 WideString::WideString() {}
356 
// Copy constructor: the new string refers to the same data object as |other|
// (m_pData is copied, not the characters).
WideString::WideString(const WideString& other) : m_pData(other.m_pData) {}
358 
// Move constructor: takes over |other|'s data by swapping it with this
// string's freshly constructed m_pData.
WideString::WideString(WideString&& other) noexcept {
  m_pData.Swap(other.m_pData);
}
362 
WideString(const wchar_t * pStr,size_t nLen)363 WideString::WideString(const wchar_t* pStr, size_t nLen) {
364   if (nLen)
365     m_pData.Reset(StringData::Create(pStr, nLen));
366 }
367 
// Constructs a one-character string holding |ch|.
WideString::WideString(wchar_t ch) {
  // Allocate data for a single character, then store it.
  m_pData.Reset(StringData::Create(1));
  m_pData->m_String[0] = ch;
}
372 
// Constructs a string from the NUL-terminated |ptr|. A null |ptr| is treated
// as length 0 and delegates to the (pointer, length) constructor.
WideString::WideString(const wchar_t* ptr)
    : WideString(ptr, ptr ? wcslen(ptr) : 0) {}
375 
WideString(const WideStringView & stringSrc)376 WideString::WideString(const WideStringView& stringSrc) {
377   if (!stringSrc.IsEmpty()) {
378     m_pData.Reset(StringData::Create(stringSrc.unterminated_c_str(),
379                                      stringSrc.GetLength()));
380   }
381 }
382 
WideString(const WideStringView & str1,const WideStringView & str2)383 WideString::WideString(const WideStringView& str1, const WideStringView& str2) {
384   FX_SAFE_SIZE_T nSafeLen = str1.GetLength();
385   nSafeLen += str2.GetLength();
386 
387   size_t nNewLen = nSafeLen.ValueOrDie();
388   if (nNewLen == 0)
389     return;
390 
391   m_pData.Reset(StringData::Create(nNewLen));
392   m_pData->CopyContents(str1.unterminated_c_str(), str1.GetLength());
393   m_pData->CopyContentsAt(str1.GetLength(), str2.unterminated_c_str(),
394                           str2.GetLength());
395 }
396 
WideString(const std::initializer_list<WideStringView> & list)397 WideString::WideString(const std::initializer_list<WideStringView>& list) {
398   FX_SAFE_SIZE_T nSafeLen = 0;
399   for (const auto& item : list)
400     nSafeLen += item.GetLength();
401 
402   size_t nNewLen = nSafeLen.ValueOrDie();
403   if (nNewLen == 0)
404     return;
405 
406   m_pData.Reset(StringData::Create(nNewLen));
407 
408   size_t nOffset = 0;
409   for (const auto& item : list) {
410     m_pData->CopyContentsAt(nOffset, item.unterminated_c_str(),
411                             item.GetLength());
412     nOffset += item.GetLength();
413   }
414 }
415 
~WideString()416 WideString::~WideString() {}
417 
operator =(const wchar_t * pStr)418 const WideString& WideString::operator=(const wchar_t* pStr) {
419   if (!pStr || !pStr[0])
420     clear();
421   else
422     AssignCopy(pStr, wcslen(pStr));
423 
424   return *this;
425 }
426 
operator =(const WideStringView & stringSrc)427 const WideString& WideString::operator=(const WideStringView& stringSrc) {
428   if (stringSrc.IsEmpty())
429     clear();
430   else
431     AssignCopy(stringSrc.unterminated_c_str(), stringSrc.GetLength());
432 
433   return *this;
434 }
435 
// Copy assignment: makes this string refer to |stringSrc|'s data object.
// Nothing is done when both already refer to the same data.
const WideString& WideString::operator=(const WideString& stringSrc) {
  if (m_pData == stringSrc.m_pData)
    return *this;

  m_pData = stringSrc.m_pData;
  return *this;
}
442 
operator +=(const wchar_t * pStr)443 const WideString& WideString::operator+=(const wchar_t* pStr) {
444   if (pStr)
445     Concat(pStr, wcslen(pStr));
446 
447   return *this;
448 }
449 
// Appends the single character |ch| by passing it to Concat() as a
// one-character buffer.
const WideString& WideString::operator+=(wchar_t ch) {
  Concat(&ch, 1);
  return *this;
}
454 
// Appends the contents of |str|; a string without data is ignored.
const WideString& WideString::operator+=(const WideString& str) {
  if (!str.m_pData)
    return *this;

  Concat(str.m_pData->m_String, str.m_pData->m_nDataLength);
  return *this;
}
461 
operator +=(const WideStringView & str)462 const WideString& WideString::operator+=(const WideStringView& str) {
463   if (!str.IsEmpty())
464     Concat(str.unterminated_c_str(), str.GetLength());
465 
466   return *this;
467 }
468 
operator ==(const wchar_t * ptr) const469 bool WideString::operator==(const wchar_t* ptr) const {
470   if (!m_pData)
471     return !ptr || !ptr[0];
472 
473   if (!ptr)
474     return m_pData->m_nDataLength == 0;
475 
476   return wcslen(ptr) == m_pData->m_nDataLength &&
477          wmemcmp(ptr, m_pData->m_String, m_pData->m_nDataLength) == 0;
478 }
479 
operator ==(const WideStringView & str) const480 bool WideString::operator==(const WideStringView& str) const {
481   if (!m_pData)
482     return str.IsEmpty();
483 
484   return m_pData->m_nDataLength == str.GetLength() &&
485          wmemcmp(m_pData->m_String, str.unterminated_c_str(),
486                  str.GetLength()) == 0;
487 }
488 
operator ==(const WideString & other) const489 bool WideString::operator==(const WideString& other) const {
490   if (m_pData == other.m_pData)
491     return true;
492 
493   if (IsEmpty())
494     return other.IsEmpty();
495 
496   if (other.IsEmpty())
497     return false;
498 
499   return other.m_pData->m_nDataLength == m_pData->m_nDataLength &&
500          wmemcmp(other.m_pData->m_String, m_pData->m_String,
501                  m_pData->m_nDataLength) == 0;
502 }
503 
// True when Compare(ptr) reports this string as ordering before |ptr|.
bool WideString::operator<(const wchar_t* ptr) const {
  return Compare(ptr) < 0;
}
507 
operator <(const WideStringView & str) const508 bool WideString::operator<(const WideStringView& str) const {
509   if (!m_pData && !str.unterminated_c_str())
510     return false;
511   if (c_str() == str.unterminated_c_str())
512     return false;
513 
514   size_t len = GetLength();
515   size_t other_len = str.GetLength();
516   int result =
517       wmemcmp(c_str(), str.unterminated_c_str(), std::min(len, other_len));
518   return result < 0 || (result == 0 && len < other_len);
519 }
520 
// True when Compare(other) returns a negative value.
bool WideString::operator<(const WideString& other) const {
  return Compare(other) < 0;
}
524 
// Replaces the contents with a copy of the first |nSrcLen| characters at
// |pSrcData|.
// NOTE(review): the callers visible in this file only pass a non-zero
// |nSrcLen|; for 0, AllocBeforeWrite() may clear() the string, after which
// m_pData is dereferenced below - confirm no caller passes 0.
void WideString::AssignCopy(const wchar_t* pSrcData, size_t nSrcLen) {
  // Make sure there is a data object that may be written for |nSrcLen| chars.
  AllocBeforeWrite(nSrcLen);
  m_pData->CopyContents(pSrcData, nSrcLen);
  m_pData->m_nDataLength = nSrcLen;
}
530 
ReallocBeforeWrite(size_t nNewLength)531 void WideString::ReallocBeforeWrite(size_t nNewLength) {
532   if (m_pData && m_pData->CanOperateInPlace(nNewLength))
533     return;
534 
535   if (nNewLength == 0) {
536     clear();
537     return;
538   }
539 
540   RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
541   if (m_pData) {
542     size_t nCopyLength = std::min(m_pData->m_nDataLength, nNewLength);
543     pNewData->CopyContents(m_pData->m_String, nCopyLength);
544     pNewData->m_nDataLength = nCopyLength;
545   } else {
546     pNewData->m_nDataLength = 0;
547   }
548   pNewData->m_String[pNewData->m_nDataLength] = 0;
549   m_pData.Swap(pNewData);
550 }
551 
AllocBeforeWrite(size_t nNewLength)552 void WideString::AllocBeforeWrite(size_t nNewLength) {
553   if (m_pData && m_pData->CanOperateInPlace(nNewLength))
554     return;
555 
556   if (nNewLength == 0) {
557     clear();
558     return;
559   }
560 
561   m_pData.Reset(StringData::Create(nNewLength));
562 }
563 
ReleaseBuffer(size_t nNewLength)564 void WideString::ReleaseBuffer(size_t nNewLength) {
565   if (!m_pData)
566     return;
567 
568   nNewLength = std::min(nNewLength, m_pData->m_nAllocLength);
569   if (nNewLength == 0) {
570     clear();
571     return;
572   }
573 
574   ASSERT(m_pData->m_nRefs == 1);
575   m_pData->m_nDataLength = nNewLength;
576   m_pData->m_String[nNewLength] = 0;
577   if (m_pData->m_nAllocLength - nNewLength >= 32) {
578     // Over arbitrary threshold, so pay the price to relocate.  Force copy to
579     // always occur by holding a second reference to the string.
580     WideString preserve(*this);
581     ReallocBeforeWrite(nNewLength);
582   }
583 }
584 
// Requests a buffer of at least |len| characters via GetBuffer(); the returned
// pointer is discarded.
void WideString::Reserve(size_t len) {
  GetBuffer(len);
}
588 
GetBuffer(size_t nMinBufLength)589 wchar_t* WideString::GetBuffer(size_t nMinBufLength) {
590   if (!m_pData) {
591     if (nMinBufLength == 0)
592       return nullptr;
593 
594     m_pData.Reset(StringData::Create(nMinBufLength));
595     m_pData->m_nDataLength = 0;
596     m_pData->m_String[0] = 0;
597     return m_pData->m_String;
598   }
599 
600   if (m_pData->CanOperateInPlace(nMinBufLength))
601     return m_pData->m_String;
602 
603   nMinBufLength = std::max(nMinBufLength, m_pData->m_nDataLength);
604   if (nMinBufLength == 0)
605     return nullptr;
606 
607   RetainPtr<StringData> pNewData(StringData::Create(nMinBufLength));
608   pNewData->CopyContents(*m_pData);
609   pNewData->m_nDataLength = m_pData->m_nDataLength;
610   m_pData.Swap(pNewData);
611   return m_pData->m_String;
612 }
613 
Delete(size_t index,size_t count)614 size_t WideString::Delete(size_t index, size_t count) {
615   if (!m_pData)
616     return 0;
617 
618   size_t old_length = m_pData->m_nDataLength;
619   if (count == 0 ||
620       index != pdfium::clamp(index, static_cast<size_t>(0), old_length))
621     return old_length;
622 
623   size_t removal_length = index + count;
624   if (removal_length > old_length)
625     return old_length;
626 
627   ReallocBeforeWrite(old_length);
628   size_t chars_to_copy = old_length - removal_length + 1;
629   wmemmove(m_pData->m_String + index, m_pData->m_String + removal_length,
630            chars_to_copy);
631   m_pData->m_nDataLength = old_length - count;
632   return m_pData->m_nDataLength;
633 }
634 
Concat(const wchar_t * pSrcData,size_t nSrcLen)635 void WideString::Concat(const wchar_t* pSrcData, size_t nSrcLen) {
636   if (!pSrcData || nSrcLen == 0)
637     return;
638 
639   if (!m_pData) {
640     m_pData.Reset(StringData::Create(pSrcData, nSrcLen));
641     return;
642   }
643 
644   if (m_pData->CanOperateInPlace(m_pData->m_nDataLength + nSrcLen)) {
645     m_pData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
646     m_pData->m_nDataLength += nSrcLen;
647     return;
648   }
649 
650   RetainPtr<StringData> pNewData(
651       StringData::Create(m_pData->m_nDataLength + nSrcLen));
652   pNewData->CopyContents(*m_pData);
653   pNewData->CopyContentsAt(m_pData->m_nDataLength, pSrcData, nSrcLen);
654   m_pData.Swap(pNewData);
655 }
656 
// Returns the result of FX_UTF8Encode() applied to a view of this string.
ByteString WideString::UTF8Encode() const {
  return FX_UTF8Encode(AsStringView());
}
660 
UTF16LE_Encode() const661 ByteString WideString::UTF16LE_Encode() const {
662   if (!m_pData) {
663     return ByteString("\0\0", 2);
664   }
665   int len = m_pData->m_nDataLength;
666   ByteString result;
667   char* buffer = result.GetBuffer(len * 2 + 2);
668   for (int i = 0; i < len; i++) {
669     buffer[i * 2] = m_pData->m_String[i] & 0xff;
670     buffer[i * 2 + 1] = m_pData->m_String[i] >> 8;
671   }
672   buffer[len * 2] = 0;
673   buffer[len * 2 + 1] = 0;
674   result.ReleaseBuffer(len * 2 + 2);
675   return result;
676 }
677 
Mid(size_t first,size_t count) const678 WideString WideString::Mid(size_t first, size_t count) const {
679   if (!m_pData)
680     return WideString();
681 
682   if (!IsValidIndex(first))
683     return WideString();
684 
685   if (count == 0 || !IsValidLength(count))
686     return WideString();
687 
688   if (!IsValidIndex(first + count - 1))
689     return WideString();
690 
691   if (first == 0 && count == GetLength())
692     return *this;
693 
694   WideString dest;
695   AllocCopy(dest, count, first);
696   return dest;
697 }
698 
Left(size_t count) const699 WideString WideString::Left(size_t count) const {
700   if (count == 0 || !IsValidLength(count))
701     return WideString();
702   return Mid(0, count);
703 }
704 
Right(size_t count) const705 WideString WideString::Right(size_t count) const {
706   if (count == 0 || !IsValidLength(count))
707     return WideString();
708   return Mid(GetLength() - count, count);
709 }
710 
AllocCopy(WideString & dest,size_t nCopyLen,size_t nCopyIndex) const711 void WideString::AllocCopy(WideString& dest,
712                            size_t nCopyLen,
713                            size_t nCopyIndex) const {
714   if (nCopyLen == 0)
715     return;
716 
717   RetainPtr<StringData> pNewData(
718       StringData::Create(m_pData->m_String + nCopyIndex, nCopyLen));
719   dest.m_pData.Swap(pNewData);
720 }
721 
Insert(size_t location,wchar_t ch)722 size_t WideString::Insert(size_t location, wchar_t ch) {
723   const size_t cur_length = m_pData ? m_pData->m_nDataLength : 0;
724   if (!IsValidLength(location))
725     return cur_length;
726 
727   const size_t new_length = cur_length + 1;
728   ReallocBeforeWrite(new_length);
729   wmemmove(m_pData->m_String + location + 1, m_pData->m_String + location,
730            new_length - location);
731   m_pData->m_String[location] = ch;
732   m_pData->m_nDataLength = new_length;
733   return new_length;
734 }
735 
Find(wchar_t ch,size_t start) const736 Optional<size_t> WideString::Find(wchar_t ch, size_t start) const {
737   if (!m_pData)
738     return Optional<size_t>();
739 
740   if (!IsValidIndex(start))
741     return Optional<size_t>();
742 
743   const wchar_t* pStr =
744       wmemchr(m_pData->m_String + start, ch, m_pData->m_nDataLength - start);
745   return pStr ? Optional<size_t>(static_cast<size_t>(pStr - m_pData->m_String))
746               : Optional<size_t>();
747 }
748 
Find(const WideStringView & subStr,size_t start) const749 Optional<size_t> WideString::Find(const WideStringView& subStr,
750                                   size_t start) const {
751   if (!m_pData)
752     return Optional<size_t>();
753 
754   if (!IsValidIndex(start))
755     return Optional<size_t>();
756 
757   const wchar_t* pStr =
758       FX_wcsstr(m_pData->m_String + start, m_pData->m_nDataLength - start,
759                 subStr.unterminated_c_str(), subStr.GetLength());
760   return pStr ? Optional<size_t>(static_cast<size_t>(pStr - m_pData->m_String))
761               : Optional<size_t>();
762 }
763 
MakeLower()764 void WideString::MakeLower() {
765   if (!m_pData)
766     return;
767 
768   ReallocBeforeWrite(m_pData->m_nDataLength);
769   FXSYS_wcslwr(m_pData->m_String);
770 }
771 
MakeUpper()772 void WideString::MakeUpper() {
773   if (!m_pData)
774     return;
775 
776   ReallocBeforeWrite(m_pData->m_nDataLength);
777   FXSYS_wcsupr(m_pData->m_String);
778 }
779 
Remove(wchar_t chRemove)780 size_t WideString::Remove(wchar_t chRemove) {
781   if (!m_pData || m_pData->m_nDataLength < 1)
782     return 0;
783 
784   wchar_t* pstrSource = m_pData->m_String;
785   wchar_t* pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
786   while (pstrSource < pstrEnd) {
787     if (*pstrSource == chRemove)
788       break;
789     pstrSource++;
790   }
791   if (pstrSource == pstrEnd)
792     return 0;
793 
794   ptrdiff_t copied = pstrSource - m_pData->m_String;
795   ReallocBeforeWrite(m_pData->m_nDataLength);
796   pstrSource = m_pData->m_String + copied;
797   pstrEnd = m_pData->m_String + m_pData->m_nDataLength;
798 
799   wchar_t* pstrDest = pstrSource;
800   while (pstrSource < pstrEnd) {
801     if (*pstrSource != chRemove) {
802       *pstrDest = *pstrSource;
803       pstrDest++;
804     }
805     pstrSource++;
806   }
807 
808   *pstrDest = 0;
809   size_t count = static_cast<size_t>(pstrSource - pstrDest);
810   m_pData->m_nDataLength -= count;
811   return count;
812 }
813 
Replace(const WideStringView & pOld,const WideStringView & pNew)814 size_t WideString::Replace(const WideStringView& pOld,
815                            const WideStringView& pNew) {
816   if (!m_pData || pOld.IsEmpty())
817     return 0;
818 
819   size_t nSourceLen = pOld.GetLength();
820   size_t nReplacementLen = pNew.GetLength();
821   size_t count = 0;
822   const wchar_t* pStart = m_pData->m_String;
823   wchar_t* pEnd = m_pData->m_String + m_pData->m_nDataLength;
824   while (1) {
825     const wchar_t* pTarget =
826         FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
827                   pOld.unterminated_c_str(), nSourceLen);
828     if (!pTarget)
829       break;
830 
831     count++;
832     pStart = pTarget + nSourceLen;
833   }
834   if (count == 0)
835     return 0;
836 
837   size_t nNewLength =
838       m_pData->m_nDataLength + (nReplacementLen - nSourceLen) * count;
839 
840   if (nNewLength == 0) {
841     clear();
842     return count;
843   }
844 
845   RetainPtr<StringData> pNewData(StringData::Create(nNewLength));
846   pStart = m_pData->m_String;
847   wchar_t* pDest = pNewData->m_String;
848   for (size_t i = 0; i < count; i++) {
849     const wchar_t* pTarget =
850         FX_wcsstr(pStart, static_cast<size_t>(pEnd - pStart),
851                   pOld.unterminated_c_str(), nSourceLen);
852     wmemcpy(pDest, pStart, pTarget - pStart);
853     pDest += pTarget - pStart;
854     wmemcpy(pDest, pNew.unterminated_c_str(), pNew.GetLength());
855     pDest += pNew.GetLength();
856     pStart = pTarget + nSourceLen;
857   }
858   wmemcpy(pDest, pStart, pEnd - pStart);
859   m_pData.Swap(pNewData);
860   return count;
861 }
862 
// static
// Converts |str| using code page 0 by forwarding to FromCodePage().
WideString WideString::FromLocal(const ByteStringView& str) {
  return FromCodePage(str, 0);
}
867 
// static
// Converts |str|, interpreted in |codepage|, to a WideString via
// GetWideString(), which asserts in debug builds that the code page is one it
// accepts.
WideString WideString::FromCodePage(const ByteStringView& str,
                                    uint16_t codepage) {
  return GetWideString(codepage, str);
}
873 
874 // static
FromUTF8(const ByteStringView & str)875 WideString WideString::FromUTF8(const ByteStringView& str) {
876   if (str.IsEmpty())
877     return WideString();
878 
879   CFX_UTF8Decoder decoder;
880   for (size_t i = 0; i < str.GetLength(); i++)
881     decoder.Input(str[i]);
882 
883   return WideString(decoder.GetResult());
884 }
885 
886 // static
FromUTF16LE(const unsigned short * wstr,size_t wlen)887 WideString WideString::FromUTF16LE(const unsigned short* wstr, size_t wlen) {
888   if (!wstr || wlen == 0) {
889     return WideString();
890   }
891 
892   WideString result;
893   wchar_t* buf = result.GetBuffer(wlen);
894   for (size_t i = 0; i < wlen; i++) {
895     buf[i] = wstr[i];
896   }
897   result.ReleaseBuffer(wlen);
898   return result;
899 }
900 
// Overwrites the character at |index| with |c|. |index| is only checked by an
// ASSERT.
void WideString::SetAt(size_t index, wchar_t c) {
  ASSERT(IsValidIndex(index));
  // Make the data safe to modify before writing into it.
  ReallocBeforeWrite(m_pData->m_nDataLength);
  m_pData->m_String[index] = c;
}
906 
Compare(const wchar_t * lpsz) const907 int WideString::Compare(const wchar_t* lpsz) const {
908   if (m_pData)
909     return lpsz ? wcscmp(m_pData->m_String, lpsz) : 1;
910   return (!lpsz || lpsz[0] == 0) ? 0 : -1;
911 }
912 
Compare(const WideString & str) const913 int WideString::Compare(const WideString& str) const {
914   if (!m_pData)
915     return str.m_pData ? -1 : 0;
916   if (!str.m_pData)
917     return 1;
918 
919   size_t this_len = m_pData->m_nDataLength;
920   size_t that_len = str.m_pData->m_nDataLength;
921   size_t min_len = std::min(this_len, that_len);
922   int result = wmemcmp(m_pData->m_String, str.m_pData->m_String, min_len);
923   if (result != 0)
924     return result;
925   if (this_len == that_len)
926     return 0;
927   return this_len < that_len;
928 }
929 
CompareNoCase(const wchar_t * lpsz) const930 int WideString::CompareNoCase(const wchar_t* lpsz) const {
931   if (m_pData)
932     return lpsz ? FXSYS_wcsicmp(m_pData->m_String, lpsz) : 1;
933   return (!lpsz || lpsz[0] == 0) ? 0 : -1;
934 }
935 
WStringLength(const unsigned short * str)936 size_t WideString::WStringLength(const unsigned short* str) {
937   size_t len = 0;
938   if (str)
939     while (str[len])
940       len++;
941   return len;
942 }
943 
// Strips the characters in kWideTrimChars from both ends, right end first.
void WideString::Trim() {
  TrimRight(kWideTrimChars);
  TrimLeft(kWideTrimChars);
}
948 
// Strips |target| from both ends, right end first.
void WideString::Trim(wchar_t target) {
  // NUL-terminated one-character array, passed on as the set of targets.
  wchar_t str[2] = {target, 0};
  TrimRight(str);
  TrimLeft(str);
}
954 
// Strips any of the characters in |targets| from both ends, right end first.
void WideString::Trim(const WideStringView& targets) {
  TrimRight(targets);
  TrimLeft(targets);
}
959 
// Strips the characters in kWideTrimChars from the start of the string.
void WideString::TrimLeft() {
  TrimLeft(kWideTrimChars);
}
963 
// Strips |target| from the start of the string.
void WideString::TrimLeft(wchar_t target) {
  // NUL-terminated one-character array, passed on as the set of targets.
  wchar_t str[2] = {target, 0};
  TrimLeft(str);
}
968 
TrimLeft(const WideStringView & targets)969 void WideString::TrimLeft(const WideStringView& targets) {
970   if (!m_pData || targets.IsEmpty())
971     return;
972 
973   size_t len = GetLength();
974   if (len == 0)
975     return;
976 
977   size_t pos = 0;
978   while (pos < len) {
979     size_t i = 0;
980     while (i < targets.GetLength() &&
981            targets.CharAt(i) != m_pData->m_String[pos]) {
982       i++;
983     }
984     if (i == targets.GetLength())
985       break;
986     pos++;
987   }
988   if (!pos)
989     return;
990 
991   ReallocBeforeWrite(len);
992   size_t nDataLength = len - pos;
993   memmove(m_pData->m_String, m_pData->m_String + pos,
994           (nDataLength + 1) * sizeof(wchar_t));
995   m_pData->m_nDataLength = nDataLength;
996 }
997 
// Strips the characters in kWideTrimChars from the end of the string.
void WideString::TrimRight() {
  TrimRight(kWideTrimChars);
}
1001 
// Strips |target| from the end of the string.
void WideString::TrimRight(wchar_t target) {
  // NUL-terminated one-character array, passed on as the set of targets.
  wchar_t str[2] = {target, 0};
  TrimRight(str);
}
1006 
TrimRight(const WideStringView & targets)1007 void WideString::TrimRight(const WideStringView& targets) {
1008   if (IsEmpty() || targets.IsEmpty())
1009     return;
1010 
1011   size_t pos = GetLength();
1012   while (pos && targets.Contains(m_pData->m_String[pos - 1]))
1013     pos--;
1014 
1015   if (pos < m_pData->m_nDataLength) {
1016     ReallocBeforeWrite(m_pData->m_nDataLength);
1017     m_pData->m_String[pos] = 0;
1018     m_pData->m_nDataLength = pos;
1019   }
1020 }
1021 
FX_wtof(const wchar_t * str,int len)1022 float FX_wtof(const wchar_t* str, int len) {
1023   if (len == 0) {
1024     return 0.0;
1025   }
1026   int cc = 0;
1027   bool bNegative = false;
1028   if (str[0] == '+') {
1029     cc++;
1030   } else if (str[0] == '-') {
1031     bNegative = true;
1032     cc++;
1033   }
1034   int integer = 0;
1035   while (cc < len) {
1036     if (str[cc] == '.') {
1037       break;
1038     }
1039     integer = integer * 10 + FXSYS_DecimalCharToInt(str[cc]);
1040     cc++;
1041   }
1042   float fraction = 0;
1043   if (str[cc] == '.') {
1044     cc++;
1045     float scale = 0.1f;
1046     while (cc < len) {
1047       fraction += scale * FXSYS_DecimalCharToInt(str[cc]);
1048       scale *= 0.1f;
1049       cc++;
1050     }
1051   }
1052   fraction += static_cast<float>(integer);
1053   return bNegative ? -fraction : fraction;
1054 }
1055 
GetInteger() const1056 int WideString::GetInteger() const {
1057   return m_pData ? FXSYS_wtoi(m_pData->m_String) : 0;
1058 }
1059 
GetFloat() const1060 float WideString::GetFloat() const {
1061   return m_pData ? FX_wtof(m_pData->m_String, m_pData->m_nDataLength) : 0.0f;
1062 }
1063 
operator <<(std::wostream & os,const WideString & str)1064 std::wostream& operator<<(std::wostream& os, const WideString& str) {
1065   return os.write(str.c_str(), str.GetLength());
1066 }
1067 
// Writes |str| to the narrow stream |os| in its UTF8Encode() form and returns
// |os|.
std::ostream& operator<<(std::ostream& os, const WideString& str) {
  os << str.UTF8Encode();
  return os;
}
1072 
operator <<(std::wostream & os,const WideStringView & str)1073 std::wostream& operator<<(std::wostream& os, const WideStringView& str) {
1074   return os.write(str.unterminated_c_str(), str.GetLength());
1075 }
1076 
// Writes the result of FX_UTF8Encode(str) to the narrow stream |os| and
// returns |os|.
std::ostream& operator<<(std::ostream& os, const WideStringView& str) {
  os << FX_UTF8Encode(str);
  return os;
}
1081 
1082 }  // namespace fxcrt
1083