1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // bytesinkutil.cpp
5 // created: 2017sep14 Markus W. Scherer
6 
7 #include "unicode/utypes.h"
8 #include "unicode/bytestream.h"
9 #include "unicode/edits.h"
10 #include "unicode/stringoptions.h"
11 #include "unicode/utf8.h"
12 #include "unicode/utf16.h"
13 #include "bytesinkutil.h"
14 #include "charstr.h"
15 #include "cmemory.h"
16 #include "uassert.h"
17 
18 U_NAMESPACE_BEGIN
19 
20 UBool
appendChange(int32_t length,const char16_t * s16,int32_t s16Length,ByteSink & sink,Edits * edits,UErrorCode & errorCode)21 ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
22                            ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
23     if (U_FAILURE(errorCode)) { return FALSE; }
24     char scratch[200];
25     int32_t s8Length = 0;
26     for (int32_t i = 0; i < s16Length;) {
27         int32_t capacity;
28         int32_t desiredCapacity = s16Length - i;
29         if (desiredCapacity < (INT32_MAX / 3)) {
30             desiredCapacity *= 3;  // max 3 UTF-8 bytes per UTF-16 code unit
31         } else if (desiredCapacity < (INT32_MAX / 2)) {
32             desiredCapacity *= 2;
33         } else {
34             desiredCapacity = INT32_MAX;
35         }
36         char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,
37                                             scratch, UPRV_LENGTHOF(scratch), &capacity);
38         capacity -= U8_MAX_LENGTH - 1;
39         int32_t j = 0;
40         for (; i < s16Length && j < capacity;) {
41             UChar32 c;
42             U16_NEXT_UNSAFE(s16, i, c);
43             U8_APPEND_UNSAFE(buffer, j, c);
44         }
45         if (j > (INT32_MAX - s8Length)) {
46             errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
47             return FALSE;
48         }
49         sink.Append(buffer, j);
50         s8Length += j;
51     }
52     if (edits != nullptr) {
53         edits->addReplace(length, s8Length);
54     }
55     return TRUE;
56 }
57 
58 UBool
appendChange(const uint8_t * s,const uint8_t * limit,const char16_t * s16,int32_t s16Length,ByteSink & sink,Edits * edits,UErrorCode & errorCode)59 ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
60                            const char16_t *s16, int32_t s16Length,
61                            ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
62     if (U_FAILURE(errorCode)) { return FALSE; }
63     if ((limit - s) > INT32_MAX) {
64         errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
65         return FALSE;
66     }
67     return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
68 }
69 
70 void
appendCodePoint(int32_t length,UChar32 c,ByteSink & sink,Edits * edits)71 ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
72     char s8[U8_MAX_LENGTH];
73     int32_t s8Length = 0;
74     U8_APPEND_UNSAFE(s8, s8Length, c);
75     if (edits != nullptr) {
76         edits->addReplace(length, s8Length);
77     }
78     sink.Append(s8, s8Length);
79 }
80 
81 namespace {
82 
83 // See unicode/utf8.h U8_APPEND_UNSAFE().
getTwoByteLead(UChar32 c)84 inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
getTwoByteTrail(UChar32 c)85 inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
86 
87 }  // namespace
88 
89 void
appendTwoBytes(UChar32 c,ByteSink & sink)90 ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
91     U_ASSERT(0x80 <= c && c <= 0x7ff);  // 2-byte UTF-8
92     char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) };
93     sink.Append(s8, 2);
94 }
95 
96 void
appendNonEmptyUnchanged(const uint8_t * s,int32_t length,ByteSink & sink,uint32_t options,Edits * edits)97 ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
98                                       ByteSink &sink, uint32_t options, Edits *edits) {
99     U_ASSERT(length > 0);
100     if (edits != nullptr) {
101         edits->addUnchanged(length);
102     }
103     if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
104         sink.Append(reinterpret_cast<const char *>(s), length);
105     }
106 }
107 
108 UBool
appendUnchanged(const uint8_t * s,const uint8_t * limit,ByteSink & sink,uint32_t options,Edits * edits,UErrorCode & errorCode)109 ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
110                               ByteSink &sink, uint32_t options, Edits *edits,
111                               UErrorCode &errorCode) {
112     if (U_FAILURE(errorCode)) { return FALSE; }
113     if ((limit - s) > INT32_MAX) {
114         errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
115         return FALSE;
116     }
117     int32_t length = (int32_t)(limit - s);
118     if (length > 0) {
119         appendNonEmptyUnchanged(s, length, sink, options, edits);
120     }
121     return TRUE;
122 }
123 
CharStringByteSink(CharString * dest)124 CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) {
125 }
126 
// Defaulted destructor, defined out of line in this file.
CharStringByteSink::~CharStringByteSink() = default;
128 
129 void
Append(const char * bytes,int32_t n)130 CharStringByteSink::Append(const char* bytes, int32_t n) {
131     UErrorCode status = U_ZERO_ERROR;
132     dest_.append(bytes, n, status);
133     // Any errors are silently ignored.
134 }
135 
136 char*
GetAppendBuffer(int32_t min_capacity,int32_t desired_capacity_hint,char * scratch,int32_t scratch_capacity,int32_t * result_capacity)137 CharStringByteSink::GetAppendBuffer(int32_t min_capacity,
138                                     int32_t desired_capacity_hint,
139                                     char* scratch,
140                                     int32_t scratch_capacity,
141                                     int32_t* result_capacity) {
142     if (min_capacity < 1 || scratch_capacity < min_capacity) {
143         *result_capacity = 0;
144         return nullptr;
145     }
146 
147     UErrorCode status = U_ZERO_ERROR;
148     char* result = dest_.getAppendBuffer(
149             min_capacity,
150             desired_capacity_hint,
151             *result_capacity,
152             status);
153     if (U_SUCCESS(status)) {
154         return result;
155     }
156 
157     *result_capacity = scratch_capacity;
158     return scratch;
159 }
160 
161 U_NAMESPACE_END
162