1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 2001-2009, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *   Date        Name        Description
9 *   05/23/00    aliu        Creation.
10 **********************************************************************
11 */
12 
13 #include <algorithm>
14 #include <vector>
15 #include "unicode/utypes.h"
16 #include "unicode/edits.h"
17 #include "unicode/unistr.h"
18 #include "unicode/utf16.h"
19 #include "cmemory.h"
20 #include "testutil.h"
21 #include "intltest.h"
22 
23 static const UChar HEX[] = u"0123456789ABCDEF";
24 
appendHex(UnicodeString & buf,UChar32 ch)25 UnicodeString &TestUtility::appendHex(UnicodeString &buf, UChar32 ch) {
26     if (ch >= 0x10000) {
27         if (ch >= 0x100000) {
28             buf.append(HEX[0xF&(ch>>20)]);
29         }
30         buf.append(HEX[0xF&(ch>>16)]);
31     }
32     buf.append(HEX[0xF&(ch>>12)]);
33     buf.append(HEX[0xF&(ch>>8)]);
34     buf.append(HEX[0xF&(ch>>4)]);
35     buf.append(HEX[0xF&ch]);
36     return buf;
37 }
38 
hex(UChar32 ch)39 UnicodeString TestUtility::hex(UChar32 ch) {
40     UnicodeString buf;
41     appendHex(buf, ch);
42     return buf;
43 }
44 
hex(const UnicodeString & s)45 UnicodeString TestUtility::hex(const UnicodeString& s) {
46     return hex(s, u',');
47 }
48 
hex(const UnicodeString & s,UChar sep)49 UnicodeString TestUtility::hex(const UnicodeString& s, UChar sep) {
50     UnicodeString result;
51     if (s.isEmpty()) return result;
52     UChar32 c;
53     for (int32_t i = 0; i < s.length(); i += U16_LENGTH(c)) {
54         c = s.char32At(i);
55         if (i > 0) {
56             result.append(sep);
57         }
58         appendHex(result, c);
59     }
60     return result;
61 }
62 
hex(const uint8_t * bytes,int32_t len)63 UnicodeString TestUtility::hex(const uint8_t* bytes, int32_t len) {
64     UnicodeString buf;
65     for (int32_t i = 0; i < len; ++i) {
66         buf.append(HEX[0x0F & (bytes[i] >> 4)]);
67         buf.append(HEX[0x0F & bytes[i]]);
68     }
69     return buf;
70 }
71 
72 namespace {
73 
printOneEdit(const Edits::Iterator & ei)74 UnicodeString printOneEdit(const Edits::Iterator &ei) {
75     if (ei.hasChange()) {
76         return UnicodeString() + ei.oldLength() + u"->" + ei.newLength();
77     } else {
78         return UnicodeString() + ei.oldLength() + u"=" + ei.newLength();
79     }
80 }
81 
82 /**
83  * Maps indexes according to the expected edits.
84  * A destination index can occur multiple times when there are source deletions.
85  * Map according to the last occurrence, normally in a non-empty destination span.
86  * Simplest is to search from the back.
87  */
srcIndexFromDest(const EditChange expected[],int32_t expLength,int32_t srcLength,int32_t destLength,int32_t index)88 int32_t srcIndexFromDest(const EditChange expected[], int32_t expLength,
89                          int32_t srcLength, int32_t destLength, int32_t index) {
90     int32_t srcIndex = srcLength;
91     int32_t destIndex = destLength;
92     int32_t i = expLength;
93     while (index < destIndex && i > 0) {
94         --i;
95         int32_t prevSrcIndex = srcIndex - expected[i].oldLength;
96         int32_t prevDestIndex = destIndex - expected[i].newLength;
97         if (index == prevDestIndex) {
98             return prevSrcIndex;
99         } else if (index > prevDestIndex) {
100             if (expected[i].change) {
101                 // In a change span, map to its end.
102                 return srcIndex;
103             } else {
104                 // In an unchanged span, offset within it.
105                 return prevSrcIndex + (index - prevDestIndex);
106             }
107         }
108         srcIndex = prevSrcIndex;
109         destIndex = prevDestIndex;
110     }
111     // index is outside the string.
112     return srcIndex;
113 }
114 
destIndexFromSrc(const EditChange expected[],int32_t expLength,int32_t srcLength,int32_t destLength,int32_t index)115 int32_t destIndexFromSrc(const EditChange expected[], int32_t expLength,
116                          int32_t srcLength, int32_t destLength, int32_t index) {
117     int32_t srcIndex = srcLength;
118     int32_t destIndex = destLength;
119     int32_t i = expLength;
120     while (index < srcIndex && i > 0) {
121         --i;
122         int32_t prevSrcIndex = srcIndex - expected[i].oldLength;
123         int32_t prevDestIndex = destIndex - expected[i].newLength;
124         if (index == prevSrcIndex) {
125             return prevDestIndex;
126         } else if (index > prevSrcIndex) {
127             if (expected[i].change) {
128                 // In a change span, map to its end.
129                 return destIndex;
130             } else {
131                 // In an unchanged span, offset within it.
132                 return prevDestIndex + (index - prevSrcIndex);
133             }
134         }
135         srcIndex = prevSrcIndex;
136         destIndex = prevDestIndex;
137     }
138     // index is outside the string.
139     return destIndex;
140 }
141 
142 }  // namespace
143 
144 // For debugging, set -v to see matching edits up to a failure.
checkEqualEdits(IntlTest & test,const UnicodeString & name,const Edits & e1,const Edits & e2,UErrorCode & errorCode)145 UBool TestUtility::checkEqualEdits(IntlTest &test, const UnicodeString &name,
146                                    const Edits &e1, const Edits &e2, UErrorCode &errorCode) {
147     Edits::Iterator ei1 = e1.getFineIterator();
148     Edits::Iterator ei2 = e2.getFineIterator();
149     UBool ok = TRUE;
150     for (int32_t i = 0; ok; ++i) {
151         UBool ei1HasNext = ei1.next(errorCode);
152         UBool ei2HasNext = ei2.next(errorCode);
153         ok &= test.assertEquals(name + u" next()[" + i + u"]" + __LINE__,
154                                 ei1HasNext, ei2HasNext);
155         ok &= test.assertSuccess(name + u" errorCode[" + i + u"]" + __LINE__, errorCode);
156         ok &= test.assertEquals(name + u" edit[" + i + u"]" + __LINE__,
157                                 printOneEdit(ei1), printOneEdit(ei2));
158         if (!ei1HasNext || !ei2HasNext) {
159             break;
160         }
161         test.logln();
162     }
163     return ok;
164 }
165 
checkEditsIter(IntlTest & test,const UnicodeString & name,Edits::Iterator ei1,Edits::Iterator ei2,const EditChange expected[],int32_t expLength,UBool withUnchanged,UErrorCode & errorCode)166 void TestUtility::checkEditsIter(
167         IntlTest &test,
168         const UnicodeString &name,
169         Edits::Iterator ei1, Edits::Iterator ei2,  // two equal iterators
170         const EditChange expected[], int32_t expLength, UBool withUnchanged,
171         UErrorCode &errorCode) {
172     test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(-1, errorCode));
173     test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(-1, errorCode));
174 
175     int32_t expSrcIndex = 0;
176     int32_t expDestIndex = 0;
177     int32_t expReplIndex = 0;
178     for (int32_t expIndex = 0; expIndex < expLength; ++expIndex) {
179         const EditChange &expect = expected[expIndex];
180         UnicodeString msg = UnicodeString(name).append(u' ') + expIndex;
181         if (withUnchanged || expect.change) {
182             test.assertTrue(msg + u":" + __LINE__, ei1.next(errorCode));
183             test.assertEquals(msg + u":" + __LINE__, expect.change, ei1.hasChange());
184             test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei1.oldLength());
185             test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei1.newLength());
186             test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex());
187             test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex());
188             test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
189         }
190 
191         if (expect.oldLength > 0) {
192             test.assertTrue(msg + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
193             test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
194             test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
195             test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength());
196             test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex());
197             test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex());
198             test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex());
199             if (!withUnchanged) {
200                 // For some iterators, move past the current range
201                 // so that findSourceIndex() has to look before the current index.
202                 ei2.next(errorCode);
203                 ei2.next(errorCode);
204             }
205         }
206 
207         if (expect.newLength > 0) {
208             test.assertTrue(msg + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
209             test.assertEquals(msg + u":" + __LINE__, expect.change, ei2.hasChange());
210             test.assertEquals(msg + u":" + __LINE__, expect.oldLength, ei2.oldLength());
211             test.assertEquals(msg + u":" + __LINE__, expect.newLength, ei2.newLength());
212             test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei2.sourceIndex());
213             test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei2.destinationIndex());
214             test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei2.replacementIndex());
215             if (!withUnchanged) {
216                 // For some iterators, move past the current range
217                 // so that findSourceIndex() has to look before the current index.
218                 ei2.next(errorCode);
219                 ei2.next(errorCode);
220             }
221         }
222 
223         expSrcIndex += expect.oldLength;
224         expDestIndex += expect.newLength;
225         if (expect.change) {
226             expReplIndex += expect.newLength;
227         }
228     }
229     UnicodeString msg = UnicodeString(name).append(u" end");
230     test.assertFalse(msg + u":" + __LINE__, ei1.next(errorCode));
231     test.assertFalse(msg + u":" + __LINE__, ei1.hasChange());
232     test.assertEquals(msg + u":" + __LINE__, 0, ei1.oldLength());
233     test.assertEquals(msg + u":" + __LINE__, 0, ei1.newLength());
234     test.assertEquals(msg + u":" + __LINE__, expSrcIndex, ei1.sourceIndex());
235     test.assertEquals(msg + u":" + __LINE__, expDestIndex, ei1.destinationIndex());
236     test.assertEquals(msg + u":" + __LINE__, expReplIndex, ei1.replacementIndex());
237 
238     test.assertFalse(name + u":" + __LINE__, ei2.findSourceIndex(expSrcIndex, errorCode));
239     test.assertFalse(name + u":" + __LINE__, ei2.findDestinationIndex(expDestIndex, errorCode));
240 
241     // Check mapping of all indexes against a simple implementation
242     // that works on the expected changes.
243     // Iterate once forward, once backward, to cover more runtime conditions.
244     int32_t srcLength = expSrcIndex;
245     int32_t destLength = expDestIndex;
246     std::vector<int32_t> srcIndexes;
247     std::vector<int32_t> destIndexes;
248     srcIndexes.push_back(-1);
249     destIndexes.push_back(-1);
250     int32_t srcIndex = 0;
251     int32_t destIndex = 0;
252     for (int32_t i = 0; i < expLength; ++i) {
253         if (expected[i].oldLength > 0) {
254             srcIndexes.push_back(srcIndex);
255             if (expected[i].oldLength > 1) {
256                 srcIndexes.push_back(srcIndex + 1);
257                 if (expected[i].oldLength > 2) {
258                     srcIndexes.push_back(srcIndex + expected[i].oldLength - 1);
259                 }
260             }
261         }
262         if (expected[i].newLength > 0) {
263             destIndexes.push_back(destIndex);
264             if (expected[i].newLength > 1) {
265                 destIndexes.push_back(destIndex + 1);
266                 if (expected[i].newLength > 2) {
267                     destIndexes.push_back(destIndex + expected[i].newLength - 1);
268                 }
269             }
270         }
271         srcIndex += expected[i].oldLength;
272         destIndex += expected[i].newLength;
273     }
274     srcIndexes.push_back(srcLength);
275     destIndexes.push_back(destLength);
276     srcIndexes.push_back(srcLength + 1);
277     destIndexes.push_back(destLength + 1);
278     std::reverse(destIndexes.begin(), destIndexes.end());
279     // Zig-zag across the indexes to stress next() <-> previous().
280     static const int32_t ZIG_ZAG[] = { 0, 1, 2, 3, 2, 1 };
281     for (auto i = 0; i < (int32_t)srcIndexes.size(); ++i) {
282         for (int32_t ij = 0; ij < UPRV_LENGTHOF(ZIG_ZAG); ++ij) {
283             int32_t j = ZIG_ZAG[ij];
284             if ((i + j) < (int32_t)srcIndexes.size()) {
285                 int32_t si = srcIndexes[i + j];
286                 test.assertEquals(name + u" destIndexFromSrc(" + si + u"):" + __LINE__,
287                                   destIndexFromSrc(expected, expLength, srcLength, destLength, si),
288                                   ei2.destinationIndexFromSourceIndex(si, errorCode));
289             }
290         }
291     }
292     for (auto i = 0; i < (int32_t)destIndexes.size(); ++i) {
293         for (int32_t ij = 0; ij < UPRV_LENGTHOF(ZIG_ZAG); ++ij) {
294             int32_t j = ZIG_ZAG[ij];
295             if ((i + j) < (int32_t)destIndexes.size()) {
296                 int32_t di = destIndexes[i + j];
297                 test.assertEquals(name + u" srcIndexFromDest(" + di + u"):" + __LINE__,
298                                   srcIndexFromDest(expected, expLength, srcLength, destLength, di),
299                                   ei2.sourceIndexFromDestinationIndex(di, errorCode));
300             }
301         }
302     }
303 }
304