• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 <?xml version="1.0" encoding="UTF-8"?>
2 
3 <!-- Copyright (c) 2007-2009 IBM Corporation and others. All rights reserved -->
4 
5 <!-- Test data file for string search  -->
6 <!DOCTYPE stringsearch-tests [
7 <!ELEMENT stringsearch-tests (test-case+)>  <!-- root element: one or more test-case children -->
8 <!ATTLIST stringsearch-tests debug IDREF #IMPLIED >  <!-- optional; must name the id of an existing test-case (presumably singles out that test for debugging; confirm against the test harness) -->
9 <!ELEMENT test-case (pattern, pre?, m?, post?)>  <!-- pattern is mandatory; pre, m and post are each optional but must appear in this order -->
10 <!ATTLIST test-case
11           id ID #REQUIRED
12           locale CDATA "en"
13           strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TERTIARY"
14           norm (ON | OFF) "OFF"
15           alternate_handling (NON_IGNORABLE | SHIFTED) "NON_IGNORABLE"
16           >  <!-- id must be unique in the file; omitted attributes default to locale "en", strength TERTIARY, norm OFF, alternate_handling NON_IGNORABLE -->
17 
18 <!ELEMENT pattern (#PCDATA)>  <!-- the string being searched for -->
19 <!ELEMENT pre  (#PCDATA)>  <!-- text preceding the expected match -->
20 <!ELEMENT m    (#PCDATA)>  <!-- text expected to match; judging by the test comments, a test-case without m expects no match -->
21 <!ELEMENT post (#PCDATA)>  <!-- text following the expected match -->
22 ]>
23 
24 <stringsearch-tests>
25   <!-- debug="test11"     (for copying into the above element)  -->
26 
27     <!-- Very simple match  -->
28     <test-case id="test01" >
29        <pattern>abc</pattern>
30        <pre>xxx</pre><m>abc</m><post>yyy</post>
31     </test-case>
32 
33     <!-- Very simple no-match  -->
34     <test-case id="test02" >
35        <pattern>abc</pattern>
36        <pre>xxx</pre><post>yyy</post>
37     </test-case>
38 
39     <!-- Match after several near-misses. -->
40     <test-case id="test03" >
41        <pattern>string</pattern>
42        <pre>silly spring stling strxng strilg strinx stri</pre><m>string</m><post> fling</post>
43     </test-case>
44 
45     <test-case id="test04" strength="PRIMARY" >
46        <pattern>FUSS</pattern>
47        <pre>abc</pre><m>fuss</m><post>sss</post>
48     </test-case>
49 
50     <test-case id="test05" strength="PRIMARY" >
51        <pattern>FUSS</pattern>
52        <pre>abc</pre><m>fuß</m><post>sss</post>
53     </test-case>
54 
55   <test-case id="test05.5" strength="PRIMARY" >
56     <pattern>fuss</pattern>
57     <pre>a </pre>
58     <m>fuß</m>
59     <post>ball table</post>
60   </test-case>
61 
62   <test-case id="test06" strength="PRIMARY" >
63       <pattern>fuß</pattern>
64        <pre>abc</pre><m>fuss</m><post>xyz</post>
65     </test-case>
66 
67     <test-case id="test07" strength="SECONDARY" >
68       <pattern>fuß</pattern>
69       <pre>abcfussxyz</pre>
70     </test-case>
71 
72     <test-case id="test08" strength="PRIMARY" >
73       <pattern>fus</pattern>
74       <pre>abcfuß</pre><post>xyz</post>
75     </test-case>
76 
77     <!-- A good match following an initial match that failed because
78          of not ending on a character boundary -->
79     <test-case id="test09" strength="PRIMARY">
80       <pattern>fus</pattern>
81       <pre>fuß  </pre><m>fus</m><post>sss</post>
82     </test-case>
83 
84 
85     <!-- Test cases from usrchdat.c  BREAKITERATOREXACT -->
86 
87     <test-case id="test10" strength="TERTIARY">
88       <pattern>fox</pattern>
89       <m>fox</m><post>y fox</post>
90     </test-case>
91 
92     <test-case id="test11" strength="PRIMARY" locale="de_DE@collation=phonebook">
93       <pattern>toe</pattern>
94       <pre>This is a </pre><m>Tö</m><post>ne</post>
95     </test-case>
96 
97     <test-case id="test11a" strength="SECONDARY" locale="de_DE@collation=phonebook">
98       <pattern>toe</pattern>
99       <pre>This is a </pre><post>Töne</post>
100     </test-case>
101 
102     <test-case id="test12" strength="TERTIARY">
103       <pattern>e</pattern>
104       <pre>tésting that é doés not match </pre><m>e</m><post></post>
105     </test-case>
106 
107     <test-case id="test13" strength="PRIMARY" locale="fr">
108       <pattern>e</pattern>
109       <pre></pre><m>É</m><post>É</post>
110     </test-case>
111 
112     <test-case id="test14" strength="PRIMARY" locale="fr">
113       <pattern>O</pattern>
114       <pre>C</pre><m>O\u0302</m><post>TÉ</post>
115     </test-case>
116 
117 
118     <!-- Test cases from usrchdat.c  STRENGTH -->
119 
120 
121     <test-case id="test15" strength="PRIMARY" locale="en">
122       <pattern>fox</pattern>
123       <pre>The quick brown </pre><m>fox</m><post> jumps over the lazy foxes</post>
124     </test-case>
125 
126     <test-case id="test16" strength="PRIMARY" locale="fr">
127       <pattern>peche</pattern>
128       <pre>blackbirds pat </pre><m>p\u00E9ch\u00E9</m><post> </post>
129     </test-case>
130 
131     <test-case id="test17" strength="PRIMARY" locale="fr">
132       <pattern>peche</pattern>
133       <pre>blackbirds pat </pre><m>p\u00EAche</m><post> </post>
134     </test-case>
135 
136     <test-case id="test18" strength="PRIMARY" locale="fr">
137       <pattern>peche</pattern>
138       <pre>blackbirds pat </pre><m>p\u00E9che</m><post>r </post>
139     </test-case>
140 
141     <test-case id="test19" strength="PRIMARY" locale="fr">
142       <pattern>peche</pattern>
143       <pre>blackbirds pat </pre><m>p\u00EAche</m><post>r </post>
144     </test-case>
145 
146     <test-case id="test20" strength="PRIMARY" locale="es">
147       <pattern>channel</pattern>
148       <pre>A </pre><m>channel</m><post>, </post>
149     </test-case>
150 
151     <test-case id="test21" strength="PRIMARY" locale="es">
152       <pattern>channel</pattern>
153       <pre>A </pre><m>CHANNEL</m><post>, </post>
154     </test-case>
155 
156     <test-case id="test22" strength="PRIMARY" locale="es">
157       <pattern>channel</pattern>
158       <pre>A </pre><m>Channel</m><post>s, </post>
159     </test-case>
160 
161     <test-case id="test23" strength="PRIMARY" locale="es">
162       <pattern>channel</pattern>
163       <pre>A </pre><m>channel</m><post>... </post>
164     </test-case>
165 
166     <test-case id="test24" strength="TERTIARY" locale="en">
167       <pattern>A\u0300</pattern>
168       <pre>A miss, and then </pre><m>\u00c0</m><post> should match but not A"</post>
169     </test-case>
170 
171     <!-- TODO:  In the original test data, this test matched at IDENTICAL strength.
172                 Doesn't seem right.  The characters are different.
173                 -->
174     <test-case id="test24a" strength="IDENTICAL" locale="en">
175       <pattern>A\u0300</pattern>
176       <pre>At IDENTICAL, shoud this match?  </pre><m>\u00c0</m><post></post>
177     </test-case>
178 
179   <test-case id="test24b" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">
180     <pattern>A\u0300</pattern>
181     <pre>At IDENTICAL, shoud this match?  </pre>
182     <m>\u00c0</m>
183     <post></post>
184   </test-case>
185 
186   <test-case id="test25" strength="SECONDARY" locale="en">
187       <pattern>Ű</pattern>
188       <pre>12</pre><m>ű</m><post> Ű</post>
189     </test-case>
190 
191     <test-case id="test26" strength="SECONDARY" locale="en">
192       <pattern>A</pattern>
193       <pre>12</pre><m>a</m><post>...</post>
194     </test-case>
195 
196 
197     <!--  Test Cases from usrchdat.c,  VARIABLE -->
198     <test-case id="test27" strength="TERTIARY" locale="en">
199       <pattern>blackbird</pattern>
200       <pre>black-bird </pre><m>blackbird</m><post>...</post>
201     </test-case>
202 
203     <test-case id="test28" strength="TERTIARY" locale="en">
204       <pattern>go</pattern>
205       <pre> on</pre>
206     </test-case>
207 
208     <!-- TODO:  this gives a U_ILLEGAL_ARGUMENT error when opening
209                 the UStringSearch.  How did the original test run? -->
210     <!--
211     <test-case id="test29" strength="PRIMARY" locale="en">
212       <pattern>  </pattern>
213       <pre></pre><m></m><post>abc</post>
214     </test-case>
215     -->
216 
217     <test-case id="test30" strength="SECONDARY" locale="en">
218       <pattern>abc</pattern>
219       <pre>  a bc   ab c    a  bc     ab  c"</pre>
220     </test-case>
221 
222     <test-case id="test31" strength="SECONDARY" locale="en">
223       <pattern>abc</pattern>
224       <pre>           ---------------</pre>
225     </test-case>
226 
227 
228     <!--  Normalization test cases from usrchdat.c  -->
229     <test-case id="test32" strength="TERTIARY"  norm="ON">
230       <pattern>a\u0325\u0300</pattern>
231       <pre></pre><m>a\u0300\u0325</m>
232     </test-case>
233 
234 
235     <test-case id="test32a" strength="TERTIARY"  norm="OFF">
236       <pattern>a\u0325\u0300</pattern>
237       <pre>a\u0300\u0325</pre>
238     </test-case>
239 
240 
241     <!-- COMPOSITEBOUNDARIES from usrchdat.c
242          Boundaries are not identical to original test data because
243          of matching only full combining sequences
244     -->
245     <test-case id="test40" strength="TERTIARY">
246       <pattern>A</pattern>
247       <pre>À</pre>   <!-- \u00C0 -->
248     </test-case>
249 
250     <test-case id="test41" strength="TERTIARY">
251       <pattern>A</pattern>
252       <pre>À</pre><m>A</m><post>C</post>
253     </test-case>
254 
255     <test-case id="test42" strength="TERTIARY">
256       <pattern>A\u030A</pattern>
257       <pre>À\u01FA</pre>
258     </test-case>
259 
260 
261 
262     <!-- SUPPLEMENTARYCANONICAL from usrchdat.c  -->
263     <test-case id="test50" strength="TERTIARY">
264       <pattern>\uD800\uDC00</pattern>
265       <pre>abc \uD802\uDC00 \uD800\uDC01 \uD801\uDC00 </pre><m>\uD800\uDC00</m>
266       <post>abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00</post>
267     </test-case>
268 
269     <test-case id="test51" strength="TERTIARY">  <!-- NOTE(review): test51 to test55 write the escape with a doubled backslash (\\uD834\\uDDB9) while test50 uses a single one (\uD800\uDC00); if the harness unescapes element text this may yield a literal backslash sequence rather than U+1D1B9. Pattern and match text agree, so the test presumably passes either way; confirm intent. -->
270       <pattern>\\uD834\\uDDB9</pattern>
271       <pre>and</pre><m>\\uD834\\uDDB9</m><post>this sentence</post>
272     </test-case>
273 
274     <test-case id="test52" strength="TERTIARY">
275       <pattern> \\uD834\\uDDB9 </pattern>
276       <pre>and</pre><m> \\uD834\\uDDB9 </m><post>this sentence</post>
277     </test-case>
278 
279     <test-case id="test53" strength="TERTIARY">
280       <pattern>-\\uD834\\uDDB9-</pattern>
281       <pre>and</pre><m>-\\uD834\\uDDB9-</m><post>this sentence</post>
282     </test-case>
283 
284     <test-case id="test54" strength="TERTIARY">
285       <pattern>,\\uD834\\uDDB9,</pattern>
286       <pre>and</pre><m>,\\uD834\\uDDB9,</m><post>this sentence</post>
287     </test-case>
288 
289     <test-case id="test55" strength="TERTIARY">
290       <pattern>?\\uD834\\uDDB9?</pattern>
291       <pre>and</pre><m>?\\uD834\\uDDB9?</m><post>this sentence</post>
292     </test-case>
293 
294 
295     <!-- Long combining sequences  -->
296     <!-- Backwards search fails because pattern ends with ignorables
297     <test-case id="test60" strength="PRIMARY">
298       <pattern>A\u0301\u0301\u0301\u0301</pattern>
299       <m>A\u0301\u0301\u0301\u0301\u0301</m>
300     </test-case>
301     -->
302 
303     <test-case id="test61" strength="TERTIARY">
304       <pattern>A\u0301\u0301\u0301\u0301</pattern>
305           <pre>A\u0301\u0301\u0301\u0301\u0301</pre>
306     </test-case>
307 
308     <test-case id="test62" strength="TERTIARY">
309       <pattern>A\u0301\u0301\u0301\u0301</pattern>
310             <m>A\u0301\u0301\u0301\u0301</m>
311     </test-case>
312 
313     <!-- stand-alone combining marks don't match attached marks  -->
314     <test-case id="test63" strength="TERTIARY">
315       <pattern>\u0301</pattern>
316       <pre>A\u0301\u0301\u0301\u0301</pre>
317     </test-case>
318 
319     <test-case id="test64" strength="TERTIARY">
320       <pattern>\u0301</pattern>
321       <post>\u0301\u0301\u0301\u0301</post>
322     </test-case>
323 
324   <!-- stand-alone combining mark does match an un-attached combining mark -->
325     <test-case id="test65" strength="TERTIARY">
326        <pattern>\u0301</pattern>
327        <m>\u0301</m><post>A\u0301\u0301</post>
328     </test-case>
329 
330     <test-case id="test66" strength="TERTIARY">
331        <pattern>\u0301</pattern>
332        <m>\u0301</m>
333     </test-case>
334 
335     <!-- stand-alone combining marks at end of the target text -->
336     <test-case id="test67" strength="TERTIARY">
337        <pattern>\u0301</pattern>
338        <pre>abcd\r</pre><m>\u0301</m>
339     </test-case>
340 
341       <!-- attached combining marks at end of the target text, no match -->
342     <test-case id="test68" strength="TERTIARY">
343        <pattern>\u0301</pattern>
344        <pre>abcd\u0301</pre>
345     </test-case>
346 
347 
348 
349    <!-- no match within expansions at the start -->
350     <test-case id="test70" strength="PRIMARY">
351       <pattern>Eligature</pattern>
352       <pre>Æligature</pre>
353     </test-case>
354 
355     <test-case id="test71" strength="PRIMARY">
356       <pattern>AEligature</pattern>
357       <m>Æligature</m>
358     </test-case>
359 
360     <test-case id="test72" strength="PRIMARY">  <!-- NOTE(review): same pattern, text and attributes as test71; looks like an accidental duplicate, confirm -->
361         <pattern>AEligature</pattern>
362         <m>Æligature</m>
363     </test-case>
364 
365     <!-- unattached combining Tilde will not match a Tilde that is
366          part of a composed Ñ  (\u00D1)  -->
367     <test-case id="test73" strength="SECONDARY">
368         <pattern>\u0303</pattern>  <!-- combining tilde -->
369         <pre>Ñ&#x0d;</pre><m>\u0303</m>
370     </test-case>
371 
372     <test-case id="test74" strength="SECONDARY">
373         <pattern>\u0303</pattern>  <!-- combining tilde -->
374         <pre>Ñ &#x0d;</pre><m>\u0303</m><post>a</post>
375     </test-case>
376 
377   <test-case id="test75" strength="TERTIARY" locale="fr">
378     <pattern>\u00EA</pattern>
379     <pre>p</pre><m>\u00EA</m><post>che</post>
380   </test-case>
381 
382   <test-case id="test76" strength="TERTIARY" locale="fr">
383     <pattern>\u00EA</pattern>
384     <pre>p</pre><m>e\u0302</m><post>che</post>
385   </test-case>
386 
387   <test-case id="test77" strength="TERTIARY" locale="fr">
388     <pattern>e\u0302</pattern>
389     <pre>p</pre><m>\u00EA</m><post>che</post>
390   </test-case>
391 
392   <!-- Test cases from ticket:5382 -->
393   <test-case id="test78" strength="SECONDARY" locale="hu_HU">
394     <pattern>\u0170</pattern>
395     <m>\u0171</m>
396     <post>12</post>
397   </test-case>
398 
399   <test-case id="test79" strength="SECONDARY" locale="hu_HU">
400     <pattern>\u0170</pattern>
401     <pre>1</pre>
402     <m>\u0171</m>
403     <post>2</post>
404   </test-case>
405 
406   <test-case id="test80" strength="SECONDARY" locale="hu_HU">
407     <pattern>\u0170</pattern>
408     <pre>12</pre>
409     <m>\u0171</m>
410   </test-case>
411 
412   <!-- Test cases from ticket:5959 -->
413   <test-case id="test81" strength="SECONDARY">
414     <pattern>\u2166</pattern>
415     <m>VII</m>
416   </test-case>
417 
418   <test-case id="test82" strength="SECONDARY">
419     <pattern>VII</pattern>
420     <m>\u2166</m>
421   </test-case>
422 
423   <test-case id="test83" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en">
424     <pattern>Universal Declaration of Human Rights</pattern>
425     <pre>Proclaims this </pre><m>Universal Declaration of Human Rights</m><post> as a common standard of achievement for all peoples and all nations</post>
426   </test-case>
427 
428   <test-case id="test83b" strength="TERTIARY" alternate_handling="SHIFTED" locale="en">
429     <pattern>Universal Declaration of Human Rights</pattern>
430     <pre>Proclaims this </pre>
431     <m>Universal-Declaration-of-Human-Rights</m>
432     <post> as a common standard of achievement for all peoples and all nations</post>
433   </test-case>
434 
435   <test-case id="test84" strength="TERTIARY" locale="en">
436     <pattern>\u05E9\u0591\u05E9</pattern>
437     <m>\u05E9\u0592\u05E9</m>
438   </test-case>
439 
440   <test-case id="test84b" strength="IDENTICAL" locale="en">
441     <pattern>\u05E9\u0591\u05E9</pattern>
442     <pre>\u05E9\u0592\u05E9</pre>
443   </test-case>
444 </stringsearch-tests>
445 
446