1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /**
18  * This is a series of unit tests for snippet creation and highlighting
19  *
20  * You can run this entire test case with:
21  *   runtest -c com.android.emailcommon.utility.TextUtilitiesTests email
22  */
23 package com.android.emailcommon.utility;
24 
25 import android.test.AndroidTestCase;
26 import android.test.suitebuilder.annotation.SmallTest;
27 
28 import android.text.SpannableStringBuilder;
29 import android.text.style.BackgroundColorSpan;
30 
31 @SmallTest
32 public class TextUtilitiesTests extends AndroidTestCase {
33 
testPlainSnippet()34     public void testPlainSnippet() {
35         // Test the simplest cases
36         assertEquals("", TextUtilities.makeSnippetFromPlainText(null));
37         assertEquals("", TextUtilities.makeSnippetFromPlainText(""));
38 
39         // Test handling leading, trailing, and duplicated whitespace
40         // Just test common whitespace characters; we calls Character.isWhitespace() internally, so
41         // other whitespace should be fine as well
42         assertEquals("", TextUtilities.makeSnippetFromPlainText(" \n\r\t\r\t\n"));
43         char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER;
44         assertEquals("foo", TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \n\t\r" + c));
45         assertEquals("foo bar",
46                 TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
47 
48         // Handle duplicated - and =
49         assertEquals("Foo-Bar=Bletch",
50                 TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch"));
51 
52         // We shouldn't muck with HTML entities
53         assertEquals(" >", TextUtilities.makeSnippetFromPlainText(" >"));
54     }
55 
testHtmlSnippet()56     public void testHtmlSnippet() {
57         // Test the simplest cases
58         assertEquals("", TextUtilities.makeSnippetFromHtmlText(null));
59         assertEquals("", TextUtilities.makeSnippetFromHtmlText(""));
60 
61         // Test handling leading, trailing, and duplicated whitespace
62         // Just test common whitespace characters; we calls Character.isWhitespace() internally, so
63         // other whitespace should be fine as well
64         assertEquals("", TextUtilities.makeSnippetFromHtmlText(" \n\r\t\r\t\n"));
65         char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER;
66         assertEquals("foo", TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \n\t\r" + c));
67         assertEquals("foo bar",
68                 TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
69 
70         // Handle duplicated - and =
71         assertEquals("Foo-Bar=Bletch",
72                 TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch"));
73 
74         // We should catch HTML entities in these tests
75         assertEquals(">", TextUtilities.makeSnippetFromHtmlText(" >"));
76         assertEquals("&<> \"", TextUtilities.makeSnippetFromHtmlText("&amp;&lt;&gt;&nbsp;&quot;"));
77         // Test for decimal and hex entities
78         assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("&#65;&#66;&#67;"));
79         assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("&#x41;&#x42;&#x43;"));
80 
81         // Test for stripping simple tags
82         assertEquals("Hi there", TextUtilities.makeSnippetFromHtmlText("<html>Hi there</html>"));
83         // TODO: Add tests here if/when we find problematic HTML
84     }
85 
testStripHtmlEntityEdgeCases()86     public void testStripHtmlEntityEdgeCases() {
87         int[] skipCount = new int[1];
88         // Bare & isn't an entity
89         char c = TextUtilities.stripHtmlEntity("&", 0, skipCount);
90         assertEquals(c, '&');
91         assertEquals(0, skipCount[0]);
92         // Also not legal
93         c = TextUtilities.stripHtmlEntity("&;", 0, skipCount);
94         assertEquals(c, '&');
95         assertEquals(0, skipCount[0]);
96         // This is an entity, but shouldn't be found
97         c = TextUtilities.stripHtmlEntity("&nosuch;", 0, skipCount);
98         assertEquals(c, '&');
99         assertEquals(0, skipCount[0]);
100         // This is too long for an entity, even though it starts like a valid one
101         c = TextUtilities.stripHtmlEntity("&nbspandmore;", 0, skipCount);
102         assertEquals(c, '&');
103         assertEquals(0, skipCount[0]);
104         // Illegal decimal entities
105         c = TextUtilities.stripHtmlEntity("&#ABC", 0, skipCount);
106         assertEquals(c, '&');
107         assertEquals(0, skipCount[0]);
108         c = TextUtilities.stripHtmlEntity("&#12B", 0, skipCount);
109         assertEquals(c, '&');
110         assertEquals(0, skipCount[0]);
111         // Illegal hex entities
112         c = TextUtilities.stripHtmlEntity("&#xABC", 0, skipCount);
113         assertEquals(c, '&');
114         assertEquals(0, skipCount[0]);
115         // Illegal hex entities
116         c = TextUtilities.stripHtmlEntity("&#x19G", 0, skipCount);
117         assertEquals(c, '&');
118         assertEquals(0, skipCount[0]);
119     }
120 
testStripContent()121     public void testStripContent() {
122         assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
123             "<html><style foo=\"bar\">Not</style>Visible</html>"));
124         assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
125             "<html><STYLE foo=\"bar\">Not</STYLE>Visible</html>"));
126         assertEquals("IsVisible", TextUtilities.makeSnippetFromHtmlText(
127             "<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>"));
128         assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
129             "<html>Visible<style foo=\"bar\">Not"));
130         assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
131             "<html>Visible<style foo=\"bar\">Not</style>AgainVisible"));
132         assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
133             "<html>Visible<style foo=\"bar\"/>AgainVisible"));
134         assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
135             "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible"));
136     }
137 
138     /**
139      * We pass in HTML text in which an ampersand (@) is two chars ahead of the correct end position
140      * for the tag named 'tag' and then check whether the calculated end position matches the known
141      * correct position.  HTML text not containing an ampersand should generate a calculated end of
142      * -1
143      * @param text the HTML text to test
144      */
findTagEnd(String text, String tag)145     private void findTagEnd(String text, String tag) {
146         int calculatedEnd = TextUtilities.findTagEnd(text , tag, 0);
147         int knownEnd = text.indexOf('@') + 2;
148         if (knownEnd == 1) {
149             // indexOf will return -1, so we'll get 1 as knownEnd
150             assertEquals(-1, calculatedEnd);
151         } else {
152             assertEquals(calculatedEnd, knownEnd);
153         }
154     }
155 
testFindTagEnd()156     public void testFindTagEnd() {
157         // Test with <tag ... />
158         findTagEnd("<tag foo=\"bar\"@ /> <blah blah>", "tag");
159         // Test with <tag ...> ... </tag>
160         findTagEnd("<tag foo=\"bar\">some text@</tag>some more text", "tag");
161         // Test with incomplete tag
162         findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag");
163         // Test with space at end of tag
164         findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag ");
165     }
166 
assertHighlightUnchanged(String str)167     private void assertHighlightUnchanged(String str) {
168         assertEquals(str, TextUtilities.highlightTermsInHtml(str, null));
169     }
170 
testHighlightNoTerm()171     public void testHighlightNoTerm() {
172         // With no search terms, the html should be unchanged
173         assertHighlightUnchanged("<html><style foo=\"bar\">Not</style>Visible</html>");
174         assertHighlightUnchanged("<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>");
175         assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not");
176         assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not</style>AgainVisible");
177         assertHighlightUnchanged("<html>Visible<style foo=\"bar\"/>AgainVisible");
178         assertHighlightUnchanged(
179                 "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible");
180     }
181 
testHighlightSingleTermHtml()182     public void testHighlightSingleTermHtml() {
183         String str = "<html><style foo=\"bar\">Not</style>Visible</html>";
184         // Test that tags aren't highlighted
185         assertEquals(str, TextUtilities.highlightTermsInHtml(
186                 "<html><style foo=\"bar\">Not</style>Visible</html>", "style"));
187         // Test that non-tags are
188         assertEquals("<html><style foo=\"bar\">Not</style><span " +
189                 "style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING +
190                 "\">Visi</span>ble</html>",
191                 TextUtilities.highlightTermsInHtml(str, "Visi"));
192         assertEquals("<html>Visible<style foo=\"bar\">Not</style>A<span" +
193                 " style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING +
194                 "\">gain</span>Visible",
195                 TextUtilities.highlightTermsInHtml(
196                         "<html>Visible<style foo=\"bar\">Not</style>AgainVisible", "gain"));
197     }
198 
brokentestHighlightSingleTermText()199     public void brokentestHighlightSingleTermText() {
200         // Sprinkle text with a few HTML characters to make sure they're ignored
201         String text = "This< should be visibl>e";
202         // We should find this, because search terms are case insensitive
203         SpannableStringBuilder ssb =
204             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Visi");
205         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
206         assertEquals(1, spans.length);
207         BackgroundColorSpan span = spans[0];
208         assertEquals(text.indexOf("visi"), ssb.getSpanStart(span));
209         assertEquals(text.indexOf("bl>e"), ssb.getSpanEnd(span));
210         // Heh; this next test fails.. we use the search term!
211         assertEquals(text, ssb.toString());
212 
213         // Multiple instances of the term
214         text = "The research word should be a search result";
215         ssb = (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Search");
216         spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
217         assertEquals(2, spans.length);
218         span = spans[0];
219         assertEquals(text.indexOf("search word"), ssb.getSpanStart(span));
220         assertEquals(text.indexOf(" word"), ssb.getSpanEnd(span));
221         span = spans[1];
222         assertEquals(text.indexOf("search result"), ssb.getSpanStart(span));
223         assertEquals(text.indexOf(" result"), ssb.getSpanEnd(span));
224         assertEquals(text, ssb.toString());
225     }
226 
brokentestHighlightTwoTermText()227     public void brokentestHighlightTwoTermText() {
228         String text = "This should be visible";
229         // We should find this, because search terms are case insensitive
230         SpannableStringBuilder ssb =
231             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "visi should");
232         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
233         assertEquals(2, spans.length);
234         BackgroundColorSpan span = spans[0];
235         assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
236         assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span));
237         span = spans[1];
238         assertEquals(text.indexOf("visi"), ssb.getSpanStart(span));
239         assertEquals(text.indexOf("ble"), ssb.getSpanEnd(span));
240         assertEquals(text, ssb.toString());
241     }
242 
brokentestHighlightDuplicateTermText()243     public void brokentestHighlightDuplicateTermText() {
244         String text = "This should be visible";
245         // We should find this, because search terms are case insensitive
246         SpannableStringBuilder ssb =
247             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should should");
248         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
249         assertEquals(1, spans.length);
250         BackgroundColorSpan span = spans[0];
251         assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
252         assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span));
253     }
254 
brokentestHighlightOverlapTermText()255     public void brokentestHighlightOverlapTermText() {
256         String text = "This shoulder is visible";
257         // We should find this, because search terms are case insensitive
258         SpannableStringBuilder ssb =
259             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should ould");
260         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
261         assertEquals(1, spans.length);
262         BackgroundColorSpan span = spans[0];
263         assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
264         assertEquals(text.indexOf("er is"), ssb.getSpanEnd(span));
265     }
266 
267 
brokentestHighlightOverlapTermText2()268     public void brokentestHighlightOverlapTermText2() {
269         String text = "The shoulders are visible";
270         // We should find this, because search terms are case insensitive
271         SpannableStringBuilder ssb =
272             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "shoulder shoulders");
273         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
274         assertEquals(2, spans.length);
275         BackgroundColorSpan span = spans[0];
276         assertEquals(text.indexOf("shoulder"), ssb.getSpanStart(span));
277         assertEquals(text.indexOf("s are visible"), ssb.getSpanEnd(span));
278         span = spans[1];
279         // Just the 's' should be caught in the 2nd span
280         assertEquals(text.indexOf("s are visible"), ssb.getSpanStart(span));
281         assertEquals(text.indexOf(" are visible"), ssb.getSpanEnd(span));
282         assertEquals(text, ssb.toString());
283     }
284     // For debugging large HTML samples
285 
286 //    private String readLargeSnippet(String fn) {
287 //        File file = mContext.getFileStreamPath(fn);
288 //        StringBuffer sb = new StringBuffer();
289 //        BufferedReader reader = null;
290 //        try {
291 //            String text;
292 //            reader = new BufferedReader(new FileReader(file));
293 //            while ((text = reader.readLine()) != null) {
294 //                sb.append(text);
295 //                sb.append(" ");
296 //            }
297 //        } catch (IOException e) {
298 //        }
299 //        return sb.toString();
300 //    }
301  }
302