1 // Copyright 2015 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <memory>
6
7 #include "core/fxcrt/fx_basic.h"
8 #include "public/fpdf_text.h"
9 #include "public/fpdfview.h"
10 #include "testing/embedder_test.h"
11 #include "testing/gtest/include/gtest/gtest.h"
12 #include "testing/test_support.h"
13
14 namespace {
15
// Returns true when the first |length| code units of |actual| equal the
// corresponding bytes of |expected|.
//
// |length| may include the terminating NUL of |expected|, i.e. it may be at
// most strlen(expected) + 1. Anything longer is rejected up front so that
// |expected| is never read past its terminator. Null |expected| or |actual|
// never matches.
bool check_unsigned_shorts(const char* expected,
                           const unsigned short* actual,
                           size_t length) {
  // Neither strlen() nor the indexing below may see a null pointer.
  if (!expected || !actual) {
    return false;
  }
  if (length > strlen(expected) + 1) {
    return false;
  }
  for (size_t i = 0; i < length; ++i) {
    // Widen through unsigned char: converting a plain char directly would
    // sign-extend bytes >= 0x80 where char is signed, making the comparison
    // platform-dependent.
    const unsigned short wanted =
        static_cast<unsigned short>(static_cast<unsigned char>(expected[i]));
    if (actual[i] != wanted) {
      return false;
    }
  }
  return true;
}
29
30 } // namespace
31
32 class FPDFTextEmbeddertest : public EmbedderTest {};
33
// Exercises text extraction on hello_world.pdf: full-page text, per-character
// Unicode values, font sizes, character boxes, hit testing, text rects and
// bounded-text extraction with buffers of various sizes.
TEST_F(FPDFTextEmbeddertest, Text) {
  // Everything below consumes these handles, so abort on the first one that
  // fails to load rather than producing a cascade of secondary failures.
  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
  ASSERT_TRUE(textpage);

  static const char expected[] = "Hello, world!\r\nGoodbye, world!";
  unsigned short fixed_buffer[128];
  // Pre-fill with a recognizable pattern (0xbdbd per element) so that writes
  // past the reported length can be detected.
  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));

  // Check includes the terminating NUL that is provided.
  int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer);
  ASSERT_GE(num_chars, 0);
  EXPECT_EQ(sizeof(expected), static_cast<size_t>(num_chars));
  EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected)));

  // Count does not include the terminating NUL in the string literal.
  EXPECT_EQ(sizeof(expected) - 1,
            static_cast<size_t>(FPDFText_CountChars(textpage)));
  for (size_t i = 0; i < sizeof(expected) - 1; ++i) {
    EXPECT_EQ(static_cast<unsigned int>(expected[i]),
              FPDFText_GetUnicode(textpage, i))
        << " at " << i;
  }

  EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0));
  EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15));

  double left = 0.0;
  double right = 0.0;
  double bottom = 0.0;
  double top = 0.0;
  FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top);
  EXPECT_NEAR(41.071, left, 0.001);
  EXPECT_NEAR(46.243, right, 0.001);
  EXPECT_NEAR(49.844, bottom, 0.001);
  EXPECT_NEAR(55.520, top, 0.001);

  EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(textpage, 42.0, 50.0, 1.0, 1.0));
  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 0.0, 0.0, 1.0, 1.0));
  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 199.0, 199.0, 1.0, 1.0));

  // Test out of range positions.
  EXPECT_EQ(-1,
            FPDFText_GetCharIndexAtPos(textpage, 42.0, 10000000.0, 1.0, 1.0));
  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, -1.0, 50.0, 1.0, 1.0));

  // Count does not include the terminating NUL in the string literal.
  EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, sizeof(expected) - 1));

  // Note the argument order here is (left, top, right, bottom), unlike
  // FPDFText_GetCharBox() above.
  left = 0.0;
  right = 0.0;
  bottom = 0.0;
  top = 0.0;
  FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom);
  EXPECT_NEAR(20.847, left, 0.001);
  EXPECT_NEAR(135.167, right, 0.001);
  EXPECT_NEAR(96.655, bottom, 0.001);
  EXPECT_NEAR(116.000, top, 0.001);

  // Test out of range indices set outputs to (0.0, 0.0, 0.0, 0.0).
  left = -1.0;
  right = -1.0;
  bottom = -1.0;
  top = -1.0;
  FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom);
  EXPECT_EQ(0.0, left);
  EXPECT_EQ(0.0, right);
  EXPECT_EQ(0.0, bottom);
  EXPECT_EQ(0.0, top);

  left = -2.0;
  right = -2.0;
  bottom = -2.0;
  top = -2.0;
  FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom);
  EXPECT_EQ(0.0, left);
  EXPECT_EQ(0.0, right);
  EXPECT_EQ(0.0, bottom);
  EXPECT_EQ(0.0, top);

  // Size query: no buffer is supplied, only the character count is checked.
  EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
                                       nullptr, 0));

  // Extract starting at character 4 as above.
  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
  EXPECT_EQ(1, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
                                       fixed_buffer, 1));
  EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1));
  EXPECT_EQ(0xbdbd, fixed_buffer[1]);

  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
  EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
                                       fixed_buffer, 9));
  EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9));
  EXPECT_EQ(0xbdbd, fixed_buffer[9]);

  // With ample space the result is NUL-terminated and the NUL is counted.
  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
  EXPECT_EQ(10, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
                                        fixed_buffer, 128));
  EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9));
  EXPECT_EQ(0u, fixed_buffer[9]);
  EXPECT_EQ(0xbdbd, fixed_buffer[10]);

  FPDFText_ClosePage(textpage);
  UnloadPage(page);
}
142
// Exercises the text search API on hello_world.pdf: missing terms, forward
// and backward iteration over matches, and the case-sensitivity and
// whole-word flags.
TEST_F(FPDFTextEmbeddertest, TextSearch) {
  // Everything below consumes these handles, so abort if any fails to load.
  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
  ASSERT_TRUE(textpage);

  std::unique_ptr<unsigned short, pdfium::FreeDeleter> nope =
      GetFPDFWideString(L"nope");
  std::unique_ptr<unsigned short, pdfium::FreeDeleter> world =
      GetFPDFWideString(L"world");
  std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_caps =
      GetFPDFWideString(L"WORLD");
  std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_substr =
      GetFPDFWideString(L"orld");

  // No occurrences of "nope" in test page.
  FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope.get(), 0, 0);
  ASSERT_TRUE(search);
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));

  // Advancing finds nothing.
  EXPECT_FALSE(FPDFText_FindNext(search));
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));

  // Retreating finds nothing.
  EXPECT_FALSE(FPDFText_FindPrev(search));
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // Two occurrences of "world" in test page.
  search = FPDFText_FindStart(textpage, world.get(), 0, 2);
  ASSERT_TRUE(search);

  // Remains not found until advanced.
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));

  // First occurrence of "world" in this test page.
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));

  // Last occurrence of "world" in this test page.
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));

  // Found position unchanged when fails to advance.
  EXPECT_FALSE(FPDFText_FindNext(search));
  EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));

  // Back to first occurrence.
  EXPECT_TRUE(FPDFText_FindPrev(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));

  // Found position unchanged when fails to retreat.
  EXPECT_FALSE(FPDFText_FindPrev(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // Exact search unaffected by case sensitivity and whole word flags.
  search = FPDFText_FindStart(textpage, world.get(),
                              FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0);
  ASSERT_TRUE(search);
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // Default is case-insensitive, so matching against caps works.
  search = FPDFText_FindStart(textpage, world_caps.get(), 0, 0);
  ASSERT_TRUE(search);
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(5, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // But can be made case sensitive, in which case this fails.
  search = FPDFText_FindStart(textpage, world_caps.get(), FPDF_MATCHCASE, 0);
  ASSERT_TRUE(search);
  EXPECT_FALSE(FPDFText_FindNext(search));
  EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(0, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // Default is match anywhere within word, so matching substring works.
  search = FPDFText_FindStart(textpage, world_substr.get(), 0, 0);
  ASSERT_TRUE(search);
  EXPECT_TRUE(FPDFText_FindNext(search));
  EXPECT_EQ(8, FPDFText_GetSchResultIndex(search));
  EXPECT_EQ(4, FPDFText_GetSchCount(search));
  FPDFText_FindClose(search);

  // But can be made to match word boundaries, in which case this fails.
  search =
      FPDFText_FindStart(textpage, world_substr.get(), FPDF_MATCHWHOLEWORD, 0);
  ASSERT_TRUE(search);
  EXPECT_FALSE(FPDFText_FindNext(search));
  // TODO(tsepez): investigate strange index/count values in this state.
  FPDFText_FindClose(search);

  FPDFText_ClosePage(textpage);
  UnloadPage(page);
}
252
253 // Test that the page has characters despite a bad stream length.
TEST_F(FPDFTextEmbeddertest,StreamLengthPastEndOfFile)254 TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) {
255 EXPECT_TRUE(OpenDocument("bug_57.pdf"));
256 FPDF_PAGE page = LoadPage(0);
257 EXPECT_TRUE(page);
258
259 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
260 EXPECT_TRUE(textpage);
261 EXPECT_EQ(13, FPDFText_CountChars(textpage));
262
263 FPDFText_ClosePage(textpage);
264 UnloadPage(page);
265 }
266
// Exercises web-link detection on weblinks.pdf: link counting, URL retrieval
// into buffers of various sizes, and link rect queries with valid and invalid
// indices.
TEST_F(FPDFTextEmbeddertest, WebLinks) {
  // Everything below consumes these handles, so abort if any fails to load.
  ASSERT_TRUE(OpenDocument("weblinks.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
  ASSERT_TRUE(textpage);

  FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
  ASSERT_TRUE(pagelink);

  // Page contains two HTTP-style URLs.
  EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink));

  // Only a terminating NUL required for bogus links.
  EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0));
  EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0));
  EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0));

  // Query the number of characters required for each link (incl NUL).
  EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0));
  EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0));

  static const char expected_url[] = "http://example.com?q=foo";
  // sizeof() counts the terminating NUL of the literal.
  static const size_t expected_len = sizeof(expected_url);
  unsigned short fixed_buffer[128];

  // Retrieve a link with too small a buffer.  Buffer will not be
  // NUL-terminated, but must not be modified past indicated length,
  // so pre-fill with a pattern to check write bounds.
  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
  EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1));
  EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1));
  EXPECT_EQ(0xbdbd, fixed_buffer[1]);

  // Check buffer that doesn't have space for a terminating NUL.
  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
  EXPECT_EQ(static_cast<int>(expected_len - 1),
            FPDFLink_GetURL(pagelink, 0, fixed_buffer, expected_len - 1));
  EXPECT_TRUE(
      check_unsigned_shorts(expected_url, fixed_buffer, expected_len - 1));
  EXPECT_EQ(0xbdbd, fixed_buffer[expected_len - 1]);

  // Retrieve link with exactly-sized buffer.
  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
  EXPECT_EQ(static_cast<int>(expected_len),
            FPDFLink_GetURL(pagelink, 0, fixed_buffer, expected_len));
  EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, expected_len));
  EXPECT_EQ(0u, fixed_buffer[expected_len - 1]);
  EXPECT_EQ(0xbdbd, fixed_buffer[expected_len]);

  // Retrieve link with ample-sized-buffer.
  memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
  EXPECT_EQ(static_cast<int>(expected_len),
            FPDFLink_GetURL(pagelink, 0, fixed_buffer, 128));
  EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, expected_len));
  EXPECT_EQ(0u, fixed_buffer[expected_len - 1]);
  EXPECT_EQ(0xbdbd, fixed_buffer[expected_len]);

  // Each link rendered in a single rect in this test page.
  EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0));
  EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1));

  // Out-of-range link indices report zero rects.
  EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1));
  EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2));
  EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000));

  // Check boundary of valid link index with valid rect index.
  double left = 0.0;
  double right = 0.0;
  double top = 0.0;
  double bottom = 0.0;
  FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom);
  EXPECT_NEAR(50.791, left, 0.001);
  EXPECT_NEAR(187.963, right, 0.001);
  EXPECT_NEAR(97.624, bottom, 0.001);
  EXPECT_NEAR(108.736, top, 0.001);

  // Check that valid link with invalid rect index leaves parameters unchanged.
  left = -1.0;
  right = -1.0;
  top = -1.0;
  bottom = -1.0;
  FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom);
  EXPECT_EQ(-1.0, left);
  EXPECT_EQ(-1.0, right);
  EXPECT_EQ(-1.0, bottom);
  EXPECT_EQ(-1.0, top);

  // Check that invalid link index leaves parameters unchanged.
  left = -2.0;
  right = -2.0;
  top = -2.0;
  bottom = -2.0;
  FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom);
  EXPECT_EQ(-2.0, left);
  EXPECT_EQ(-2.0, right);
  EXPECT_EQ(-2.0, bottom);
  EXPECT_EQ(-2.0, top);

  FPDFLink_CloseWebLinks(pagelink);
  FPDFText_ClosePage(textpage);
  UnloadPage(page);
}
372
// Checks the font size reported for every character of hello_world.pdf
// against a per-index table.
TEST_F(FPDFTextEmbeddertest, GetFontSize) {
  // Everything below consumes these handles, so abort if any fails to load.
  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
  ASSERT_TRUE(textpage);

  // One entry per character index. The two entries of 1 sit at indices 13 and
  // 14; the remaining entries are 12 (indices 0-12) and 16 (indices 15-29).
  const double kExpectedFontSizes[] = {12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
                                       12, 12, 12, 1,  1,  16, 16, 16, 16, 16,
                                       16, 16, 16, 16, 16, 16, 16, 16, 16, 16};

  // The counts must agree before indexing the table by character index.
  int count = FPDFText_CountChars(textpage);
  ASSERT_EQ(FX_ArraySize(kExpectedFontSizes), static_cast<size_t>(count));
  for (int i = 0; i < count; ++i) {
    EXPECT_EQ(kExpectedFontSizes[i], FPDFText_GetFontSize(textpage, i)) << i;
  }

  FPDFText_ClosePage(textpage);
  UnloadPage(page);
}
393
// Checks that bug_583.pdf yields exactly one character and that its reported
// Unicode value is 0.
TEST_F(FPDFTextEmbeddertest, ToUnicode) {
  // Everything below consumes these handles, so abort if any fails to load.
  ASSERT_TRUE(OpenDocument("bug_583.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
  ASSERT_TRUE(textpage);

  // Index 0 is only queried once the page is known to hold one character.
  ASSERT_EQ(1, FPDFText_CountChars(textpage));
  EXPECT_EQ(static_cast<unsigned int>(0), FPDFText_GetUnicode(textpage, 0));

  FPDFText_ClosePage(textpage);
  UnloadPage(page);
}
408