1 // Copyright 2015 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6 #include <memory>
7 #include <utility>
8 #include <vector>
9
10 #include "build/build_config.h"
11 #include "core/fxcrt/fx_memory.h"
12 #include "core/fxge/fx_font.h"
13 #include "public/cpp/fpdf_scopers.h"
14 #include "public/fpdf_text.h"
15 #include "public/fpdf_transformpage.h"
16 #include "public/fpdfview.h"
17 #include "testing/embedder_test.h"
18 #include "testing/fx_string_testhelpers.h"
19 #include "testing/gtest/include/gtest/gtest.h"
20
21 namespace {
22
23 constexpr char kHelloGoodbyeText[] = "Hello, world!\r\nGoodbye, world!";
24 constexpr int kHelloGoodbyeTextSize = FX_ArraySize(kHelloGoodbyeText);
25
// Returns true when the first |length| elements of |actual| equal the
// corresponding characters of |expected|, each converted to unsigned short.
// |length| may include the terminating NUL of |expected|, but nothing past it;
// larger lengths are rejected without reading either buffer.
bool check_unsigned_shorts(const char* expected,
                           const unsigned short* actual,
                           size_t length) {
  // Never compare beyond the NUL terminator of |expected|.
  const size_t max_length = strlen(expected) + 1;
  if (length > max_length)
    return false;

  const unsigned short* const actual_end = actual + length;
  while (actual != actual_end) {
    if (*actual++ != static_cast<unsigned short>(*expected++))
      return false;
  }
  return true;
}
38
39 } // namespace
40
41 class FPDFTextEmbedderTest : public EmbedderTest {};
42
// Exercises text extraction, per-character metrics, hit testing, rect
// enumeration and bounded-text extraction against hello_world.pdf.
TEST_F(FPDFTextEmbedderTest, Text) {
  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
  ASSERT_TRUE(textpage);

  // Buffers are pre-filled with 0xbd bytes so that every element the API does
  // not write still reads back as the sentinel 0xbdbd.
  unsigned short buffer[128];
  memset(buffer, 0xbd, sizeof(buffer));

  // Check that edge cases are handled gracefully.
  EXPECT_EQ(0, FPDFText_GetText(textpage, 0, 128, nullptr));
  EXPECT_EQ(0, FPDFText_GetText(textpage, -1, 128, buffer));
  EXPECT_EQ(0, FPDFText_GetText(textpage, 0, -1, buffer));
  EXPECT_EQ(1, FPDFText_GetText(textpage, 0, 0, buffer));
  EXPECT_EQ(0, buffer[0]);

  // Keep going and check the next case.
  memset(buffer, 0xbd, sizeof(buffer));
  EXPECT_EQ(2, FPDFText_GetText(textpage, 0, 1, buffer));
  EXPECT_EQ(kHelloGoodbyeText[0], buffer[0]);
  EXPECT_EQ(0, buffer[1]);

  // Check includes the terminating NUL that is provided. A non-fatal
  // expectation is used so the page cleanup at the end of the test still runs
  // on a mismatch; the comparison below stays within |buffer| either way.
  int num_chars = FPDFText_GetText(textpage, 0, 128, buffer);
  EXPECT_EQ(kHelloGoodbyeTextSize, num_chars);
  EXPECT_TRUE(
      check_unsigned_shorts(kHelloGoodbyeText, buffer, kHelloGoodbyeTextSize));

  // Count does not include the terminating NUL in the string literal.
  EXPECT_EQ(kHelloGoodbyeTextSize - 1, FPDFText_CountChars(textpage));
  // |i| is an int to match both |kHelloGoodbyeTextSize| and the index
  // parameter of FPDFText_GetUnicode().
  for (int i = 0; i < kHelloGoodbyeTextSize - 1; ++i) {
    EXPECT_EQ(static_cast<unsigned int>(kHelloGoodbyeText[i]),
              FPDFText_GetUnicode(textpage, i))
        << " at " << i;
  }

  // Extracting using a buffer that will be completely filled. Small buffer is
  // 12 elements long, since it will need 2 locations per displayed character in
  // the expected string, plus 2 more for the terminating character.
  static const char kSmallExpected[] = "Hello";
  unsigned short small_buffer[12];
  // Pre-fill the buffer that is actually passed to the API below, so no
  // uninitialized element can ever take part in the comparison.
  memset(small_buffer, 0xbd, sizeof(small_buffer));
  EXPECT_EQ(6, FPDFText_GetText(textpage, 0, 5, small_buffer));
  EXPECT_TRUE(check_unsigned_shorts(kSmallExpected, small_buffer,
                                    sizeof(kSmallExpected)));

  EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0));
  EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15));

  // Failed FPDFText_GetCharBox() calls must leave the out-params untouched.
  double left = 1.0;
  double right = 2.0;
  double bottom = 3.0;
  double top = 4.0;
  EXPECT_FALSE(FPDFText_GetCharBox(nullptr, 4, &left, &right, &bottom, &top));
  EXPECT_DOUBLE_EQ(1.0, left);
  EXPECT_DOUBLE_EQ(2.0, right);
  EXPECT_DOUBLE_EQ(3.0, bottom);
  EXPECT_DOUBLE_EQ(4.0, top);
  EXPECT_FALSE(FPDFText_GetCharBox(textpage, -1, &left, &right, &bottom, &top));
  EXPECT_DOUBLE_EQ(1.0, left);
  EXPECT_DOUBLE_EQ(2.0, right);
  EXPECT_DOUBLE_EQ(3.0, bottom);
  EXPECT_DOUBLE_EQ(4.0, top);
  EXPECT_FALSE(FPDFText_GetCharBox(textpage, 55, &left, &right, &bottom, &top));
  EXPECT_DOUBLE_EQ(1.0, left);
  EXPECT_DOUBLE_EQ(2.0, right);
  EXPECT_DOUBLE_EQ(3.0, bottom);
  EXPECT_DOUBLE_EQ(4.0, top);
  EXPECT_FALSE(
      FPDFText_GetCharBox(textpage, 4, nullptr, &right, &bottom, &top));
  EXPECT_FALSE(FPDFText_GetCharBox(textpage, 4, &left, nullptr, &bottom, &top));
  EXPECT_FALSE(FPDFText_GetCharBox(textpage, 4, &left, &right, nullptr, &top));
  EXPECT_FALSE(
      FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, nullptr));
  EXPECT_FALSE(
      FPDFText_GetCharBox(textpage, 4, nullptr, nullptr, nullptr, nullptr));

  EXPECT_TRUE(FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top));
  EXPECT_NEAR(41.071, left, 0.001);
  EXPECT_NEAR(46.243, right, 0.001);
  EXPECT_NEAR(49.844, bottom, 0.001);
  EXPECT_NEAR(55.520, top, 0.001);

  // Failed FPDFText_GetLooseCharBox() calls must leave |rect| untouched.
  FS_RECTF rect = {4.0f, 1.0f, 3.0f, 2.0f};
  EXPECT_FALSE(FPDFText_GetLooseCharBox(nullptr, 4, &rect));
  EXPECT_FLOAT_EQ(4.0f, rect.left);
  EXPECT_FLOAT_EQ(3.0f, rect.right);
  EXPECT_FLOAT_EQ(2.0f, rect.bottom);
  EXPECT_FLOAT_EQ(1.0f, rect.top);
  EXPECT_FALSE(FPDFText_GetLooseCharBox(textpage, -1, &rect));
  EXPECT_FLOAT_EQ(4.0f, rect.left);
  EXPECT_FLOAT_EQ(3.0f, rect.right);
  EXPECT_FLOAT_EQ(2.0f, rect.bottom);
  EXPECT_FLOAT_EQ(1.0f, rect.top);
  EXPECT_FALSE(FPDFText_GetLooseCharBox(textpage, 55, &rect));
  EXPECT_FLOAT_EQ(4.0f, rect.left);
  EXPECT_FLOAT_EQ(3.0f, rect.right);
  EXPECT_FLOAT_EQ(2.0f, rect.bottom);
  EXPECT_FLOAT_EQ(1.0f, rect.top);
  EXPECT_FALSE(FPDFText_GetLooseCharBox(textpage, 4, nullptr));

  EXPECT_TRUE(FPDFText_GetLooseCharBox(textpage, 4, &rect));
  EXPECT_FLOAT_EQ(40.664001f, rect.left);
  EXPECT_FLOAT_EQ(46.664001f, rect.right);
  EXPECT_FLOAT_EQ(47.667271f, rect.bottom);
  EXPECT_FLOAT_EQ(59.667271f, rect.top);

  double x = 0.0;
  double y = 0.0;
  EXPECT_TRUE(FPDFText_GetCharOrigin(textpage, 4, &x, &y));
  EXPECT_NEAR(40.664, x, 0.001);
  EXPECT_NEAR(50.000, y, 0.001);

  EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(textpage, 42.0, 50.0, 1.0, 1.0));
  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 0.0, 0.0, 1.0, 1.0));
  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 199.0, 199.0, 1.0, 1.0));

  // Test out of range positions.
  EXPECT_EQ(-1,
            FPDFText_GetCharIndexAtPos(textpage, 42.0, 10000000.0, 1.0, 1.0));
  EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, -1.0, 50.0, 1.0, 1.0));

  // Count does not include the terminating NUL in the string literal.
  EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, kHelloGoodbyeTextSize - 1));

  left = 0.0;
  right = 0.0;
  bottom = 0.0;
  top = 0.0;
  EXPECT_TRUE(FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom));
  EXPECT_NEAR(20.847, left, 0.001);
  EXPECT_NEAR(135.167, right, 0.001);
  EXPECT_NEAR(96.655, bottom, 0.001);
  EXPECT_NEAR(116.000, top, 0.001);

  // Test out of range indices set outputs to (0.0, 0.0, 0.0, 0.0).
  left = -1.0;
  right = -1.0;
  bottom = -1.0;
  top = -1.0;
  EXPECT_FALSE(FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom));
  EXPECT_EQ(0.0, left);
  EXPECT_EQ(0.0, right);
  EXPECT_EQ(0.0, bottom);
  EXPECT_EQ(0.0, top);

  left = -2.0;
  right = -2.0;
  bottom = -2.0;
  top = -2.0;
  EXPECT_FALSE(FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom));
  EXPECT_EQ(0.0, left);
  EXPECT_EQ(0.0, right);
  EXPECT_EQ(0.0, bottom);
  EXPECT_EQ(0.0, top);

  // With no buffer, only the count for the bounded text is returned.
  EXPECT_EQ(
      9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, nullptr, 0));

  // Extract starting at character 4 as above.
  memset(buffer, 0xbd, sizeof(buffer));
  EXPECT_EQ(
      1, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, buffer, 1));
  EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText + 4, buffer, 1));
  EXPECT_EQ(0xbdbd, buffer[1]);

  memset(buffer, 0xbd, sizeof(buffer));
  EXPECT_EQ(
      9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, buffer, 9));
  EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText + 4, buffer, 9));
  EXPECT_EQ(0xbdbd, buffer[9]);

  // With room to spare, a terminating NUL is written after the 9 characters.
  memset(buffer, 0xbd, sizeof(buffer));
  EXPECT_EQ(10, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
                                        buffer, 128));
  EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText + 4, buffer, 9));
  EXPECT_EQ(0u, buffer[9]);
  EXPECT_EQ(0xbdbd, buffer[10]);

  FPDFText_ClosePage(textpage);
  UnloadPage(page);
}
227
// Checks font size, character origins and loose character boxes for
// characters 1 and 2 of vertical_text.pdf.
TEST_F(FPDFTextEmbedderTest, TextVertical) {
  ASSERT_TRUE(OpenDocument("vertical_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  EXPECT_EQ(12.0, FPDFText_GetFontSize(text_page, 0));

  // Character origins.
  double origin_x = 0.0;
  double origin_y = 0.0;
  EXPECT_TRUE(FPDFText_GetCharOrigin(text_page, 1, &origin_x, &origin_y));
  EXPECT_NEAR(6.664, origin_x, 0.001);
  EXPECT_NEAR(171.508, origin_y, 0.001);

  EXPECT_TRUE(FPDFText_GetCharOrigin(text_page, 2, &origin_x, &origin_y));
  EXPECT_NEAR(8.668, origin_x, 0.001);
  EXPECT_NEAR(160.492, origin_y, 0.001);

  // Loose boxes of the same two characters.
  FS_RECTF loose_box;
  EXPECT_TRUE(FPDFText_GetLooseCharBox(text_page, 1, &loose_box));
  EXPECT_NEAR(4.0, loose_box.left, 0.001);
  EXPECT_NEAR(16.0, loose_box.right, 0.001);
  EXPECT_NEAR(178.984, loose_box.bottom, 0.001);
  EXPECT_NEAR(170.308, loose_box.top, 0.001);

  EXPECT_TRUE(FPDFText_GetLooseCharBox(text_page, 2, &loose_box));
  EXPECT_NEAR(4.0, loose_box.left, 0.001);
  EXPECT_NEAR(16.0, loose_box.right, 0.001);
  EXPECT_NEAR(170.308, loose_box.bottom, 0.001);
  EXPECT_NEAR(159.292, loose_box.top, 0.001);

  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
264
// Exercises FPDFText_FindStart/FindNext/FindPrev on hello_world.pdf with the
// default flags as well as FPDF_MATCHCASE and FPDF_MATCHWHOLEWORD.
TEST_F(FPDFTextEmbedderTest, TextSearch) {
  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
  ASSERT_TRUE(textpage);

  ScopedFPDFWideString nope = GetFPDFWideString(L"nope");
  ScopedFPDFWideString world = GetFPDFWideString(L"world");
  ScopedFPDFWideString world_caps = GetFPDFWideString(L"WORLD");
  ScopedFPDFWideString world_substr = GetFPDFWideString(L"orld");

  {
    // No occurrences of "nope" in test page.
    ScopedFPDFTextFind search(FPDFText_FindStart(textpage, nope.get(), 0, 0));
    EXPECT_TRUE(search);
    EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));

    // Advancing finds nothing.
    EXPECT_FALSE(FPDFText_FindNext(search.get()));
    EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));

    // Retreating finds nothing.
    EXPECT_FALSE(FPDFText_FindPrev(search.get()));
    EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
  }

  {
    // Two occurrences of "world" in test page. This search is started with a
    // start index of 2 rather than 0.
    ScopedFPDFTextFind search(FPDFText_FindStart(textpage, world.get(), 0, 2));
    EXPECT_TRUE(search);

    // Remains not found until advanced.
    EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));

    // First occurrence of "world" in this test page.
    EXPECT_TRUE(FPDFText_FindNext(search.get()));
    EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));

    // Last occurrence of "world" in this test page.
    EXPECT_TRUE(FPDFText_FindNext(search.get()));
    EXPECT_EQ(24, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));

    // Found position unchanged when fails to advance.
    EXPECT_FALSE(FPDFText_FindNext(search.get()));
    EXPECT_EQ(24, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));

    // Back to first occurrence.
    EXPECT_TRUE(FPDFText_FindPrev(search.get()));
    EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));

    // Found position unchanged when fails to retreat.
    EXPECT_FALSE(FPDFText_FindPrev(search.get()));
    EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
  }

  {
    // Exact search unaffected by case sensitivity and whole word flags.
    ScopedFPDFTextFind search(FPDFText_FindStart(
        textpage, world.get(), FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0));
    EXPECT_TRUE(search);
    EXPECT_TRUE(FPDFText_FindNext(search.get()));
    EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
  }

  {
    // Default is case-insensitive, so matching against caps works.
    ScopedFPDFTextFind search(
        FPDFText_FindStart(textpage, world_caps.get(), 0, 0));
    EXPECT_TRUE(search);
    EXPECT_TRUE(FPDFText_FindNext(search.get()));
    EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
  }

  {
    // But can be made case sensitive, in which case this fails.
    ScopedFPDFTextFind search(
        FPDFText_FindStart(textpage, world_caps.get(), FPDF_MATCHCASE, 0));
    // Check the handle before using it, as the other sub-cases do.
    EXPECT_TRUE(search);
    EXPECT_FALSE(FPDFText_FindNext(search.get()));
    EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
  }

  {
    // Default is match anywhere within word, so matching substring works.
    ScopedFPDFTextFind search(
        FPDFText_FindStart(textpage, world_substr.get(), 0, 0));
    EXPECT_TRUE(search);
    EXPECT_TRUE(FPDFText_FindNext(search.get()));
    EXPECT_EQ(8, FPDFText_GetSchResultIndex(search.get()));
    EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
  }

  {
    // But can be made to match word boundaries, in which case this fails.
    ScopedFPDFTextFind search(FPDFText_FindStart(textpage, world_substr.get(),
                                                 FPDF_MATCHWHOLEWORD, 0));
    EXPECT_TRUE(search);
    EXPECT_FALSE(FPDFText_FindNext(search.get()));
    // TODO(tsepez): investigate strange index/count values in this state.
  }

  FPDFText_ClosePage(textpage);
  UnloadPage(page);
}
380
// Compares the default search mode with FPDF_CONSECUTIVE when looking for
// "aaaa" in find_text_consecutive.pdf.
TEST_F(FPDFTextEmbedderTest, TextSearchConsecutive) {
  ASSERT_TRUE(OpenDocument("find_text_consecutive.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  ScopedFPDFWideString needle = GetFPDFWideString(L"aaaa");

  {
    // Default flags: "aaaa" is found twice in "aaaaaaaaaa", at 0 and at 4.
    ScopedFPDFTextFind finder(
        FPDFText_FindStart(text_page, needle.get(), 0, 0));
    EXPECT_TRUE(finder);

    // Nothing is reported before the first advance.
    EXPECT_EQ(0, FPDFText_GetSchResultIndex(finder.get()));
    EXPECT_EQ(0, FPDFText_GetSchCount(finder.get()));

    // First match.
    EXPECT_TRUE(FPDFText_FindNext(finder.get()));
    EXPECT_EQ(0, FPDFText_GetSchResultIndex(finder.get()));
    EXPECT_EQ(4, FPDFText_GetSchCount(finder.get()));

    // Second and last match.
    EXPECT_TRUE(FPDFText_FindNext(finder.get()));
    EXPECT_EQ(4, FPDFText_GetSchResultIndex(finder.get()));
    EXPECT_EQ(4, FPDFText_GetSchCount(finder.get()));

    // A failed advance keeps the last match.
    EXPECT_FALSE(FPDFText_FindNext(finder.get()));
    EXPECT_EQ(4, FPDFText_GetSchResultIndex(finder.get()));
    EXPECT_EQ(4, FPDFText_GetSchCount(finder.get()));

    // Step back to the first match.
    EXPECT_TRUE(FPDFText_FindPrev(finder.get()));
    EXPECT_EQ(0, FPDFText_GetSchResultIndex(finder.get()));
    EXPECT_EQ(4, FPDFText_GetSchCount(finder.get()));

    // A failed retreat keeps the first match.
    EXPECT_FALSE(FPDFText_FindPrev(finder.get()));
    EXPECT_EQ(0, FPDFText_GetSchResultIndex(finder.get()));
    EXPECT_EQ(4, FPDFText_GetSchCount(finder.get()));
  }

  {
    // With FPDF_CONSECUTIVE, "aaaa" is found 7 times in "aaaaaaaaaa", once at
    // each of the indices 0 through 6.
    constexpr int kConsecutiveMatches = 7;
    ScopedFPDFTextFind finder(
        FPDFText_FindStart(text_page, needle.get(), FPDF_CONSECUTIVE, 0));
    EXPECT_TRUE(finder);

    // Nothing is reported before the first advance.
    EXPECT_EQ(0, FPDFText_GetSchResultIndex(finder.get()));
    EXPECT_EQ(0, FPDFText_GetSchCount(finder.get()));

    // Walk forward through every match.
    for (int index = 0; index < kConsecutiveMatches; ++index) {
      EXPECT_TRUE(FPDFText_FindNext(finder.get()));
      EXPECT_EQ(index, FPDFText_GetSchResultIndex(finder.get()));
      EXPECT_EQ(4, FPDFText_GetSchCount(finder.get()));
    }

    // A failed advance keeps the last match.
    EXPECT_FALSE(FPDFText_FindNext(finder.get()));
    EXPECT_EQ(6, FPDFText_GetSchResultIndex(finder.get()));
    EXPECT_EQ(4, FPDFText_GetSchCount(finder.get()));

    // Walk backward from the second-to-last match down to the first.
    for (int index = kConsecutiveMatches - 2; index >= 0; --index) {
      EXPECT_TRUE(FPDFText_FindPrev(finder.get()));
      EXPECT_EQ(index, FPDFText_GetSchResultIndex(finder.get()));
      EXPECT_EQ(4, FPDFText_GetSchCount(finder.get()));
    }

    // A failed retreat keeps the first match.
    EXPECT_FALSE(FPDFText_FindPrev(finder.get()));
    EXPECT_EQ(0, FPDFText_GetSchResultIndex(finder.get()));
    EXPECT_EQ(4, FPDFText_GetSchCount(finder.get()));
  }

  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
464
465 // Fails on Windows. https://crbug.com/pdfium/1370
466 #if defined(OS_WIN)
467 #define MAYBE_TextSearchLatinExtended DISABLED_TextSearchLatinExtended
468 #else
469 #define MAYBE_TextSearchLatinExtended TextSearchLatinExtended
470 #endif
TEST_F(FPDFTextEmbedderTest,MAYBE_TextSearchLatinExtended)471 TEST_F(FPDFTextEmbedderTest, MAYBE_TextSearchLatinExtended) {
472 ASSERT_TRUE(OpenDocument("latin_extended.pdf"));
473 FPDF_PAGE page = LoadPage(0);
474 ASSERT_TRUE(page);
475
476 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
477 ASSERT_TRUE(textpage);
478
479 // Upper/lowercase 'a' with breve.
480 constexpr FPDF_WCHAR kNeedleUpper[] = {0x0102, 0x0000};
481 constexpr FPDF_WCHAR kNeedleLower[] = {0x0103, 0x0000};
482
483 for (const auto* needle : {kNeedleUpper, kNeedleLower}) {
484 ScopedFPDFTextFind search(FPDFText_FindStart(textpage, needle, 0, 0));
485 EXPECT_TRUE(search);
486 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
487 EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
488
489 // Should find 2 results at position 21/22, both with length 1.
490 EXPECT_TRUE(FPDFText_FindNext(search.get()));
491 EXPECT_EQ(2, FPDFText_GetSchResultIndex(search.get()));
492 EXPECT_EQ(1, FPDFText_GetSchCount(search.get()));
493 EXPECT_TRUE(FPDFText_FindNext(search.get()));
494 EXPECT_EQ(3, FPDFText_GetSchResultIndex(search.get()));
495 EXPECT_EQ(1, FPDFText_GetSchCount(search.get()));
496 // And no more than 2 results.
497 EXPECT_FALSE(FPDFText_FindNext(search.get()));
498 }
499
500 FPDFText_ClosePage(textpage);
501 UnloadPage(page);
502 }
503
504 // Test that the page has characters despite a bad stream length.
TEST_F(FPDFTextEmbedderTest,StreamLengthPastEndOfFile)505 TEST_F(FPDFTextEmbedderTest, StreamLengthPastEndOfFile) {
506 ASSERT_TRUE(OpenDocument("bug_57.pdf"));
507 FPDF_PAGE page = LoadPage(0);
508 ASSERT_TRUE(page);
509
510 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
511 ASSERT_TRUE(textpage);
512 EXPECT_EQ(13, FPDFText_CountChars(textpage));
513
514 FPDFText_ClosePage(textpage);
515 UnloadPage(page);
516 }
517
// Exercises web link detection on weblinks.pdf: link counting, URL retrieval
// into buffers of various sizes, and per-link rect queries.
TEST_F(FPDFTextEmbedderTest, WebLinks) {
  ASSERT_TRUE(OpenDocument("weblinks.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  {
    // This first handle is released automatically at the end of the scope.
    ScopedFPDFPageLink scoped_links(FPDFLink_LoadWebLinks(text_page));
    EXPECT_TRUE(scoped_links);

    // Page contains two HTTP-style URLs.
    EXPECT_EQ(2, FPDFLink_CountWebLinks(scoped_links.get()));

    // Bogus link indices only need room for a terminating NUL.
    EXPECT_EQ(1, FPDFLink_GetURL(scoped_links.get(), 2, nullptr, 0));
    EXPECT_EQ(1, FPDFLink_GetURL(scoped_links.get(), 1400, nullptr, 0));
    EXPECT_EQ(1, FPDFLink_GetURL(scoped_links.get(), -1, nullptr, 0));
  }

  // A second, manually managed handle; closed at the end of the test.
  FPDF_PAGELINK links = FPDFLink_LoadWebLinks(text_page);
  EXPECT_TRUE(links);

  // Query the number of characters required for each link (incl NUL).
  EXPECT_EQ(25, FPDFLink_GetURL(links, 0, nullptr, 0));
  EXPECT_EQ(26, FPDFLink_GetURL(links, 1, nullptr, 0));

  static const char kExpectedUrl[] = "http://example.com?q=foo";
  // Length of the expected URL, counting its terminating NUL.
  static const size_t kExpectedLen = sizeof(kExpectedUrl);
  unsigned short buffer[128];

  // Retrieve a link with too small a buffer. Buffer will not be
  // NUL-terminated, but must not be modified past indicated length,
  // so pre-fill with a pattern to check write bounds.
  memset(buffer, 0xbd, sizeof(buffer));
  EXPECT_EQ(1, FPDFLink_GetURL(links, 0, buffer, 1));
  EXPECT_TRUE(check_unsigned_shorts(kExpectedUrl, buffer, 1));
  EXPECT_EQ(0xbdbd, buffer[1]);

  // Buffer holds every character but has no room for the terminating NUL.
  memset(buffer, 0xbd, sizeof(buffer));
  EXPECT_EQ(static_cast<int>(kExpectedLen - 1),
            FPDFLink_GetURL(links, 0, buffer, kExpectedLen - 1));
  EXPECT_TRUE(check_unsigned_shorts(kExpectedUrl, buffer, kExpectedLen - 1));
  EXPECT_EQ(0xbdbd, buffer[kExpectedLen - 1]);

  // Retrieve link with exactly-sized buffer.
  memset(buffer, 0xbd, sizeof(buffer));
  EXPECT_EQ(static_cast<int>(kExpectedLen),
            FPDFLink_GetURL(links, 0, buffer, kExpectedLen));
  EXPECT_TRUE(check_unsigned_shorts(kExpectedUrl, buffer, kExpectedLen));
  EXPECT_EQ(0u, buffer[kExpectedLen - 1]);
  EXPECT_EQ(0xbdbd, buffer[kExpectedLen]);

  // Retrieve link with ample-sized buffer.
  memset(buffer, 0xbd, sizeof(buffer));
  EXPECT_EQ(static_cast<int>(kExpectedLen),
            FPDFLink_GetURL(links, 0, buffer, 128));
  EXPECT_TRUE(check_unsigned_shorts(kExpectedUrl, buffer, kExpectedLen));
  EXPECT_EQ(0u, buffer[kExpectedLen - 1]);
  EXPECT_EQ(0xbdbd, buffer[kExpectedLen]);

  // Each link rendered in a single rect in this test page.
  EXPECT_EQ(1, FPDFLink_CountRects(links, 0));
  EXPECT_EQ(1, FPDFLink_CountRects(links, 1));

  // Out-of-range link indices report no rects.
  EXPECT_EQ(0, FPDFLink_CountRects(links, -1));
  EXPECT_EQ(0, FPDFLink_CountRects(links, 2));
  EXPECT_EQ(0, FPDFLink_CountRects(links, 10000));

  // Check boundary of valid link index with valid rect index.
  double left = 0.0;
  double right = 0.0;
  double top = 0.0;
  double bottom = 0.0;
  EXPECT_TRUE(FPDFLink_GetRect(links, 0, 0, &left, &top, &right, &bottom));
  EXPECT_NEAR(50.791, left, 0.001);
  EXPECT_NEAR(187.963, right, 0.001);
  EXPECT_NEAR(97.624, bottom, 0.001);
  EXPECT_NEAR(108.736, top, 0.001);

  // Check that valid link with invalid rect index leaves parameters unchanged.
  left = -1.0;
  right = -1.0;
  top = -1.0;
  bottom = -1.0;
  EXPECT_FALSE(FPDFLink_GetRect(links, 0, 1, &left, &top, &right, &bottom));
  EXPECT_EQ(-1.0, left);
  EXPECT_EQ(-1.0, right);
  EXPECT_EQ(-1.0, bottom);
  EXPECT_EQ(-1.0, top);

  // Check that invalid link index leaves parameters unchanged.
  left = -2.0;
  right = -2.0;
  top = -2.0;
  bottom = -2.0;
  EXPECT_FALSE(FPDFLink_GetRect(links, -1, 0, &left, &top, &right, &bottom));
  EXPECT_EQ(-2.0, left);
  EXPECT_EQ(-2.0, right);
  EXPECT_EQ(-2.0, bottom);
  EXPECT_EQ(-2.0, top);

  FPDFLink_CloseWebLinks(links);
  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
627
// Checks the URLs extracted from weblinks_across_lines.pdf, whose link text is
// broken across line endings.
TEST_F(FPDFTextEmbedderTest, WebLinksAcrossLines) {
  ASSERT_TRUE(OpenDocument("weblinks_across_lines.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  FPDF_PAGELINK links = FPDFLink_LoadWebLinks(text_page);
  EXPECT_TRUE(links);

  static const char* const kExpectedUrls[] = {
      "http://example.com",           // from "http://www.example.com?\r\nfoo"
      "http://example.com/",          // from "http://www.example.com/\r\nfoo"
      "http://example.com/test-foo",  // from "http://example.com/test-\r\nfoo"
      "http://abc.com/test-foo",      // from "http://abc.com/test-\r\n\r\nfoo"
      // Next two links from "http://www.example.com/\r\nhttp://www.abc.com/"
      "http://example.com/",
      "http://www.abc.com",
  };
  static const int kNumLinks = static_cast<int>(FX_ArraySize(kExpectedUrls));

  EXPECT_EQ(kNumLinks, FPDFLink_CountWebLinks(links));

  unsigned short buffer[128];
  for (int link_index = 0; link_index < kNumLinks; ++link_index) {
    const char* const expected_url = kExpectedUrls[link_index];
    // Expected length counts the terminating NUL.
    const size_t expected_len = strlen(expected_url) + 1;
    memset(buffer, 0, sizeof(buffer));

    // The required size and the copied size must agree.
    EXPECT_EQ(static_cast<int>(expected_len),
              FPDFLink_GetURL(links, link_index, nullptr, 0));
    EXPECT_EQ(
        static_cast<int>(expected_len),
        FPDFLink_GetURL(links, link_index, buffer, FX_ArraySize(buffer)));
    EXPECT_TRUE(check_unsigned_shorts(expected_url, buffer, expected_len));
  }

  FPDFLink_CloseWebLinks(links);
  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
667
// Checks the URL of the second web link found in bug_650.pdf.
TEST_F(FPDFTextEmbedderTest, WebLinksAcrossLinesBug) {
  ASSERT_TRUE(OpenDocument("bug_650.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  FPDF_PAGELINK links = FPDFLink_LoadWebLinks(text_page);
  EXPECT_TRUE(links);

  EXPECT_EQ(2, FPDFLink_CountWebLinks(links));

  static const char kExpectedUrl[] =
      "http://tutorial45.com/learn-autocad-basics-day-166/";
  // Size of the expected URL, counting its terminating NUL.
  static const int kUrlSize = static_cast<int>(sizeof(kExpectedUrl));
  unsigned short url_buffer[128] = {};

  // The required size and the copied size must agree for link index 1.
  EXPECT_EQ(kUrlSize, FPDFLink_GetURL(links, 1, nullptr, 0));
  EXPECT_EQ(kUrlSize,
            FPDFLink_GetURL(links, 1, url_buffer, FX_ArraySize(url_buffer)));
  EXPECT_TRUE(check_unsigned_shorts(kExpectedUrl, url_buffer, kUrlSize));

  FPDFLink_CloseWebLinks(links);
  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
694
// Checks FPDFLink_GetTextRange() on weblinks.pdf for a valid link and for
// several invalid inputs, which must leave the out-params untouched.
//
// Once the page, text page and page link have been acquired, only non-fatal
// EXPECT_* checks are used so that a failure cannot skip the cleanup at the
// end of the test.
TEST_F(FPDFTextEmbedderTest, WebLinksCharRanges) {
  ASSERT_TRUE(OpenDocument("weblinks.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  FPDF_PAGELINK page_link = FPDFLink_LoadWebLinks(text_page);
  EXPECT_TRUE(page_link);

  // Test for char indices of a valid link. The out-params start from known
  // values so that a failed call never leads to reading indeterminate ints.
  int start_char_index = -1;
  int char_count = -1;
  EXPECT_TRUE(
      FPDFLink_GetTextRange(page_link, 0, &start_char_index, &char_count));
  EXPECT_EQ(35, start_char_index);
  EXPECT_EQ(24, char_count);

  // Test for char indices of an invalid link
  start_char_index = -10;
  char_count = -8;
  EXPECT_FALSE(
      FPDFLink_GetTextRange(page_link, 6, &start_char_index, &char_count));
  EXPECT_EQ(-10, start_char_index);
  EXPECT_EQ(-8, char_count);

  // Test for pagelink = nullptr
  start_char_index = -10;
  char_count = -8;
  EXPECT_FALSE(
      FPDFLink_GetTextRange(nullptr, 0, &start_char_index, &char_count));
  EXPECT_EQ(-10, start_char_index);
  EXPECT_EQ(-8, char_count);

  // Test for link_index < 0
  start_char_index = -10;
  char_count = -8;
  EXPECT_FALSE(
      FPDFLink_GetTextRange(page_link, -4, &start_char_index, &char_count));
  EXPECT_EQ(-10, start_char_index);
  EXPECT_EQ(-8, char_count);

  FPDFLink_CloseWebLinks(page_link);
  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
742
// Cross-checks the number of link annotations in link_annots.pdf against
// FPDFLink_Enumerate(), and spot-checks destinations, annotation rects and
// quad points of the enumerated links.
TEST_F(FPDFTextEmbedderTest, AnnotLinks) {
  ASSERT_TRUE(OpenDocument("link_annots.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  // Count the links by inspecting the subtype of every annotation.
  const int annot_count = FPDFPage_GetAnnotCount(page);
  ASSERT_EQ(8, annot_count);
  int annot_subtype_link_count = 0;
  for (int annot_index = 0; annot_index < annot_count; ++annot_index) {
    ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, annot_index));
    const bool is_link = FPDFAnnot_GetSubtype(annot.get()) == FPDF_ANNOT_LINK;
    if (is_link)
      ++annot_subtype_link_count;
  }
  EXPECT_EQ(4, annot_subtype_link_count);

  // Validate that FPDFLink_Enumerate() returns same number of links. The
  // per-link checks are keyed on the value of |start_pos| after each call.
  int start_pos = 0;
  FPDF_LINK link_annot;
  int link_count = 0;
  while (FPDFLink_Enumerate(page, &start_pos, &link_annot)) {
    ASSERT_TRUE(link_annot);
    switch (start_pos) {
      case 1:
      case 2: {
        // First two links point to first and second page within the document
        // respectively
        FPDF_DEST link_dest = FPDFLink_GetDest(document(), link_annot);
        EXPECT_TRUE(link_dest);
        EXPECT_EQ(start_pos - 1,
                  FPDFDest_GetDestPageIndex(document(), link_dest));
        break;
      }
      case 3: {
        // points to PDF Spec URL
        FS_RECTF link_rect;
        EXPECT_TRUE(FPDFLink_GetAnnotRect(link_annot, &link_rect));
        EXPECT_NEAR(66.0, link_rect.left, 0.001);
        EXPECT_NEAR(544.0, link_rect.top, 0.001);
        EXPECT_NEAR(196.0, link_rect.right, 0.001);
        EXPECT_NEAR(529.0, link_rect.bottom, 0.001);
        break;
      }
      case 4: {
        // this link has quad points
        const int quad_point_count = FPDFLink_CountQuadPoints(link_annot);
        EXPECT_EQ(1, quad_point_count);
        FS_QUADPOINTSF quad_points;
        EXPECT_TRUE(FPDFLink_GetQuadPoints(link_annot, 0, &quad_points));
        EXPECT_NEAR(83.0, quad_points.x1, 0.001);
        EXPECT_NEAR(453.0, quad_points.y1, 0.001);
        EXPECT_NEAR(178.0, quad_points.x2, 0.001);
        EXPECT_NEAR(453.0, quad_points.y2, 0.001);
        EXPECT_NEAR(83.0, quad_points.x3, 0.001);
        EXPECT_NEAR(440.0, quad_points.y3, 0.001);
        EXPECT_NEAR(178.0, quad_points.x4, 0.001);
        EXPECT_NEAR(440.0, quad_points.y4, 0.001);
        // AnnotRect is same as quad points for this link
        FS_RECTF link_rect;
        EXPECT_TRUE(FPDFLink_GetAnnotRect(link_annot, &link_rect));
        EXPECT_NEAR(link_rect.left, quad_points.x1, 0.001);
        EXPECT_NEAR(link_rect.top, quad_points.y1, 0.001);
        EXPECT_NEAR(link_rect.right, quad_points.x4, 0.001);
        EXPECT_NEAR(link_rect.bottom, quad_points.y4, 0.001);
        break;
      }
      default:
        // No extra checks for any other position.
        break;
    }
    ++link_count;
  }
  EXPECT_EQ(annot_subtype_link_count, link_count);

  UnloadPage(page);
}
807
// Checks the font size reported for every character of hello_world.pdf.
TEST_F(FPDFTextEmbedderTest, GetFontSize) {
  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  // One entry per character of "Hello, world!\r\nGoodbye, world!".
  const double kExpectedFontsSizes[] = {
      // "Hello, world!"
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
      // "\r\n"
      1, 1,
      // "Goodbye, world!"
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};

  const int char_count = FPDFText_CountChars(text_page);
  ASSERT_EQ(FX_ArraySize(kExpectedFontsSizes),
            static_cast<size_t>(char_count));
  for (int char_index = 0; char_index < char_count; ++char_index) {
    EXPECT_EQ(kExpectedFontsSizes[char_index],
              FPDFText_GetFontSize(text_page, char_index))
        << char_index;
  }

  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
828
// Exercises FPDFText_GetFontInfo() on hello_world.pdf: font name and flags for
// both lines of text, the too-small-buffer case, the two characters between
// the lines, and invalid arguments.
TEST_F(FPDFTextEmbedderTest, GetFontInfo) {
  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  // Scratch buffer that receives the font name; reused by every query below.
  std::vector<char> name_buffer;

  // First line: every character is expected to report "Times-Roman".
  const size_t first_line_length = strlen("Hello, world!");
  const char kExpectedFontName1[] = "Times-Roman";
  // sizeof() counts the terminating NUL of the expected name.
  static constexpr unsigned long kExpectedLength1 = sizeof(kExpectedFontName1);
  for (size_t char_index = 0; char_index < first_line_length; ++char_index) {
    // Query without a buffer: only the length and the flags are checked.
    int font_flags = -1;
    unsigned long reported_length =
        FPDFText_GetFontInfo(text_page, char_index, nullptr, 0, &font_flags);
    ASSERT_EQ(kExpectedLength1, reported_length);
    EXPECT_EQ(FXFONT_NONSYMBOLIC, font_flags);

    // Query again with a buffer of exactly the reported length. It is
    // pre-filled with 'a' so stale contents cannot satisfy the name check.
    name_buffer.assign(reported_length, 'a');
    font_flags = -1;
    EXPECT_EQ(kExpectedLength1,
              FPDFText_GetFontInfo(text_page, char_index, name_buffer.data(),
                                   name_buffer.size(), &font_flags));
    EXPECT_STREQ(kExpectedFontName1, name_buffer.data());
    EXPECT_EQ(FXFONT_NONSYMBOLIC, font_flags);
  }

  // If the size of the buffer is not large enough, the buffer should remain
  // unchanged. Shrink it by one element and refill it with 'a' to detect any
  // write.
  name_buffer.assign(name_buffer.size() - 1, 'a');
  EXPECT_EQ(sizeof(kExpectedFontName1),
            FPDFText_GetFontInfo(text_page, 0, name_buffer.data(),
                                 name_buffer.size(), nullptr));
  for (char stored : name_buffer)
    EXPECT_EQ('a', stored);

  // The text is "Hello, world!\r\nGoodbye, world!", so the two characters
  // following the first line do not have any font information.
  const size_t line_break_begin = first_line_length;
  EXPECT_EQ(0u,
            FPDFText_GetFontInfo(text_page, line_break_begin,
                                 name_buffer.data(), name_buffer.size(),
                                 nullptr));
  EXPECT_EQ(0u,
            FPDFText_GetFontInfo(text_page, line_break_begin + 1,
                                 name_buffer.data(), name_buffer.size(),
                                 nullptr));

  // Second line: every character is expected to report "Helvetica".
  const size_t second_line_begin = first_line_length + 2;
  const size_t second_line_end = second_line_begin + strlen("Goodbye, world!");
  const char kExpectedFontName2[] = "Helvetica";
  static constexpr unsigned long kExpectedLength2 = sizeof(kExpectedFontName2);
  for (size_t char_index = second_line_begin; char_index < second_line_end;
       ++char_index) {
    int font_flags = -1;
    unsigned long reported_length =
        FPDFText_GetFontInfo(text_page, char_index, nullptr, 0, &font_flags);
    ASSERT_EQ(kExpectedLength2, reported_length);
    EXPECT_EQ(FXFONT_NONSYMBOLIC, font_flags);

    name_buffer.assign(reported_length, 'a');
    font_flags = -1;
    EXPECT_EQ(kExpectedLength2,
              FPDFText_GetFontInfo(text_page, char_index, name_buffer.data(),
                                   name_buffer.size(), &font_flags));
    EXPECT_STREQ(kExpectedFontName2, name_buffer.data());
    EXPECT_EQ(FXFONT_NONSYMBOLIC, font_flags);
  }

  // Now try some out of bounds indices and null pointers to make sure we do not
  // crash.
  // No textpage.
  EXPECT_EQ(0u, FPDFText_GetFontInfo(nullptr, 0, name_buffer.data(),
                                     name_buffer.size(), nullptr));
  // No buffer: the required length is still returned.
  EXPECT_EQ(sizeof(kExpectedFontName1),
            FPDFText_GetFontInfo(text_page, 0, nullptr, 0, nullptr));
  // Negative index.
  EXPECT_EQ(0u, FPDFText_GetFontInfo(text_page, -1, name_buffer.data(),
                                     name_buffer.size(), nullptr));
  // Out of bounds index.
  EXPECT_EQ(0u, FPDFText_GetFontInfo(text_page, 1000, name_buffer.data(),
                                     name_buffer.size(), nullptr));

  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
910
// bug_583.pdf is expected to yield exactly one character, for which
// FPDFText_GetUnicode() returns 0.
TEST_F(FPDFTextEmbedderTest, ToUnicode) {
  ASSERT_TRUE(OpenDocument("bug_583.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  // Stop here if the count is off; index 0 is only meaningful when the page
  // has a character.
  const int char_count = FPDFText_CountChars(text_page);
  ASSERT_EQ(1, char_count);

  const unsigned int first_unicode = FPDFText_GetUnicode(text_page, 0);
  EXPECT_EQ(0U, first_unicode);

  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
925
// Reads the same run of characters from bug_921.pdf twice, once per character
// through FPDFText_GetUnicode() and once in bulk through FPDFText_GetText(),
// and checks both against the expected code points.
TEST_F(FPDFTextEmbedderTest, Bug_921) {
  ASSERT_TRUE(OpenDocument("bug_921.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  // Expected code points, starting at character index |kStartIndex|.
  static constexpr unsigned int kData[] = {
      1095, 1077, 1083, 1086, 1074, 1077, 1095, 1077, 1089, 1082, 1086, 1077,
      32,   1089, 1090, 1088, 1072, 1076, 1072, 1085, 1080, 1077, 46,   32};
  static constexpr int kStartIndex = 238;
  constexpr size_t kDataSize = FX_ArraySize(kData);

  ASSERT_EQ(268, FPDFText_CountChars(text_page));

  // Character-by-character access.
  for (size_t pos = 0; pos < kDataSize; ++pos)
    EXPECT_EQ(kData[pos], FPDFText_GetUnicode(text_page, kStartIndex + pos));

  // Bulk access. The buffer has one extra slot for the terminating NUL and is
  // pre-filled with a non-zero pattern.
  unsigned short text[kDataSize + 1];
  memset(text, 0xbd, sizeof(text));
  int returned_count = FPDFText_GetText(text_page, kStartIndex, kDataSize, text);
  ASSERT_GT(returned_count, 0);
  ASSERT_EQ(kDataSize + 1, static_cast<size_t>(returned_count));
  for (size_t pos = 0; pos < kDataSize; ++pos)
    EXPECT_EQ(kData[pos], text[pos]);
  EXPECT_EQ(0, text[kDataSize]);

  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
956
// Checks how FPDFText_GetText() reports hyphens in bug_781804.pdf: a soft
// hyphen shows up as \uFFFE, while a hard hyphen is returned as-is.
TEST_F(FPDFTextEmbedderTest, GetTextWithHyphen) {
  ASSERT_TRUE(OpenDocument("bug_781804.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
  ASSERT_TRUE(textpage);

  // Check that soft hyphens are not included
  // Expecting 'Veritaserum', except there is a \uFFFE where the hyphen was in
  // the original text. This is a weird thing that Adobe does, which we
  // replicate.
  constexpr unsigned short soft_expected[] = {
      0x0056, 0x0065, 0x0072, 0x0069, 0x0074, 0x0061, 0xfffe,
      0x0073, 0x0065, 0x0072, 0x0075, 0x006D, 0x0000};
  {
    // |count| excludes the trailing 0x0000 entry of |soft_expected|.
    constexpr int count = FX_ArraySize(soft_expected) - 1;
    unsigned short buffer[FX_ArraySize(soft_expected)];
    memset(buffer, 0, sizeof(buffer));

    // The return value is compared against |count| + 1.
    EXPECT_EQ(count + 1, FPDFText_GetText(textpage, 0, count, buffer));
    for (int i = 0; i < count; i++)
      EXPECT_EQ(soft_expected[i], buffer[i]);
  }

  // Check that hard hyphens are included
  {
    // There isn't the \0 in the actual doc, but there is a \r\n, so need to
    // add 1 to get aligned.
    constexpr size_t offset = FX_ArraySize(soft_expected) + 1;
    // Expecting 'User-\r\ngenerated', the - is a unicode character, so cannot
    // store in a char[].
    constexpr unsigned short hard_expected[] = {
        0x0055, 0x0073, 0x0065, 0x0072, 0x2010, 0x000d, 0x000a, 0x0067, 0x0065,
        0x006e, 0x0065, 0x0072, 0x0061, 0x0074, 0x0065, 0x0064, 0x0000};
    constexpr int count = FX_ArraySize(hard_expected) - 1;
    unsigned short buffer[FX_ArraySize(hard_expected)];
    // Zero the buffer, matching the soft hyphen case above. The EXPECT_EQ on
    // the count is non-fatal, so without this the loop below would read
    // uninitialized memory whenever fewer characters than expected are
    // written.
    memset(buffer, 0, sizeof(buffer));

    EXPECT_EQ(count + 1, FPDFText_GetText(textpage, offset, count, buffer));
    for (int i = 0; i < count; i++)
      EXPECT_EQ(hard_expected[i], buffer[i]);
  }

  FPDFText_ClosePage(textpage);
  UnloadPage(page);
}
1003
// Loads and immediately closes the text page of bug_782596.pdf. There are no
// value checks beyond successful loading; a regression in this test will only
// fail under ASAN.
TEST_F(FPDFTextEmbedderTest, bug_782596) {
  ASSERT_TRUE(OpenDocument("bug_782596.pdf"));
  FPDF_PAGE first_page = LoadPage(0);
  ASSERT_TRUE(first_page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(first_page);
  ASSERT_TRUE(text_page);

  FPDFText_ClosePage(text_page);
  UnloadPage(first_page);
}
1014
// Checks that FPDFText_GetText() leaves control characters out of the text
// extracted from control_characters.pdf, both for the whole page and for a
// range that starts after them.
TEST_F(FPDFTextEmbedderTest, ControlCharacters) {
  ASSERT_TRUE(OpenDocument("control_characters.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  // Whole page: the output should not include the control characters. The
  // buffer is pre-filled with a non-zero pattern before each extraction.
  unsigned short text[128];
  memset(text, 0xbd, sizeof(text));
  int returned_count = FPDFText_GetText(text_page, 0, 128, text);
  ASSERT_EQ(kHelloGoodbyeTextSize, returned_count);
  EXPECT_TRUE(
      check_unsigned_shorts(kHelloGoodbyeText, text, kHelloGoodbyeTextSize));

  // Attempting to get a chunk of text after the control characters.
  static const char kExpectedSubstring[] = "Goodbye, world!";
  // The offset is the length of 'Hello, world!\r\n' + 2 control characters in
  // the original stream.
  static const int kSubstringOffset = 17;
  memset(text, 0xbd, sizeof(text));
  returned_count = FPDFText_GetText(text_page, kSubstringOffset, 128, text);

  // Guard the signed-to-unsigned cast below against a negative count.
  ASSERT_GE(returned_count, 0);
  EXPECT_EQ(sizeof(kExpectedSubstring), static_cast<size_t>(returned_count));
  EXPECT_TRUE(check_unsigned_shorts(kExpectedSubstring, text,
                                    sizeof(kExpectedSubstring)));

  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
1047
1048 // Testing that hyphen makers (0x0002) are replacing hard hyphens when
1049 // the word contains non-ASCII characters.
TEST_F(FPDFTextEmbedderTest,bug_1029)1050 TEST_F(FPDFTextEmbedderTest, bug_1029) {
1051 ASSERT_TRUE(OpenDocument("bug_1029.pdf"));
1052 FPDF_PAGE page = LoadPage(0);
1053 ASSERT_TRUE(page);
1054
1055 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
1056 ASSERT_TRUE(textpage);
1057
1058 constexpr int page_range_offset = 171;
1059 constexpr int page_range_length = 56;
1060
1061 // This text is:
1062 // 'METADATA table. When the split has committed, it noti' followed
1063 // by a 'soft hyphen' (0x0002) and then 'fi'.
1064 //
1065 // The original text has a fi ligature, but that is broken up into
1066 // two characters when the PDF is processed.
1067 constexpr unsigned int expected[] = {
1068 0x004d, 0x0045, 0x0054, 0x0041, 0x0044, 0x0041, 0x0054, 0x0041,
1069 0x0020, 0x0074, 0x0061, 0x0062, 0x006c, 0x0065, 0x002e, 0x0020,
1070 0x0057, 0x0068, 0x0065, 0x006e, 0x0020, 0x0074, 0x0068, 0x0065,
1071 0x0020, 0x0073, 0x0070, 0x006c, 0x0069, 0x0074, 0x0020, 0x0068,
1072 0x0061, 0x0073, 0x0020, 0x0063, 0x006f, 0x006d, 0x006d, 0x0069,
1073 0x0074, 0x0074, 0x0065, 0x0064, 0x002c, 0x0020, 0x0069, 0x0074,
1074 0x0020, 0x006e, 0x006f, 0x0074, 0x0069, 0x0002, 0x0066, 0x0069};
1075 static_assert(page_range_length == FX_ArraySize(expected),
1076 "Expected should be the same size as the range being "
1077 "extracted from page.");
1078 EXPECT_LT(page_range_offset + page_range_length,
1079 FPDFText_CountChars(textpage));
1080
1081 for (int i = 0; i < page_range_length; ++i) {
1082 EXPECT_EQ(expected[i],
1083 FPDFText_GetUnicode(textpage, page_range_offset + i));
1084 }
1085
1086 FPDFText_ClosePage(textpage);
1087 UnloadPage(page);
1088 }
1089
// Checks FPDFText_CountRects() on hello_world.pdf for a variety of start
// indices and character counts, including zero, negative and oversized counts.
TEST_F(FPDFTextEmbedderTest, CountRects) {
  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  // Sanity check hello_world.pdf.
  // The returned character count includes the terminating NUL that is
  // provided.
  {
    unsigned short text[128];
    int returned_count = FPDFText_GetText(text_page, 0, 128, text);
    ASSERT_EQ(kHelloGoodbyeTextSize, returned_count);
    EXPECT_TRUE(
        check_unsigned_shorts(kHelloGoodbyeText, text, kHelloGoodbyeTextSize));
  }

  // Now test FPDFText_CountRects().
  static const int kHelloWorldEnd = strlen("Hello, world!");
  static const int kGoodbyeWorldStart = kHelloWorldEnd + 2;  // "\r\n"
  for (int first = 0; first < kHelloWorldEnd; ++first) {
    // Always grab some part of "hello world" and some part of "goodbye world"
    // Since -1 means "all".
    EXPECT_EQ(2, FPDFText_CountRects(text_page, first, -1));

    // No characters always means 0 rects.
    EXPECT_EQ(0, FPDFText_CountRects(text_page, first, 0));

    // 1 character stays within "hello world"
    EXPECT_EQ(1, FPDFText_CountRects(text_page, first, 1));

    // When |first| is 0, a char count of |kGoodbyeWorldStart| does not reach
    // "goodbye world"; any later start does.
    int expected_rects;
    if (first == 0)
      expected_rects = 1;
    else
      expected_rects = 2;
    EXPECT_EQ(expected_rects,
              FPDFText_CountRects(text_page, first, kGoodbyeWorldStart));

    // Extremely large character count will always return 2 rects because
    // |first| starts inside "hello world".
    EXPECT_EQ(2, FPDFText_CountRects(text_page, first, 500));
  }

  // Now test negative counts. The expected results match the -1 case above.
  for (int first = 0; first < kHelloWorldEnd; ++first) {
    EXPECT_EQ(2, FPDFText_CountRects(text_page, first, -100));
    EXPECT_EQ(2, FPDFText_CountRects(text_page, first, -2));
  }

  // Now test larger start values: a single rect for any non-zero count.
  const int kExpectedLength = strlen(kHelloGoodbyeText);
  for (int first = kGoodbyeWorldStart + 1; first < kExpectedLength; ++first) {
    EXPECT_EQ(1, FPDFText_CountRects(text_page, first, -1));
    EXPECT_EQ(0, FPDFText_CountRects(text_page, first, 0));
    EXPECT_EQ(1, FPDFText_CountRects(text_page, first, 1));
    EXPECT_EQ(1, FPDFText_CountRects(text_page, first, 2));
    EXPECT_EQ(1, FPDFText_CountRects(text_page, first, 500));
  }

  // Now test start values beyond the end of the text: never any rects.
  for (int first = kExpectedLength; first < 100; ++first) {
    EXPECT_EQ(0, FPDFText_CountRects(text_page, first, -1));
    EXPECT_EQ(0, FPDFText_CountRects(text_page, first, 0));
    EXPECT_EQ(0, FPDFText_CountRects(text_page, first, 1));
    EXPECT_EQ(0, FPDFText_CountRects(text_page, first, 2));
    EXPECT_EQ(0, FPDFText_CountRects(text_page, first, 500));
  }

  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
1161
// Exercises FPDFTextObj_GetText() on the first page object of hello_world.pdf:
// the size query, a full read, invalid handles, and a too-small buffer.
TEST_F(FPDFTextEmbedderTest, GetText) {
  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  EXPECT_EQ(2, FPDFPage_CountObjects(page));
  FPDF_PAGEOBJECT text_object = FPDFPage_GetObject(page, 0);
  ASSERT_TRUE(text_object);

  // Positive testing.
  constexpr char kHelloText[] = "Hello, world!";
  // Return value includes the terminating NUL that is provided. It is a byte
  // count: two bytes per UTF-16 code unit.
  constexpr unsigned long kHelloUTF16Size = FX_ArraySize(kHelloText) * 2;
  constexpr wchar_t kHelloWideText[] = L"Hello, world!";
  unsigned long size = FPDFTextObj_GetText(text_object, text_page, nullptr, 0);
  ASSERT_EQ(kHelloUTF16Size, size);

  // |size| is in bytes while the vector is sized in elements, so convert
  // before allocating. This keeps the buffer exactly |size| bytes long, which
  // is the length passed to the API.
  std::vector<unsigned short> buffer(size / sizeof(unsigned short));
  ASSERT_EQ(size,
            FPDFTextObj_GetText(text_object, text_page, buffer.data(), size));
  ASSERT_EQ(kHelloWideText, GetPlatformWString(buffer.data()));

  // Negative testing.
  ASSERT_EQ(0U, FPDFTextObj_GetText(nullptr, text_page, nullptr, 0));
  ASSERT_EQ(0U, FPDFTextObj_GetText(text_object, nullptr, nullptr, 0));
  ASSERT_EQ(0U, FPDFTextObj_GetText(nullptr, nullptr, nullptr, 0));

  // Buffer is too small, ensure it's not modified. The length is passed in
  // bytes so that it describes the real size of the buffer.
  buffer.resize(2);
  buffer[0] = 'x';
  buffer[1] = '\0';
  size = FPDFTextObj_GetText(text_object, text_page, buffer.data(),
                             buffer.size() * sizeof(unsigned short));
  ASSERT_EQ(kHelloUTF16Size, size);
  ASSERT_EQ('x', buffer[0]);
  ASSERT_EQ('\0', buffer[1]);

  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
1205
// For each page of cropped_text.pdf, checks the page bounding box, the full
// page text, and the text returned by FPDFText_GetBoundedText() for that box.
TEST_F(FPDFTextEmbedderTest, CroppedText) {
  static constexpr int kPageCount = 4;
  // Expected bounding box of each page, in {left, top, right, bottom} order
  // as compared field by field below.
  static constexpr FS_RECTF kBoxes[kPageCount] = {
      {50.0f, 150.0f, 150.0f, 50.0f},
      {50.0f, 150.0f, 150.0f, 50.0f},
      {60.0f, 150.0f, 150.0f, 60.0f},
      {60.0f, 150.0f, 150.0f, 60.0f},
  };
  // Expected text inside the bounding box of each page.
  static constexpr const char* kExpectedText[kPageCount] = {
      " world!\r\ndbye, world!",
      " world!\r\ndbye, world!",
      "bye, world!",
      "bye, world!",
  };

  ASSERT_TRUE(OpenDocument("cropped_text.pdf"));
  ASSERT_EQ(kPageCount, FPDF_GetPageCount(document()));

  for (int page_index = 0; page_index < kPageCount; ++page_index) {
    FPDF_PAGE page = LoadPage(page_index);
    ASSERT_TRUE(page);

    FS_RECTF page_box;
    EXPECT_TRUE(FPDF_GetPageBoundingBox(page, &page_box));
    EXPECT_EQ(kBoxes[page_index].left, page_box.left);
    EXPECT_EQ(kBoxes[page_index].top, page_box.top);
    EXPECT_EQ(kBoxes[page_index].right, page_box.right);
    EXPECT_EQ(kBoxes[page_index].bottom, page_box.bottom);

    {
      // The scoped text page goes out of scope before the page is unloaded.
      ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
      ASSERT_TRUE(text_page);

      // The full page text is the complete hello/goodbye string.
      unsigned short text[128];
      memset(text, 0xbd, sizeof(text));
      int returned_count = FPDFText_GetText(text_page.get(), 0, 128, text);
      ASSERT_EQ(kHelloGoodbyeTextSize, returned_count);
      EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText, text,
                                        kHelloGoodbyeTextSize));

      // Without a buffer, the bounded query is expected to return the length
      // of the expected text itself.
      int bounded_length = strlen(kExpectedText[page_index]);
      ASSERT_EQ(bounded_length,
                FPDFText_GetBoundedText(text_page.get(), page_box.left,
                                        page_box.top, page_box.right,
                                        page_box.bottom, nullptr, 0));

      // With a buffer, the expected return value is one larger.
      memset(text, 0xbd, sizeof(text));
      ASSERT_EQ(bounded_length + 1,
                FPDFText_GetBoundedText(text_page.get(), page_box.left,
                                        page_box.top, page_box.right,
                                        page_box.bottom, text, 128));
      EXPECT_TRUE(check_unsigned_shorts(kExpectedText[page_index], text,
                                        bounded_length));
    }

    UnloadPage(page);
  }
}
1262
// bug_1139.pdf has an extra control character at the beginning of the string.
// It is counted by FPDFText_CountChars() but must neither appear in the
// extracted text nor prevent the extraction.
TEST_F(FPDFTextEmbedderTest, Bug_1139) {
  ASSERT_TRUE(OpenDocument("bug_1139.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE bug_text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(bug_text_page);

  // -1 for CountChars not including the \0, but +1 for the extra control
  // character, so the count equals |kHelloGoodbyeTextSize|.
  const int char_count = FPDFText_CountChars(bug_text_page);
  EXPECT_EQ(kHelloGoodbyeTextSize, char_count);

  // The extracted text is the plain hello/goodbye string.
  unsigned short text[128];
  int returned_count = FPDFText_GetText(bug_text_page, 0, 128, text);
  ASSERT_EQ(kHelloGoodbyeTextSize, returned_count);
  EXPECT_TRUE(
      check_unsigned_shorts(kHelloGoodbyeText, text, kHelloGoodbyeTextSize));

  FPDFText_ClosePage(bug_text_page);
  UnloadPage(page);
}
1285
// bug_642.pdf is expected to contain exactly the text "ABCD".
TEST_F(FPDFTextEmbedderTest, Bug_642) {
  ASSERT_TRUE(OpenDocument("bug_642.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);
  {
    // The scoped text page goes out of scope before the page is unloaded.
    ScopedFPDFTextPage scoped_text_page(FPDFText_LoadPage(page));
    ASSERT_TRUE(scoped_text_page);

    constexpr char kText[] = "ABCD";
    // Includes the terminating NUL of |kText|.
    constexpr size_t kTextSize = FX_ArraySize(kText);

    // -1 for CountChars not including the \0
    const int expected_char_count = static_cast<int>(kTextSize) - 1;
    EXPECT_EQ(expected_char_count,
              FPDFText_CountChars(scoped_text_page.get()));

    // Request every character; the buffer leaves one slot for the NUL, which
    // the expected return value includes.
    unsigned short text[kTextSize];
    int returned_count =
        FPDFText_GetText(scoped_text_page.get(), 0, kTextSize - 1, text);
    ASSERT_EQ(static_cast<int>(kTextSize), returned_count);
    EXPECT_TRUE(check_unsigned_shorts(kText, text, kTextSize));
  }

  UnloadPage(page);
}
1309
// Checks FPDFText_GetCharAngle() on rotated_text.pdf: invalid arguments yield
// -1, and one character per quadrant reports the expected angle.
TEST_F(FPDFTextEmbedderTest, GetCharAngle) {
  ASSERT_TRUE(OpenDocument("rotated_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE rotated_text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(rotated_text_page);

  // Array sizes of the leading substrings. FX_ArraySize() on a string literal
  // also counts its terminating NUL.
  static constexpr int kSubstringsSize[] = {FX_ArraySize("Hello,"),
                                            FX_ArraySize(" world!\r\n"),
                                            FX_ArraySize("Goodbye,")};

  // -1 for CountChars not including the \0, but +1 for the extra control
  // character.
  EXPECT_EQ(kHelloGoodbyeTextSize, FPDFText_CountChars(rotated_text_page));

  // A null text page, a negative index and an index past the end all give -1.
  EXPECT_FLOAT_EQ(-1.0f, FPDFText_GetCharAngle(nullptr, 0));
  EXPECT_FLOAT_EQ(-1.0f, FPDFText_GetCharAngle(rotated_text_page, -1));
  EXPECT_FLOAT_EQ(
      -1.0f,
      FPDFText_GetCharAngle(rotated_text_page, kHelloGoodbyeTextSize + 1));

  // Test GetCharAngle for every quadrant. Each probed index is the running sum
  // of the substring sizes above.
  const int second_index = kSubstringsSize[0];
  const int third_index = second_index + kSubstringsSize[1];
  const int fourth_index = third_index + kSubstringsSize[2];
  EXPECT_NEAR(FX_PI / 4.0, FPDFText_GetCharAngle(rotated_text_page, 0), 0.001);
  EXPECT_NEAR(3 * FX_PI / 4.0,
              FPDFText_GetCharAngle(rotated_text_page, second_index), 0.001);
  EXPECT_NEAR(5 * FX_PI / 4.0,
              FPDFText_GetCharAngle(rotated_text_page, third_index), 0.001);
  EXPECT_NEAR(7 * FX_PI / 4.0,
              FPDFText_GetCharAngle(rotated_text_page, fourth_index), 0.001);

  FPDFText_ClosePage(rotated_text_page);
  UnloadPage(page);
}
1348
// Checks FPDFText_GetFontWeight() on font_weight.pdf: invalid arguments yield
// -1, and both characters report the expected weight.
TEST_F(FPDFTextEmbedderTest, GetFontWeight) {
  ASSERT_TRUE(OpenDocument("font_weight.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE weight_text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(weight_text_page);

  const int char_count = FPDFText_CountChars(weight_text_page);
  EXPECT_EQ(2, char_count);

  // A null text page, a negative index and an out-of-range index all give -1.
  EXPECT_EQ(-1, FPDFText_GetFontWeight(nullptr, 0));
  EXPECT_EQ(-1, FPDFText_GetFontWeight(weight_text_page, -1));
  EXPECT_EQ(-1, FPDFText_GetFontWeight(weight_text_page, 314));

  // The font used for the first character only specifies /StemV (80); the
  // weight value that is returned should be calculated from that
  // (80*5 == 400).
  EXPECT_EQ(400, FPDFText_GetFontWeight(weight_text_page, 0));

  // Using a /StemV value of 82, the estimate comes out to 410, even though
  // /FontWeight is 400.
  // TODO(crbug.com/pdfium/1420): Fix the return value here.
  EXPECT_EQ(410, FPDFText_GetFontWeight(weight_text_page, 1));

  FPDFText_ClosePage(weight_text_page);
  UnloadPage(page);
}
1375
// Checks FPDFText_GetTextRenderMode() on text_render_mode.pdf: invalid
// arguments yield FPDF_TEXTRENDERMODE_UNKNOWN, and characters 0 and 7 report
// the fill and stroke modes respectively.
TEST_F(FPDFTextEmbedderTest, GetTextRenderMode) {
  // Opening the document must be fatal on failure, as in the other tests of
  // this file; continuing without a document would only add noise.
  ASSERT_TRUE(OpenDocument("text_render_mode.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  ASSERT_EQ(12, FPDFText_CountChars(text_page));

  // A null text page, a negative index and an out-of-range index are all
  // reported as unknown.
  ASSERT_EQ(FPDF_TEXTRENDERMODE_UNKNOWN,
            FPDFText_GetTextRenderMode(nullptr, 0));
  ASSERT_EQ(FPDF_TEXTRENDERMODE_UNKNOWN,
            FPDFText_GetTextRenderMode(text_page, -1));
  ASSERT_EQ(FPDF_TEXTRENDERMODE_UNKNOWN,
            FPDFText_GetTextRenderMode(text_page, 314));

  ASSERT_EQ(FPDF_TEXTRENDERMODE_FILL, FPDFText_GetTextRenderMode(text_page, 0));

  ASSERT_EQ(FPDF_TEXTRENDERMODE_STROKE,
            FPDFText_GetTextRenderMode(text_page, 7));

  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
1401
// Checks FPDFText_GetFillColor() on text_color.pdf: invalid arguments are
// rejected, and the single character reports an opaque red fill.
TEST_F(FPDFTextEmbedderTest, GetFillColor) {
  ASSERT_TRUE(OpenDocument("text_color.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE color_text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(color_text_page);

  ASSERT_EQ(1, FPDFText_CountChars(color_text_page));

  // Null text page.
  ASSERT_FALSE(
      FPDFText_GetFillColor(nullptr, 0, nullptr, nullptr, nullptr, nullptr));
  // Negative index.
  ASSERT_FALSE(FPDFText_GetFillColor(color_text_page, -1, nullptr, nullptr,
                                     nullptr, nullptr));
  // Out-of-range index.
  ASSERT_FALSE(FPDFText_GetFillColor(color_text_page, 314, nullptr, nullptr,
                                     nullptr, nullptr));
  // Valid index but null output pointers.
  ASSERT_FALSE(FPDFText_GetFillColor(color_text_page, 0, nullptr, nullptr,
                                     nullptr, nullptr));

  // Valid query; the components are only read after the call succeeds.
  unsigned int red;
  unsigned int green;
  unsigned int blue;
  unsigned int alpha;
  ASSERT_TRUE(
      FPDFText_GetFillColor(color_text_page, 0, &red, &green, &blue, &alpha));
  ASSERT_EQ(0xffu, red);
  ASSERT_EQ(0u, green);
  ASSERT_EQ(0u, blue);
  ASSERT_EQ(0xffu, alpha);

  FPDFText_ClosePage(color_text_page);
  UnloadPage(page);
}
1434
// Checks FPDFText_GetStrokeColor() on text_color.pdf: invalid arguments are
// rejected, and the single character reports an opaque green stroke.
TEST_F(FPDFTextEmbedderTest, GetStrokeColor) {
  ASSERT_TRUE(OpenDocument("text_color.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE color_text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(color_text_page);

  ASSERT_EQ(1, FPDFText_CountChars(color_text_page));

  // Null text page.
  ASSERT_FALSE(
      FPDFText_GetStrokeColor(nullptr, 0, nullptr, nullptr, nullptr, nullptr));
  // Negative index.
  ASSERT_FALSE(FPDFText_GetStrokeColor(color_text_page, -1, nullptr, nullptr,
                                       nullptr, nullptr));
  // Out-of-range index.
  ASSERT_FALSE(FPDFText_GetStrokeColor(color_text_page, 314, nullptr, nullptr,
                                       nullptr, nullptr));
  // Valid index but null output pointers.
  ASSERT_FALSE(FPDFText_GetStrokeColor(color_text_page, 0, nullptr, nullptr,
                                       nullptr, nullptr));

  // Valid query; the components are only read after the call succeeds.
  unsigned int red;
  unsigned int green;
  unsigned int blue;
  unsigned int alpha;
  ASSERT_TRUE(FPDFText_GetStrokeColor(color_text_page, 0, &red, &green, &blue,
                                      &alpha));
  ASSERT_EQ(0u, red);
  ASSERT_EQ(0xffu, green);
  ASSERT_EQ(0u, blue);
  ASSERT_EQ(0xffu, alpha);

  FPDFText_ClosePage(color_text_page);
  UnloadPage(page);
}
1467
// Checks FPDFText_GetMatrix() on font_matrix.pdf: the page text, the character
// box size of three characters, the matrix of every character, and invalid
// arguments.
TEST_F(FPDFTextEmbedderTest, GetMatrix) {
  constexpr char kExpectedText[] = "A1\r\nA2\r\nA3";
  // Includes the terminating NUL of |kExpectedText|.
  constexpr size_t kExpectedTextSize = FX_ArraySize(kExpectedText);
  // One expected matrix per character of |kExpectedText|.
  constexpr FS_MATRIX kExpectedMatrices[] = {
      {12.0f, 0.0f, 0.0f, 10.0f, 66.0f, 90.0f},
      {12.0f, 0.0f, 0.0f, 10.0f, 66.0f, 90.0f},
      {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
      {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
      {12.0f, 0.0f, 0.0f, 10.0f, 38.0f, 60.0f},
      {12.0f, 0.0f, 0.0f, 10.0f, 38.0f, 60.0f},
      {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
      {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
      {1.0f, 0.0f, 0.0f, 0.833333, 60.0f, 130.0f},
      {1.0f, 0.0f, 0.0f, 0.833333, 60.0f, 130.0f},
  };
  constexpr size_t kExpectedCount = FX_ArraySize(kExpectedMatrices);
  static_assert(kExpectedCount + 1 == kExpectedTextSize,
                "Bad expected matrix size");

  // For a size 12 letter 'A'.
  constexpr double kExpectedCharWidth = 8.436;
  constexpr double kExpectedCharHeight = 6.77;

  ASSERT_TRUE(OpenDocument("font_matrix.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // The scoped text page goes out of scope before the page is unloaded.
    ScopedFPDFTextPage scoped_text_page(FPDFText_LoadPage(page));
    ASSERT_TRUE(scoped_text_page);
    ASSERT_EQ(static_cast<int>(kExpectedCount),
              FPDFText_CountChars(scoped_text_page.get()));

    {
      // Check the characters. The expected return value is one larger than
      // the number of characters requested.
      unsigned short text[kExpectedTextSize];
      ASSERT_EQ(
          static_cast<int>(kExpectedTextSize),
          FPDFText_GetText(scoped_text_page.get(), 0, kExpectedCount, text));
      EXPECT_TRUE(check_unsigned_shorts(kExpectedText, text, kExpectedTextSize));
    }

    {
      // Check the character box size. Characters 0, 4 and 8 are all expected
      // to have the same box dimensions.
      double box_left;
      double box_right;
      double box_bottom;
      double box_top;
      ASSERT_TRUE(FPDFText_GetCharBox(scoped_text_page.get(), 0, &box_left,
                                      &box_right, &box_bottom, &box_top));
      EXPECT_NEAR(kExpectedCharWidth, box_right - box_left, 0.001);
      EXPECT_NEAR(kExpectedCharHeight, box_top - box_bottom, 0.001);
      ASSERT_TRUE(FPDFText_GetCharBox(scoped_text_page.get(), 4, &box_left,
                                      &box_right, &box_bottom, &box_top));
      EXPECT_NEAR(kExpectedCharWidth, box_right - box_left, 0.001);
      EXPECT_NEAR(kExpectedCharHeight, box_top - box_bottom, 0.001);
      ASSERT_TRUE(FPDFText_GetCharBox(scoped_text_page.get(), 8, &box_left,
                                      &box_right, &box_bottom, &box_top));
      EXPECT_NEAR(kExpectedCharWidth, box_right - box_left, 0.001);
      EXPECT_NEAR(kExpectedCharHeight, box_top - box_bottom, 0.001);
    }

    // Check the character matrix. The failing index is streamed into each
    // message.
    FS_MATRIX actual;
    for (size_t pos = 0; pos < kExpectedCount; ++pos) {
      ASSERT_TRUE(FPDFText_GetMatrix(scoped_text_page.get(), pos, &actual))
          << pos;
      const FS_MATRIX& wanted = kExpectedMatrices[pos];
      EXPECT_FLOAT_EQ(wanted.a, actual.a) << pos;
      EXPECT_FLOAT_EQ(wanted.b, actual.b) << pos;
      EXPECT_FLOAT_EQ(wanted.c, actual.c) << pos;
      EXPECT_FLOAT_EQ(wanted.d, actual.d) << pos;
      EXPECT_FLOAT_EQ(wanted.e, actual.e) << pos;
      EXPECT_FLOAT_EQ(wanted.f, actual.f) << pos;
    }

    // Check bad parameters: null text page, index one past the last
    // character, negative index, and null output matrix.
    EXPECT_FALSE(FPDFText_GetMatrix(nullptr, 0, &actual));
    EXPECT_FALSE(FPDFText_GetMatrix(scoped_text_page.get(), 10, &actual));
    EXPECT_FALSE(FPDFText_GetMatrix(scoped_text_page.get(), -1, &actual));
    EXPECT_FALSE(FPDFText_GetMatrix(scoped_text_page.get(), 0, nullptr));
  }

  UnloadPage(page);
}
1551