1 // Copyright 2015 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <algorithm>
6 #include <memory>
7 #include <utility>
8 #include <vector>
9 
10 #include "build/build_config.h"
11 #include "core/fxcrt/fx_memory.h"
12 #include "core/fxge/fx_font.h"
13 #include "public/cpp/fpdf_scopers.h"
14 #include "public/fpdf_text.h"
15 #include "public/fpdf_transformpage.h"
16 #include "public/fpdfview.h"
17 #include "testing/embedder_test.h"
18 #include "testing/fx_string_testhelpers.h"
19 #include "testing/gtest/include/gtest/gtest.h"
20 
21 namespace {
22 
23 constexpr char kHelloGoodbyeText[] = "Hello, world!\r\nGoodbye, world!";
24 constexpr int kHelloGoodbyeTextSize = FX_ArraySize(kHelloGoodbyeText);
25 
// Returns true when the first |length| elements of |actual| equal the first
// |length| characters of |expected|, each character being converted to
// unsigned short before the comparison.
//
// |length| may cover the terminating NUL of |expected| but nothing beyond it;
// a larger |length| yields false without reading either array.
bool check_unsigned_shorts(const char* expected,
                           const unsigned short* actual,
                           size_t length) {
  const size_t max_comparable = strlen(expected) + 1;
  if (length > max_comparable)
    return false;

  return std::equal(actual, actual + length, expected,
                    [](unsigned short code_unit, char ch) {
                      return code_unit == static_cast<unsigned short>(ch);
                    });
}
38 
39 }  // namespace
40 
41 class FPDFTextEmbedderTest : public EmbedderTest {};
42 
TEST_F(FPDFTextEmbedderTest,Text)43 TEST_F(FPDFTextEmbedderTest, Text) {
44   ASSERT_TRUE(OpenDocument("hello_world.pdf"));
45   FPDF_PAGE page = LoadPage(0);
46   ASSERT_TRUE(page);
47 
48   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
49   ASSERT_TRUE(textpage);
50 
51   unsigned short buffer[128];
52   memset(buffer, 0xbd, sizeof(buffer));
53 
54   // Check that edge cases are handled gracefully
55   EXPECT_EQ(0, FPDFText_GetText(textpage, 0, 128, nullptr));
56   EXPECT_EQ(0, FPDFText_GetText(textpage, -1, 128, buffer));
57   EXPECT_EQ(0, FPDFText_GetText(textpage, 0, -1, buffer));
58   EXPECT_EQ(1, FPDFText_GetText(textpage, 0, 0, buffer));
59   EXPECT_EQ(0, buffer[0]);
60 
61   // Keep going and check the next case.
62   memset(buffer, 0xbd, sizeof(buffer));
63   EXPECT_EQ(2, FPDFText_GetText(textpage, 0, 1, buffer));
64   EXPECT_EQ(kHelloGoodbyeText[0], buffer[0]);
65   EXPECT_EQ(0, buffer[1]);
66 
67   // Check includes the terminating NUL that is provided.
68   int num_chars = FPDFText_GetText(textpage, 0, 128, buffer);
69   ASSERT_EQ(kHelloGoodbyeTextSize, num_chars);
70   EXPECT_TRUE(
71       check_unsigned_shorts(kHelloGoodbyeText, buffer, kHelloGoodbyeTextSize));
72 
73   // Count does not include the terminating NUL in the string literal.
74   EXPECT_EQ(kHelloGoodbyeTextSize - 1, FPDFText_CountChars(textpage));
75   for (size_t i = 0; i < kHelloGoodbyeTextSize - 1; ++i) {
76     EXPECT_EQ(static_cast<unsigned int>(kHelloGoodbyeText[i]),
77               FPDFText_GetUnicode(textpage, i))
78         << " at " << i;
79   }
80 
81   // Extracting using a buffer that will be completely filled. Small buffer is
82   // 12 elements long, since it will need 2 locations per displayed character in
83   // the expected string, plus 2 more for the terminating character.
84   static const char kSmallExpected[] = "Hello";
85   unsigned short small_buffer[12];
86   memset(buffer, 0xbd, sizeof(buffer));
87   EXPECT_EQ(6, FPDFText_GetText(textpage, 0, 5, small_buffer));
88   EXPECT_TRUE(check_unsigned_shorts(kSmallExpected, small_buffer,
89                                     sizeof(kSmallExpected)));
90 
91   EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0));
92   EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15));
93 
94   double left = 1.0;
95   double right = 2.0;
96   double bottom = 3.0;
97   double top = 4.0;
98   EXPECT_FALSE(FPDFText_GetCharBox(nullptr, 4, &left, &right, &bottom, &top));
99   EXPECT_DOUBLE_EQ(1.0, left);
100   EXPECT_DOUBLE_EQ(2.0, right);
101   EXPECT_DOUBLE_EQ(3.0, bottom);
102   EXPECT_DOUBLE_EQ(4.0, top);
103   EXPECT_FALSE(FPDFText_GetCharBox(textpage, -1, &left, &right, &bottom, &top));
104   EXPECT_DOUBLE_EQ(1.0, left);
105   EXPECT_DOUBLE_EQ(2.0, right);
106   EXPECT_DOUBLE_EQ(3.0, bottom);
107   EXPECT_DOUBLE_EQ(4.0, top);
108   EXPECT_FALSE(FPDFText_GetCharBox(textpage, 55, &left, &right, &bottom, &top));
109   EXPECT_DOUBLE_EQ(1.0, left);
110   EXPECT_DOUBLE_EQ(2.0, right);
111   EXPECT_DOUBLE_EQ(3.0, bottom);
112   EXPECT_DOUBLE_EQ(4.0, top);
113   EXPECT_FALSE(
114       FPDFText_GetCharBox(textpage, 4, nullptr, &right, &bottom, &top));
115   EXPECT_FALSE(FPDFText_GetCharBox(textpage, 4, &left, nullptr, &bottom, &top));
116   EXPECT_FALSE(FPDFText_GetCharBox(textpage, 4, &left, &right, nullptr, &top));
117   EXPECT_FALSE(
118       FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, nullptr));
119   EXPECT_FALSE(
120       FPDFText_GetCharBox(textpage, 4, nullptr, nullptr, nullptr, nullptr));
121 
122   EXPECT_TRUE(FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top));
123   EXPECT_NEAR(41.071, left, 0.001);
124   EXPECT_NEAR(46.243, right, 0.001);
125   EXPECT_NEAR(49.844, bottom, 0.001);
126   EXPECT_NEAR(55.520, top, 0.001);
127 
128   FS_RECTF rect = {4.0f, 1.0f, 3.0f, 2.0f};
129   EXPECT_FALSE(FPDFText_GetLooseCharBox(nullptr, 4, &rect));
130   EXPECT_FLOAT_EQ(4.0f, rect.left);
131   EXPECT_FLOAT_EQ(3.0f, rect.right);
132   EXPECT_FLOAT_EQ(2.0f, rect.bottom);
133   EXPECT_FLOAT_EQ(1.0f, rect.top);
134   EXPECT_FALSE(FPDFText_GetLooseCharBox(textpage, -1, &rect));
135   EXPECT_FLOAT_EQ(4.0f, rect.left);
136   EXPECT_FLOAT_EQ(3.0f, rect.right);
137   EXPECT_FLOAT_EQ(2.0f, rect.bottom);
138   EXPECT_FLOAT_EQ(1.0f, rect.top);
139   EXPECT_FALSE(FPDFText_GetLooseCharBox(textpage, 55, &rect));
140   EXPECT_FLOAT_EQ(4.0f, rect.left);
141   EXPECT_FLOAT_EQ(3.0f, rect.right);
142   EXPECT_FLOAT_EQ(2.0f, rect.bottom);
143   EXPECT_FLOAT_EQ(1.0f, rect.top);
144   EXPECT_FALSE(FPDFText_GetLooseCharBox(textpage, 4, nullptr));
145 
146   EXPECT_TRUE(FPDFText_GetLooseCharBox(textpage, 4, &rect));
147   EXPECT_FLOAT_EQ(40.664001f, rect.left);
148   EXPECT_FLOAT_EQ(46.664001f, rect.right);
149   EXPECT_FLOAT_EQ(47.667271f, rect.bottom);
150   EXPECT_FLOAT_EQ(59.667271f, rect.top);
151 
152   double x = 0.0;
153   double y = 0.0;
154   EXPECT_TRUE(FPDFText_GetCharOrigin(textpage, 4, &x, &y));
155   EXPECT_NEAR(40.664, x, 0.001);
156   EXPECT_NEAR(50.000, y, 0.001);
157 
158   EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(textpage, 42.0, 50.0, 1.0, 1.0));
159   EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 0.0, 0.0, 1.0, 1.0));
160   EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 199.0, 199.0, 1.0, 1.0));
161 
162   // Test out of range indicies.
163   EXPECT_EQ(-1,
164             FPDFText_GetCharIndexAtPos(textpage, 42.0, 10000000.0, 1.0, 1.0));
165   EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, -1.0, 50.0, 1.0, 1.0));
166 
167   // Count does not include the terminating NUL in the string literal.
168   EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, kHelloGoodbyeTextSize - 1));
169 
170   left = 0.0;
171   right = 0.0;
172   bottom = 0.0;
173   top = 0.0;
174   EXPECT_TRUE(FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom));
175   EXPECT_NEAR(20.847, left, 0.001);
176   EXPECT_NEAR(135.167, right, 0.001);
177   EXPECT_NEAR(96.655, bottom, 0.001);
178   EXPECT_NEAR(116.000, top, 0.001);
179 
180   // Test out of range indicies set outputs to (0.0, 0.0, 0.0, 0.0).
181   left = -1.0;
182   right = -1.0;
183   bottom = -1.0;
184   top = -1.0;
185   EXPECT_FALSE(FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom));
186   EXPECT_EQ(0.0, left);
187   EXPECT_EQ(0.0, right);
188   EXPECT_EQ(0.0, bottom);
189   EXPECT_EQ(0.0, top);
190 
191   left = -2.0;
192   right = -2.0;
193   bottom = -2.0;
194   top = -2.0;
195   EXPECT_FALSE(FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom));
196   EXPECT_EQ(0.0, left);
197   EXPECT_EQ(0.0, right);
198   EXPECT_EQ(0.0, bottom);
199   EXPECT_EQ(0.0, top);
200 
201   EXPECT_EQ(
202       9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, nullptr, 0));
203 
204   // Extract starting at character 4 as above.
205   memset(buffer, 0xbd, sizeof(buffer));
206   EXPECT_EQ(
207       1, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, buffer, 1));
208   EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText + 4, buffer, 1));
209   EXPECT_EQ(0xbdbd, buffer[1]);
210 
211   memset(buffer, 0xbd, sizeof(buffer));
212   EXPECT_EQ(
213       9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, buffer, 9));
214   EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText + 4, buffer, 9));
215   EXPECT_EQ(0xbdbd, buffer[9]);
216 
217   memset(buffer, 0xbd, sizeof(buffer));
218   EXPECT_EQ(10, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0,
219                                         buffer, 128));
220   EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText + 4, buffer, 9));
221   EXPECT_EQ(0u, buffer[9]);
222   EXPECT_EQ(0xbdbd, buffer[10]);
223 
224   FPDFText_ClosePage(textpage);
225   UnloadPage(page);
226 }
227 
// Checks font size, character origins and loose character boxes for the
// characters at indices 1 and 2 of vertical_text.pdf.
TEST_F(FPDFTextEmbedderTest, TextVertical) {
  constexpr double kTolerance = 0.001;

  ASSERT_TRUE(OpenDocument("vertical_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  EXPECT_EQ(12.0, FPDFText_GetFontSize(text_page, 0));

  // Character origins.
  double origin_x = 0.0;
  double origin_y = 0.0;
  EXPECT_TRUE(FPDFText_GetCharOrigin(text_page, 1, &origin_x, &origin_y));
  EXPECT_NEAR(6.664, origin_x, kTolerance);
  EXPECT_NEAR(171.508, origin_y, kTolerance);

  EXPECT_TRUE(FPDFText_GetCharOrigin(text_page, 2, &origin_x, &origin_y));
  EXPECT_NEAR(8.668, origin_x, kTolerance);
  EXPECT_NEAR(160.492, origin_y, kTolerance);

  // Loose character boxes. Both share the same horizontal extent; the expected
  // bottom of the second box equals the expected top of the first.
  FS_RECTF loose_box;
  EXPECT_TRUE(FPDFText_GetLooseCharBox(text_page, 1, &loose_box));
  EXPECT_NEAR(4, loose_box.left, kTolerance);
  EXPECT_NEAR(16, loose_box.right, kTolerance);
  EXPECT_NEAR(178.984, loose_box.bottom, kTolerance);
  EXPECT_NEAR(170.308, loose_box.top, kTolerance);

  EXPECT_TRUE(FPDFText_GetLooseCharBox(text_page, 2, &loose_box));
  EXPECT_NEAR(4, loose_box.left, kTolerance);
  EXPECT_NEAR(16, loose_box.right, kTolerance);
  EXPECT_NEAR(170.308, loose_box.bottom, kTolerance);
  EXPECT_NEAR(159.292, loose_box.top, kTolerance);

  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
264 
TEST_F(FPDFTextEmbedderTest,TextSearch)265 TEST_F(FPDFTextEmbedderTest, TextSearch) {
266   ASSERT_TRUE(OpenDocument("hello_world.pdf"));
267   FPDF_PAGE page = LoadPage(0);
268   ASSERT_TRUE(page);
269 
270   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
271   ASSERT_TRUE(textpage);
272 
273   ScopedFPDFWideString nope = GetFPDFWideString(L"nope");
274   ScopedFPDFWideString world = GetFPDFWideString(L"world");
275   ScopedFPDFWideString world_caps = GetFPDFWideString(L"WORLD");
276   ScopedFPDFWideString world_substr = GetFPDFWideString(L"orld");
277 
278   {
279     // No occurrences of "nope" in test page.
280     ScopedFPDFTextFind search(FPDFText_FindStart(textpage, nope.get(), 0, 0));
281     EXPECT_TRUE(search);
282     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
283     EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
284 
285     // Advancing finds nothing.
286     EXPECT_FALSE(FPDFText_FindNext(search.get()));
287     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
288     EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
289 
290     // Retreating finds nothing.
291     EXPECT_FALSE(FPDFText_FindPrev(search.get()));
292     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
293     EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
294   }
295 
296   {
297     // Two occurrences of "world" in test page.
298     ScopedFPDFTextFind search(FPDFText_FindStart(textpage, world.get(), 0, 2));
299     EXPECT_TRUE(search);
300 
301     // Remains not found until advanced.
302     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
303     EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
304 
305     // First occurrence of "world" in this test page.
306     EXPECT_TRUE(FPDFText_FindNext(search.get()));
307     EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
308     EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
309 
310     // Last occurrence of "world" in this test page.
311     EXPECT_TRUE(FPDFText_FindNext(search.get()));
312     EXPECT_EQ(24, FPDFText_GetSchResultIndex(search.get()));
313     EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
314 
315     // Found position unchanged when fails to advance.
316     EXPECT_FALSE(FPDFText_FindNext(search.get()));
317     EXPECT_EQ(24, FPDFText_GetSchResultIndex(search.get()));
318     EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
319 
320     // Back to first occurrence.
321     EXPECT_TRUE(FPDFText_FindPrev(search.get()));
322     EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
323     EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
324 
325     // Found position unchanged when fails to retreat.
326     EXPECT_FALSE(FPDFText_FindPrev(search.get()));
327     EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
328     EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
329   }
330 
331   {
332     // Exact search unaffected by case sensitiity and whole word flags.
333     ScopedFPDFTextFind search(FPDFText_FindStart(
334         textpage, world.get(), FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0));
335     EXPECT_TRUE(search);
336     EXPECT_TRUE(FPDFText_FindNext(search.get()));
337     EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
338     EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
339   }
340 
341   {
342     // Default is case-insensitive, so matching agaist caps works.
343     ScopedFPDFTextFind search(
344         FPDFText_FindStart(textpage, world_caps.get(), 0, 0));
345     EXPECT_TRUE(search);
346     EXPECT_TRUE(FPDFText_FindNext(search.get()));
347     EXPECT_EQ(7, FPDFText_GetSchResultIndex(search.get()));
348     EXPECT_EQ(5, FPDFText_GetSchCount(search.get()));
349   }
350 
351   {
352     // But can be made case sensitive, in which case this fails.
353     ScopedFPDFTextFind search(
354         FPDFText_FindStart(textpage, world_caps.get(), FPDF_MATCHCASE, 0));
355     EXPECT_FALSE(FPDFText_FindNext(search.get()));
356     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
357     EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
358   }
359 
360   {
361     // Default is match anywhere within word, so matching substring works.
362     ScopedFPDFTextFind search(
363         FPDFText_FindStart(textpage, world_substr.get(), 0, 0));
364     EXPECT_TRUE(FPDFText_FindNext(search.get()));
365     EXPECT_EQ(8, FPDFText_GetSchResultIndex(search.get()));
366     EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
367   }
368 
369   {
370     // But can be made to mach word boundaries, in which case this fails.
371     ScopedFPDFTextFind search(FPDFText_FindStart(textpage, world_substr.get(),
372                                                  FPDF_MATCHWHOLEWORD, 0));
373     EXPECT_FALSE(FPDFText_FindNext(search.get()));
374     // TODO(tsepez): investigate strange index/count values in this state.
375   }
376 
377   FPDFText_ClosePage(textpage);
378   UnloadPage(page);
379 }
380 
TEST_F(FPDFTextEmbedderTest,TextSearchConsecutive)381 TEST_F(FPDFTextEmbedderTest, TextSearchConsecutive) {
382   ASSERT_TRUE(OpenDocument("find_text_consecutive.pdf"));
383   FPDF_PAGE page = LoadPage(0);
384   ASSERT_TRUE(page);
385 
386   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
387   ASSERT_TRUE(textpage);
388 
389   ScopedFPDFWideString aaaa = GetFPDFWideString(L"aaaa");
390 
391   {
392     // Search for "aaaa" yields 2 results in "aaaaaaaaaa".
393     ScopedFPDFTextFind search(FPDFText_FindStart(textpage, aaaa.get(), 0, 0));
394     EXPECT_TRUE(search);
395 
396     // Remains not found until advanced.
397     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
398     EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
399 
400     // First occurrence of "aaaa" in this test page.
401     EXPECT_TRUE(FPDFText_FindNext(search.get()));
402     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
403     EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
404 
405     // Last occurrence of "aaaa" in this test page.
406     EXPECT_TRUE(FPDFText_FindNext(search.get()));
407     EXPECT_EQ(4, FPDFText_GetSchResultIndex(search.get()));
408     EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
409 
410     // Found position unchanged when fails to advance.
411     EXPECT_FALSE(FPDFText_FindNext(search.get()));
412     EXPECT_EQ(4, FPDFText_GetSchResultIndex(search.get()));
413     EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
414 
415     // Back to first occurrence.
416     EXPECT_TRUE(FPDFText_FindPrev(search.get()));
417     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
418     EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
419 
420     // Found position unchanged when fails to retreat.
421     EXPECT_FALSE(FPDFText_FindPrev(search.get()));
422     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
423     EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
424   }
425 
426   {
427     // Search for "aaaa" yields 7 results in "aaaaaaaaaa", when searching with
428     // FPDF_CONSECUTIVE.
429     ScopedFPDFTextFind search(
430         FPDFText_FindStart(textpage, aaaa.get(), FPDF_CONSECUTIVE, 0));
431     EXPECT_TRUE(search);
432 
433     // Remains not found until advanced.
434     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
435     EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
436 
437     // Find consecutive occurrences of "aaaa" in this test page:
438     for (int i = 0; i < 7; ++i) {
439       EXPECT_TRUE(FPDFText_FindNext(search.get()));
440       EXPECT_EQ(i, FPDFText_GetSchResultIndex(search.get()));
441       EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
442     }
443 
444     // Found position unchanged when fails to advance.
445     EXPECT_FALSE(FPDFText_FindNext(search.get()));
446     EXPECT_EQ(6, FPDFText_GetSchResultIndex(search.get()));
447     EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
448 
449     for (int i = 5; i >= 0; --i) {
450       EXPECT_TRUE(FPDFText_FindPrev(search.get()));
451       EXPECT_EQ(i, FPDFText_GetSchResultIndex(search.get()));
452       EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
453     }
454 
455     // Found position unchanged when fails to retreat.
456     EXPECT_FALSE(FPDFText_FindPrev(search.get()));
457     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
458     EXPECT_EQ(4, FPDFText_GetSchCount(search.get()));
459   }
460 
461   FPDFText_ClosePage(textpage);
462   UnloadPage(page);
463 }
464 
465 // Fails on Windows. https://crbug.com/pdfium/1370
466 #if defined(OS_WIN)
467 #define MAYBE_TextSearchLatinExtended DISABLED_TextSearchLatinExtended
468 #else
469 #define MAYBE_TextSearchLatinExtended TextSearchLatinExtended
470 #endif
TEST_F(FPDFTextEmbedderTest,MAYBE_TextSearchLatinExtended)471 TEST_F(FPDFTextEmbedderTest, MAYBE_TextSearchLatinExtended) {
472   ASSERT_TRUE(OpenDocument("latin_extended.pdf"));
473   FPDF_PAGE page = LoadPage(0);
474   ASSERT_TRUE(page);
475 
476   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
477   ASSERT_TRUE(textpage);
478 
479   // Upper/lowercase 'a' with breve.
480   constexpr FPDF_WCHAR kNeedleUpper[] = {0x0102, 0x0000};
481   constexpr FPDF_WCHAR kNeedleLower[] = {0x0103, 0x0000};
482 
483   for (const auto* needle : {kNeedleUpper, kNeedleLower}) {
484     ScopedFPDFTextFind search(FPDFText_FindStart(textpage, needle, 0, 0));
485     EXPECT_TRUE(search);
486     EXPECT_EQ(0, FPDFText_GetSchResultIndex(search.get()));
487     EXPECT_EQ(0, FPDFText_GetSchCount(search.get()));
488 
489     // Should find 2 results at position 21/22, both with length 1.
490     EXPECT_TRUE(FPDFText_FindNext(search.get()));
491     EXPECT_EQ(2, FPDFText_GetSchResultIndex(search.get()));
492     EXPECT_EQ(1, FPDFText_GetSchCount(search.get()));
493     EXPECT_TRUE(FPDFText_FindNext(search.get()));
494     EXPECT_EQ(3, FPDFText_GetSchResultIndex(search.get()));
495     EXPECT_EQ(1, FPDFText_GetSchCount(search.get()));
496     // And no more than 2 results.
497     EXPECT_FALSE(FPDFText_FindNext(search.get()));
498   }
499 
500   FPDFText_ClosePage(textpage);
501   UnloadPage(page);
502 }
503 
504 // Test that the page has characters despite a bad stream length.
TEST_F(FPDFTextEmbedderTest,StreamLengthPastEndOfFile)505 TEST_F(FPDFTextEmbedderTest, StreamLengthPastEndOfFile) {
506   ASSERT_TRUE(OpenDocument("bug_57.pdf"));
507   FPDF_PAGE page = LoadPage(0);
508   ASSERT_TRUE(page);
509 
510   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
511   ASSERT_TRUE(textpage);
512   EXPECT_EQ(13, FPDFText_CountChars(textpage));
513 
514   FPDFText_ClosePage(textpage);
515   UnloadPage(page);
516 }
517 
TEST_F(FPDFTextEmbedderTest,WebLinks)518 TEST_F(FPDFTextEmbedderTest, WebLinks) {
519   ASSERT_TRUE(OpenDocument("weblinks.pdf"));
520   FPDF_PAGE page = LoadPage(0);
521   ASSERT_TRUE(page);
522 
523   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
524   ASSERT_TRUE(textpage);
525 
526   {
527     ScopedFPDFPageLink pagelink(FPDFLink_LoadWebLinks(textpage));
528     EXPECT_TRUE(pagelink);
529 
530     // Page contains two HTTP-style URLs.
531     EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink.get()));
532 
533     // Only a terminating NUL required for bogus links.
534     EXPECT_EQ(1, FPDFLink_GetURL(pagelink.get(), 2, nullptr, 0));
535     EXPECT_EQ(1, FPDFLink_GetURL(pagelink.get(), 1400, nullptr, 0));
536     EXPECT_EQ(1, FPDFLink_GetURL(pagelink.get(), -1, nullptr, 0));
537   }
538 
539   FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
540   EXPECT_TRUE(pagelink);
541 
542   // Query the number of characters required for each link (incl NUL).
543   EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0));
544   EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0));
545 
546   static const char expected_url[] = "http://example.com?q=foo";
547   static const size_t expected_len = sizeof(expected_url);
548   unsigned short buffer[128];
549 
550   // Retrieve a link with too small a buffer.  Buffer will not be
551   // NUL-terminated, but must not be modified past indicated length,
552   // so pre-fill with a pattern to check write bounds.
553   memset(buffer, 0xbd, sizeof(buffer));
554   EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, buffer, 1));
555   EXPECT_TRUE(check_unsigned_shorts(expected_url, buffer, 1));
556   EXPECT_EQ(0xbdbd, buffer[1]);
557 
558   // Check buffer that doesn't have space for a terminating NUL.
559   memset(buffer, 0xbd, sizeof(buffer));
560   EXPECT_EQ(static_cast<int>(expected_len - 1),
561             FPDFLink_GetURL(pagelink, 0, buffer, expected_len - 1));
562   EXPECT_TRUE(check_unsigned_shorts(expected_url, buffer, expected_len - 1));
563   EXPECT_EQ(0xbdbd, buffer[expected_len - 1]);
564 
565   // Retreive link with exactly-sized buffer.
566   memset(buffer, 0xbd, sizeof(buffer));
567   EXPECT_EQ(static_cast<int>(expected_len),
568             FPDFLink_GetURL(pagelink, 0, buffer, expected_len));
569   EXPECT_TRUE(check_unsigned_shorts(expected_url, buffer, expected_len));
570   EXPECT_EQ(0u, buffer[expected_len - 1]);
571   EXPECT_EQ(0xbdbd, buffer[expected_len]);
572 
573   // Retreive link with ample-sized-buffer.
574   memset(buffer, 0xbd, sizeof(buffer));
575   EXPECT_EQ(static_cast<int>(expected_len),
576             FPDFLink_GetURL(pagelink, 0, buffer, 128));
577   EXPECT_TRUE(check_unsigned_shorts(expected_url, buffer, expected_len));
578   EXPECT_EQ(0u, buffer[expected_len - 1]);
579   EXPECT_EQ(0xbdbd, buffer[expected_len]);
580 
581   // Each link rendered in a single rect in this test page.
582   EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0));
583   EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1));
584 
585   // Each link rendered in a single rect in this test page.
586   EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1));
587   EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2));
588   EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000));
589 
590   // Check boundary of valid link index with valid rect index.
591   double left = 0.0;
592   double right = 0.0;
593   double top = 0.0;
594   double bottom = 0.0;
595   EXPECT_TRUE(FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom));
596   EXPECT_NEAR(50.791, left, 0.001);
597   EXPECT_NEAR(187.963, right, 0.001);
598   EXPECT_NEAR(97.624, bottom, 0.001);
599   EXPECT_NEAR(108.736, top, 0.001);
600 
601   // Check that valid link with invalid rect index leaves parameters unchanged.
602   left = -1.0;
603   right = -1.0;
604   top = -1.0;
605   bottom = -1.0;
606   EXPECT_FALSE(FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom));
607   EXPECT_EQ(-1.0, left);
608   EXPECT_EQ(-1.0, right);
609   EXPECT_EQ(-1.0, bottom);
610   EXPECT_EQ(-1.0, top);
611 
612   // Check that invalid link index leaves parameters unchanged.
613   left = -2.0;
614   right = -2.0;
615   top = -2.0;
616   bottom = -2.0;
617   EXPECT_FALSE(FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom));
618   EXPECT_EQ(-2.0, left);
619   EXPECT_EQ(-2.0, right);
620   EXPECT_EQ(-2.0, bottom);
621   EXPECT_EQ(-2.0, top);
622 
623   FPDFLink_CloseWebLinks(pagelink);
624   FPDFText_ClosePage(textpage);
625   UnloadPage(page);
626 }
627 
// Checks the URLs extracted from weblinks_across_lines.pdf, where the link
// text is followed by or split across line breaks.
TEST_F(FPDFTextEmbedderTest, WebLinksAcrossLines) {
  ASSERT_TRUE(OpenDocument("weblinks_across_lines.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
  ASSERT_TRUE(textpage);

  FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
  EXPECT_TRUE(pagelink);

  // Expected URL for each link index, in order.
  static const char* const kExpectedUrls[] = {
      "http://example.com",           // from "http://www.example.com?\r\nfoo"
      "http://example.com/",          // from "http://www.example.com/\r\nfoo"
      "http://example.com/test-foo",  // from "http://example.com/test-\r\nfoo"
      "http://abc.com/test-foo",      // from "http://abc.com/test-\r\n\r\nfoo"
      // Next two links from "http://www.example.com/\r\nhttp://www.abc.com/"
      "http://example.com/",
      "http://www.abc.com",
  };
  static const int kNumLinks = static_cast<int>(FX_ArraySize(kExpectedUrls));

  EXPECT_EQ(kNumLinks, FPDFLink_CountWebLinks(pagelink));

  unsigned short buffer[128];
  int link_index = 0;
  for (const char* expected_url : kExpectedUrls) {
    // Required length includes the terminating NUL.
    const size_t required_len = strlen(expected_url) + 1;
    memset(buffer, 0, sizeof(buffer));

    // Size query first, then the actual retrieval into an ample buffer.
    EXPECT_EQ(static_cast<int>(required_len),
              FPDFLink_GetURL(pagelink, link_index, nullptr, 0));
    EXPECT_EQ(
        static_cast<int>(required_len),
        FPDFLink_GetURL(pagelink, link_index, buffer, FX_ArraySize(buffer)));
    EXPECT_TRUE(check_unsigned_shorts(expected_url, buffer, required_len));
    ++link_index;
  }

  FPDFLink_CloseWebLinks(pagelink);
  FPDFText_ClosePage(textpage);
  UnloadPage(page);
}
667 
// Checks the URL reported for the second web link of bug_650.pdf.
TEST_F(FPDFTextEmbedderTest, WebLinksAcrossLinesBug) {
  static const char kExpectedUrl[] =
      "http://tutorial45.com/learn-autocad-basics-day-166/";
  // Size of the expected URL, terminating NUL included.
  static const int kUrlSize = static_cast<int>(sizeof(kExpectedUrl));

  ASSERT_TRUE(OpenDocument("bug_650.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  FPDF_PAGELINK page_link = FPDFLink_LoadWebLinks(text_page);
  EXPECT_TRUE(page_link);

  EXPECT_EQ(2, FPDFLink_CountWebLinks(page_link));

  // Size query first, then the actual retrieval into an ample buffer.
  unsigned short url_buffer[128] = {0};
  EXPECT_EQ(kUrlSize, FPDFLink_GetURL(page_link, 1, nullptr, 0));
  EXPECT_EQ(kUrlSize, FPDFLink_GetURL(page_link, 1, url_buffer,
                                      FX_ArraySize(url_buffer)));
  EXPECT_TRUE(check_unsigned_shorts(kExpectedUrl, url_buffer, kUrlSize));

  FPDFLink_CloseWebLinks(page_link);
  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
694 
// Checks FPDFLink_GetTextRange() on weblinks.pdf for a valid link, and that
// invalid queries fail without modifying the output parameters.
TEST_F(FPDFTextEmbedderTest, WebLinksCharRanges) {
  ASSERT_TRUE(OpenDocument("weblinks.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page);
  ASSERT_TRUE(text_page);

  FPDF_PAGELINK page_link = FPDFLink_LoadWebLinks(text_page);
  EXPECT_TRUE(page_link);

  // Character range of a valid link.
  int start_char_index;
  int char_count;
  ASSERT_TRUE(
      FPDFLink_GetTextRange(page_link, 0, &start_char_index, &char_count));
  EXPECT_EQ(35, start_char_index);
  EXPECT_EQ(24, char_count);

  // Queries that must fail and leave both outputs at their sentinel values.
  struct InvalidQuery {
    FPDF_PAGELINK link_page;
    int link_index;
  };
  const InvalidQuery invalid_queries[] = {
      {page_link, 6},   // Invalid link index.
      {nullptr, 0},     // Null page link.
      {page_link, -4},  // Negative link index.
  };
  constexpr int kStartSentinel = -10;
  constexpr int kCountSentinel = -8;
  for (const InvalidQuery& query : invalid_queries) {
    start_char_index = kStartSentinel;
    char_count = kCountSentinel;
    ASSERT_FALSE(FPDFLink_GetTextRange(query.link_page, query.link_index,
                                       &start_char_index, &char_count));
    EXPECT_EQ(kStartSentinel, start_char_index);
    EXPECT_EQ(kCountSentinel, char_count);
  }

  FPDFLink_CloseWebLinks(page_link);
  FPDFText_ClosePage(text_page);
  UnloadPage(page);
}
742 
TEST_F(FPDFTextEmbedderTest,AnnotLinks)743 TEST_F(FPDFTextEmbedderTest, AnnotLinks) {
744   ASSERT_TRUE(OpenDocument("link_annots.pdf"));
745   FPDF_PAGE page = LoadPage(0);
746   ASSERT_TRUE(page);
747 
748   // Get link count via checking annotation subtype
749   int annot_count = FPDFPage_GetAnnotCount(page);
750   ASSERT_EQ(8, annot_count);
751   int annot_subtype_link_count = 0;
752   for (int i = 0; i < annot_count; ++i) {
753     ScopedFPDFAnnotation annot(FPDFPage_GetAnnot(page, i));
754     if (FPDFAnnot_GetSubtype(annot.get()) == FPDF_ANNOT_LINK) {
755       ++annot_subtype_link_count;
756     }
757   }
758   EXPECT_EQ(4, annot_subtype_link_count);
759 
760   // Validate that FPDFLink_Enumerate() returns same number of links
761   int start_pos = 0;
762   FPDF_LINK link_annot;
763   int link_count = 0;
764   while (FPDFLink_Enumerate(page, &start_pos, &link_annot)) {
765     ASSERT_TRUE(link_annot);
766     if (start_pos == 1 || start_pos == 2) {
767       // First two links point to first and second page within the document
768       // respectively
769       FPDF_DEST link_dest = FPDFLink_GetDest(document(), link_annot);
770       EXPECT_TRUE(link_dest);
771       EXPECT_EQ(start_pos - 1,
772                 FPDFDest_GetDestPageIndex(document(), link_dest));
773     } else if (start_pos == 3) {  // points to PDF Spec URL
774       FS_RECTF link_rect;
775       EXPECT_TRUE(FPDFLink_GetAnnotRect(link_annot, &link_rect));
776       EXPECT_NEAR(66.0, link_rect.left, 0.001);
777       EXPECT_NEAR(544.0, link_rect.top, 0.001);
778       EXPECT_NEAR(196.0, link_rect.right, 0.001);
779       EXPECT_NEAR(529.0, link_rect.bottom, 0.001);
780     } else if (start_pos == 4) {  // this link has quad points
781       int quad_point_count = FPDFLink_CountQuadPoints(link_annot);
782       EXPECT_EQ(1, quad_point_count);
783       FS_QUADPOINTSF quad_points;
784       EXPECT_TRUE(FPDFLink_GetQuadPoints(link_annot, 0, &quad_points));
785       EXPECT_NEAR(83.0, quad_points.x1, 0.001);
786       EXPECT_NEAR(453.0, quad_points.y1, 0.001);
787       EXPECT_NEAR(178.0, quad_points.x2, 0.001);
788       EXPECT_NEAR(453.0, quad_points.y2, 0.001);
789       EXPECT_NEAR(83.0, quad_points.x3, 0.001);
790       EXPECT_NEAR(440.0, quad_points.y3, 0.001);
791       EXPECT_NEAR(178.0, quad_points.x4, 0.001);
792       EXPECT_NEAR(440.0, quad_points.y4, 0.001);
793       // AnnotRect is same as quad points for this link
794       FS_RECTF link_rect;
795       EXPECT_TRUE(FPDFLink_GetAnnotRect(link_annot, &link_rect));
796       EXPECT_NEAR(link_rect.left, quad_points.x1, 0.001);
797       EXPECT_NEAR(link_rect.top, quad_points.y1, 0.001);
798       EXPECT_NEAR(link_rect.right, quad_points.x4, 0.001);
799       EXPECT_NEAR(link_rect.bottom, quad_points.y4, 0.001);
800     }
801     ++link_count;
802   }
803   EXPECT_EQ(annot_subtype_link_count, link_count);
804 
805   UnloadPage(page);
806 }
807 
// Checks FPDFText_GetFontSize() for every character on page 0 of
// hello_world.pdf.
TEST_F(FPDFTextEmbedderTest, GetFontSize) {
  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // The scoper closes the text page even if a fatal assertion below returns
    // early, and this block ends before the page itself is unloaded.
    ScopedFPDFTextPage textpage(FPDFText_LoadPage(page));
    ASSERT_TRUE(textpage);

    // One expected size per character reported by FPDFText_CountChars().
    const double kExpectedFontsSizes[] = {
        12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 1,  1,
        16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16};

    const int count = FPDFText_CountChars(textpage.get());
    ASSERT_EQ(FX_ArraySize(kExpectedFontsSizes), static_cast<size_t>(count));
    for (int i = 0; i < count; ++i) {
      // Stream the index so a mismatch identifies the offending character.
      EXPECT_EQ(kExpectedFontsSizes[i],
                FPDFText_GetFontSize(textpage.get(), i))
          << i;
    }
  }

  UnloadPage(page);
}
828 
TEST_F(FPDFTextEmbedderTest,GetFontInfo)829 TEST_F(FPDFTextEmbedderTest, GetFontInfo) {
830   ASSERT_TRUE(OpenDocument("hello_world.pdf"));
831   FPDF_PAGE page = LoadPage(0);
832   ASSERT_TRUE(page);
833 
834   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
835   ASSERT_TRUE(textpage);
836   std::vector<char> font_name;
837   size_t num_chars1 = strlen("Hello, world!");
838   const char kExpectedFontName1[] = "Times-Roman";
839 
840   for (size_t i = 0; i < num_chars1; i++) {
841     int flags = -1;
842     unsigned long length =
843         FPDFText_GetFontInfo(textpage, i, nullptr, 0, &flags);
844     static constexpr unsigned long expected_length = sizeof(kExpectedFontName1);
845     ASSERT_EQ(expected_length, length);
846     EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
847     font_name.resize(length);
848     std::fill(font_name.begin(), font_name.end(), 'a');
849     flags = -1;
850     EXPECT_EQ(expected_length,
851               FPDFText_GetFontInfo(textpage, i, font_name.data(),
852                                    font_name.size(), &flags));
853     EXPECT_STREQ(kExpectedFontName1, font_name.data());
854     EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
855   }
856   // If the size of the buffer is not large enough, the buffer should remain
857   // unchanged.
858   font_name.pop_back();
859   std::fill(font_name.begin(), font_name.end(), 'a');
860   EXPECT_EQ(sizeof(kExpectedFontName1),
861             FPDFText_GetFontInfo(textpage, 0, font_name.data(),
862                                  font_name.size(), nullptr));
863   for (char a : font_name)
864     EXPECT_EQ('a', a);
865 
866   // The text is "Hello, world!\r\nGoodbye, world!", so the next two characters
867   // do not have any font information.
868   EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, num_chars1, font_name.data(),
869                                      font_name.size(), nullptr));
870   EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, num_chars1 + 1, font_name.data(),
871                                      font_name.size(), nullptr));
872 
873   size_t num_chars2 = strlen("Goodbye, world!");
874   const char kExpectedFontName2[] = "Helvetica";
875   for (size_t i = num_chars1 + 2; i < num_chars1 + num_chars2 + 2; i++) {
876     int flags = -1;
877     unsigned long length =
878         FPDFText_GetFontInfo(textpage, i, nullptr, 0, &flags);
879     static constexpr unsigned long expected_length = sizeof(kExpectedFontName2);
880     ASSERT_EQ(expected_length, length);
881     EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
882     font_name.resize(length);
883     std::fill(font_name.begin(), font_name.end(), 'a');
884     flags = -1;
885     EXPECT_EQ(expected_length,
886               FPDFText_GetFontInfo(textpage, i, font_name.data(),
887                                    font_name.size(), &flags));
888     EXPECT_STREQ(kExpectedFontName2, font_name.data());
889     EXPECT_EQ(FXFONT_NONSYMBOLIC, flags);
890   }
891 
892   // Now try some out of bounds indices and null pointers to make sure we do not
893   // crash.
894   // No textpage.
895   EXPECT_EQ(0u, FPDFText_GetFontInfo(nullptr, 0, font_name.data(),
896                                      font_name.size(), nullptr));
897   // No buffer.
898   EXPECT_EQ(sizeof(kExpectedFontName1),
899             FPDFText_GetFontInfo(textpage, 0, nullptr, 0, nullptr));
900   // Negative index.
901   EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, -1, font_name.data(),
902                                      font_name.size(), nullptr));
903   // Out of bounds index.
904   EXPECT_EQ(0u, FPDFText_GetFontInfo(textpage, 1000, font_name.data(),
905                                      font_name.size(), nullptr));
906 
907   FPDFText_ClosePage(textpage);
908   UnloadPage(page);
909 }
910 
// bug_583.pdf is expected to yield a single character whose unicode value is
// reported as 0.
TEST_F(FPDFTextEmbedderTest, ToUnicode) {
  ASSERT_TRUE(OpenDocument("bug_583.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // The scoper closes the text page even if the fatal count check below
    // returns early, and this block ends before the page is unloaded.
    ScopedFPDFTextPage textpage(FPDFText_LoadPage(page));
    ASSERT_TRUE(textpage);

    ASSERT_EQ(1, FPDFText_CountChars(textpage.get()));
    EXPECT_EQ(0U, FPDFText_GetUnicode(textpage.get(), 0));
  }

  UnloadPage(page);
}
925 
// Checks a run of characters in bug_921.pdf through both
// FPDFText_GetUnicode() and FPDFText_GetText().
TEST_F(FPDFTextEmbedderTest, Bug_921) {
  ASSERT_TRUE(OpenDocument("bug_921.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // The scoper closes the text page even if a fatal assertion below returns
    // early, and this block ends before the page itself is unloaded.
    ScopedFPDFTextPage textpage(FPDFText_LoadPage(page));
    ASSERT_TRUE(textpage);

    // Expected code points for the characters starting at |kStartIndex|.
    static constexpr unsigned int kData[] = {
        1095, 1077, 1083, 1086, 1074, 1077, 1095, 1077, 1089, 1082, 1086, 1077,
        32,   1089, 1090, 1088, 1072, 1076, 1072, 1085, 1080, 1077, 46,   32};
    static constexpr int kStartIndex = 238;

    ASSERT_EQ(268, FPDFText_CountChars(textpage.get()));
    for (size_t i = 0; i < FX_ArraySize(kData); ++i) {
      EXPECT_EQ(kData[i],
                FPDFText_GetUnicode(textpage.get(), kStartIndex + i));
    }

    // One extra slot for the terminating NUL that FPDFText_GetText() appends;
    // the buffer is pre-filled with a non-zero pattern so the NUL is observable.
    unsigned short buffer[FX_ArraySize(kData) + 1];
    memset(buffer, 0xbd, sizeof(buffer));
    int count = FPDFText_GetText(textpage.get(), kStartIndex,
                                 FX_ArraySize(kData), buffer);
    ASSERT_GT(count, 0);
    ASSERT_EQ(FX_ArraySize(kData) + 1, static_cast<size_t>(count));
    for (size_t i = 0; i < FX_ArraySize(kData); ++i)
      EXPECT_EQ(kData[i], buffer[i]);
    EXPECT_EQ(0, buffer[FX_ArraySize(kData)]);
  }

  UnloadPage(page);
}
956 
// Checks how FPDFText_GetText() treats soft and hard hyphens in
// bug_781804.pdf.
TEST_F(FPDFTextEmbedderTest, GetTextWithHyphen) {
  ASSERT_TRUE(OpenDocument("bug_781804.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // Scoped so the text page is closed before the page is unloaded.
    ScopedFPDFTextPage textpage(FPDFText_LoadPage(page));
    ASSERT_TRUE(textpage);

    // Check that soft hyphens are not included
    // Expecting 'Veritaserum', except there is a \uFFFE where the hyphen was
    // in the original text. This is a weird thing that Adobe does, which we
    // replicate.
    constexpr unsigned short soft_expected[] = {
        0x0056, 0x0065, 0x0072, 0x0069, 0x0074, 0x0061, 0xfffe,
        0x0073, 0x0065, 0x0072, 0x0075, 0x006D, 0x0000};
    {
      constexpr int count = FX_ArraySize(soft_expected) - 1;
      unsigned short buffer[FX_ArraySize(soft_expected)];
      memset(buffer, 0, sizeof(buffer));

      EXPECT_EQ(count + 1,
                FPDFText_GetText(textpage.get(), 0, count, buffer));
      for (int i = 0; i < count; i++)
        EXPECT_EQ(soft_expected[i], buffer[i]);
    }

    // Check that hard hyphens are included
    {
      // There isn't the \0 in the actual doc, but there is a \r\n, so need to
      // add 1 to get aligned.
      constexpr size_t offset = FX_ArraySize(soft_expected) + 1;
      // Expecting 'User-\r\ngenerated', the - is a unicode character, so
      // cannot store in a char[].
      constexpr unsigned short hard_expected[] = {
          0x0055, 0x0073, 0x0065, 0x0072, 0x2010, 0x000d,
          0x000a, 0x0067, 0x0065, 0x006e, 0x0065, 0x0072,
          0x0061, 0x0074, 0x0065, 0x0064, 0x0000};
      constexpr int count = FX_ArraySize(hard_expected) - 1;
      unsigned short buffer[FX_ArraySize(hard_expected)];
      // Zero the buffer, as the soft hyphen case does, so the comparisons
      // below never read indeterminate values if fewer characters come back.
      memset(buffer, 0, sizeof(buffer));

      EXPECT_EQ(count + 1,
                FPDFText_GetText(textpage.get(), offset, count, buffer));
      for (int i = 0; i < count; i++)
        EXPECT_EQ(hard_expected[i], buffer[i]);
    }
  }

  UnloadPage(page);
}
1003 
// Loads and immediately closes the text page of bug_782596.pdf.
TEST_F(FPDFTextEmbedderTest, bug_782596) {
  // If there is a regression in this test, it will only fail under ASAN
  ASSERT_TRUE(OpenDocument("bug_782596.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);
  {
    // The text page is closed at the end of this block, before the page is
    // unloaded.
    ScopedFPDFTextPage textpage(FPDFText_LoadPage(page));
    ASSERT_TRUE(textpage);
  }
  UnloadPage(page);
}
1014 
// Control characters in control_characters.pdf must not show up in the text
// returned by FPDFText_GetText().
TEST_F(FPDFTextEmbedderTest, ControlCharacters) {
  ASSERT_TRUE(OpenDocument("control_characters.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // The scoper closes the text page even if a fatal assertion below returns
    // early, and this block ends before the page itself is unloaded.
    ScopedFPDFTextPage textpage(FPDFText_LoadPage(page));
    ASSERT_TRUE(textpage);

    // Should not include the control characters in the output
    unsigned short buffer[128];
    memset(buffer, 0xbd, sizeof(buffer));
    int num_chars = FPDFText_GetText(textpage.get(), 0, 128, buffer);
    ASSERT_EQ(kHelloGoodbyeTextSize, num_chars);
    EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText, buffer,
                                      kHelloGoodbyeTextSize));

    // Attempting to get a chunk of text after the control characters
    static const char expected_substring[] = "Goodbye, world!";
    // Offset is the length of 'Hello, world!\r\n' + 2 control characters in
    // the original stream
    static const int offset = 17;
    memset(buffer, 0xbd, sizeof(buffer));
    num_chars = FPDFText_GetText(textpage.get(), offset, 128, buffer);

    // Guard the signed-to-unsigned cast below.
    ASSERT_GE(num_chars, 0);
    EXPECT_EQ(sizeof(expected_substring), static_cast<size_t>(num_chars));
    EXPECT_TRUE(check_unsigned_shorts(expected_substring, buffer,
                                      sizeof(expected_substring)));
  }

  UnloadPage(page);
}
1047 
1048 // Testing that hyphen makers (0x0002) are replacing hard hyphens when
1049 // the word contains non-ASCII characters.
TEST_F(FPDFTextEmbedderTest,bug_1029)1050 TEST_F(FPDFTextEmbedderTest, bug_1029) {
1051   ASSERT_TRUE(OpenDocument("bug_1029.pdf"));
1052   FPDF_PAGE page = LoadPage(0);
1053   ASSERT_TRUE(page);
1054 
1055   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
1056   ASSERT_TRUE(textpage);
1057 
1058   constexpr int page_range_offset = 171;
1059   constexpr int page_range_length = 56;
1060 
1061   // This text is:
1062   // 'METADATA table. When the split has committed, it noti' followed
1063   // by a 'soft hyphen' (0x0002) and then 'fi'.
1064   //
1065   // The original text has a fi ligature, but that is broken up into
1066   // two characters when the PDF is processed.
1067   constexpr unsigned int expected[] = {
1068       0x004d, 0x0045, 0x0054, 0x0041, 0x0044, 0x0041, 0x0054, 0x0041,
1069       0x0020, 0x0074, 0x0061, 0x0062, 0x006c, 0x0065, 0x002e, 0x0020,
1070       0x0057, 0x0068, 0x0065, 0x006e, 0x0020, 0x0074, 0x0068, 0x0065,
1071       0x0020, 0x0073, 0x0070, 0x006c, 0x0069, 0x0074, 0x0020, 0x0068,
1072       0x0061, 0x0073, 0x0020, 0x0063, 0x006f, 0x006d, 0x006d, 0x0069,
1073       0x0074, 0x0074, 0x0065, 0x0064, 0x002c, 0x0020, 0x0069, 0x0074,
1074       0x0020, 0x006e, 0x006f, 0x0074, 0x0069, 0x0002, 0x0066, 0x0069};
1075   static_assert(page_range_length == FX_ArraySize(expected),
1076                 "Expected should be the same size as the range being "
1077                 "extracted from page.");
1078   EXPECT_LT(page_range_offset + page_range_length,
1079             FPDFText_CountChars(textpage));
1080 
1081   for (int i = 0; i < page_range_length; ++i) {
1082     EXPECT_EQ(expected[i],
1083               FPDFText_GetUnicode(textpage, page_range_offset + i));
1084   }
1085 
1086   FPDFText_ClosePage(textpage);
1087   UnloadPage(page);
1088 }
1089 
TEST_F(FPDFTextEmbedderTest,CountRects)1090 TEST_F(FPDFTextEmbedderTest, CountRects) {
1091   ASSERT_TRUE(OpenDocument("hello_world.pdf"));
1092   FPDF_PAGE page = LoadPage(0);
1093   ASSERT_TRUE(page);
1094 
1095   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
1096   ASSERT_TRUE(textpage);
1097 
1098   // Sanity check hello_world.pdf.
1099   // |num_chars| check includes the terminating NUL that is provided.
1100   {
1101     unsigned short buffer[128];
1102     int num_chars = FPDFText_GetText(textpage, 0, 128, buffer);
1103     ASSERT_EQ(kHelloGoodbyeTextSize, num_chars);
1104     EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText, buffer,
1105                                       kHelloGoodbyeTextSize));
1106   }
1107 
1108   // Now test FPDFText_CountRects().
1109   static const int kHelloWorldEnd = strlen("Hello, world!");
1110   static const int kGoodbyeWorldStart = kHelloWorldEnd + 2;  // "\r\n"
1111   for (int start = 0; start < kHelloWorldEnd; ++start) {
1112     // Always grab some part of "hello world" and some part of "goodbye world"
1113     // Since -1 means "all".
1114     EXPECT_EQ(2, FPDFText_CountRects(textpage, start, -1));
1115 
1116     // No characters always means 0 rects.
1117     EXPECT_EQ(0, FPDFText_CountRects(textpage, start, 0));
1118 
1119     // 1 character stays within "hello world"
1120     EXPECT_EQ(1, FPDFText_CountRects(textpage, start, 1));
1121 
1122     // When |start| is 0, Having |kGoodbyeWorldStart| char count does not reach
1123     // "goodbye world".
1124     int expected_value = start ? 2 : 1;
1125     EXPECT_EQ(expected_value,
1126               FPDFText_CountRects(textpage, start, kGoodbyeWorldStart));
1127 
1128     // Extremely large character count will always return 2 rects because
1129     // |start| starts inside "hello world".
1130     EXPECT_EQ(2, FPDFText_CountRects(textpage, start, 500));
1131   }
1132 
1133   // Now test negative counts.
1134   for (int start = 0; start < kHelloWorldEnd; ++start) {
1135     EXPECT_EQ(2, FPDFText_CountRects(textpage, start, -100));
1136     EXPECT_EQ(2, FPDFText_CountRects(textpage, start, -2));
1137   }
1138 
1139   // Now test larger start values.
1140   const int kExpectedLength = strlen(kHelloGoodbyeText);
1141   for (int start = kGoodbyeWorldStart + 1; start < kExpectedLength; ++start) {
1142     EXPECT_EQ(1, FPDFText_CountRects(textpage, start, -1));
1143     EXPECT_EQ(0, FPDFText_CountRects(textpage, start, 0));
1144     EXPECT_EQ(1, FPDFText_CountRects(textpage, start, 1));
1145     EXPECT_EQ(1, FPDFText_CountRects(textpage, start, 2));
1146     EXPECT_EQ(1, FPDFText_CountRects(textpage, start, 500));
1147   }
1148 
1149   // Now test start values that starts beyond the end of the text.
1150   for (int start = kExpectedLength; start < 100; ++start) {
1151     EXPECT_EQ(0, FPDFText_CountRects(textpage, start, -1));
1152     EXPECT_EQ(0, FPDFText_CountRects(textpage, start, 0));
1153     EXPECT_EQ(0, FPDFText_CountRects(textpage, start, 1));
1154     EXPECT_EQ(0, FPDFText_CountRects(textpage, start, 2));
1155     EXPECT_EQ(0, FPDFText_CountRects(textpage, start, 500));
1156   }
1157 
1158   FPDFText_ClosePage(textpage);
1159   UnloadPage(page);
1160 }
1161 
// Exercises FPDFTextObj_GetText() on the first page object of
// hello_world.pdf: size query, retrieval, null arguments, and a buffer that
// is too small.
TEST_F(FPDFTextEmbedderTest, GetText) {
  ASSERT_TRUE(OpenDocument("hello_world.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // The scoper closes the text page even if a fatal assertion below returns
    // early, and this block ends before the page itself is unloaded.
    ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
    ASSERT_TRUE(text_page);

    EXPECT_EQ(2, FPDFPage_CountObjects(page));
    FPDF_PAGEOBJECT text_object = FPDFPage_GetObject(page, 0);
    ASSERT_TRUE(text_object);

    // Positive testing.
    constexpr char kHelloText[] = "Hello, world!";
    // Return value includes the terminating NUL that is provided.
    constexpr unsigned long kHelloUTF16Size = FX_ArraySize(kHelloText) * 2;
    constexpr wchar_t kHelloWideText[] = L"Hello, world!";
    unsigned long size =
        FPDFTextObj_GetText(text_object, text_page.get(), nullptr, 0);
    ASSERT_EQ(kHelloUTF16Size, size);

    // |size| is a byte count (two per character, see |kHelloUTF16Size|), so
    // |size| elements is more than enough room.
    std::vector<unsigned short> buffer(size);
    ASSERT_EQ(size, FPDFTextObj_GetText(text_object, text_page.get(),
                                        buffer.data(), size));
    ASSERT_EQ(kHelloWideText, GetPlatformWString(buffer.data()));

    // Negative testing.
    ASSERT_EQ(0U, FPDFTextObj_GetText(nullptr, text_page.get(), nullptr, 0));
    ASSERT_EQ(0U, FPDFTextObj_GetText(text_object, nullptr, nullptr, 0));
    ASSERT_EQ(0U, FPDFTextObj_GetText(nullptr, nullptr, nullptr, 0));

    // Buffer is too small, ensure it's not modified.
    buffer.resize(2);
    buffer[0] = 'x';
    buffer[1] = '\0';
    size = FPDFTextObj_GetText(text_object, text_page.get(), buffer.data(),
                               buffer.size());
    ASSERT_EQ(kHelloUTF16Size, size);
    ASSERT_EQ('x', buffer[0]);
    ASSERT_EQ('\0', buffer[1]);
  }

  UnloadPage(page);
}
1205 
// For each page of cropped_text.pdf, checks the page bounding box, the full
// page text, and the text returned by FPDFText_GetBoundedText() for that box.
TEST_F(FPDFTextEmbedderTest, CroppedText) {
  static constexpr int kPageCount = 4;
  // Expected bounding box of each page.
  static constexpr FS_RECTF kBoxes[kPageCount] = {
      {50.0f, 150.0f, 150.0f, 50.0f},
      {50.0f, 150.0f, 150.0f, 50.0f},
      {60.0f, 150.0f, 150.0f, 60.0f},
      {60.0f, 150.0f, 150.0f, 60.0f},
  };
  // Expected text inside each page's bounding box.
  static constexpr const char* kExpectedText[kPageCount] = {
      " world!\r\ndbye, world!",
      " world!\r\ndbye, world!",
      "bye, world!",
      "bye, world!",
  };

  ASSERT_TRUE(OpenDocument("cropped_text.pdf"));
  ASSERT_EQ(kPageCount, FPDF_GetPageCount(document()));

  for (int page_index = 0; page_index < kPageCount; ++page_index) {
    FPDF_PAGE page = LoadPage(page_index);
    ASSERT_TRUE(page);

    FS_RECTF box;
    EXPECT_TRUE(FPDF_GetPageBoundingBox(page, &box));
    const FS_RECTF& expected_box = kBoxes[page_index];
    EXPECT_EQ(expected_box.left, box.left);
    EXPECT_EQ(expected_box.top, box.top);
    EXPECT_EQ(expected_box.right, box.right);
    EXPECT_EQ(expected_box.bottom, box.bottom);

    {
      ScopedFPDFTextPage textpage(FPDFText_LoadPage(page));
      ASSERT_TRUE(textpage);

      // The unbounded text is the full string on every page.
      unsigned short buffer[128];
      memset(buffer, 0xbd, sizeof(buffer));
      const int full_length =
          FPDFText_GetText(textpage.get(), 0, 128, buffer);
      ASSERT_EQ(kHelloGoodbyeTextSize, full_length);
      EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText, buffer,
                                        kHelloGoodbyeTextSize));

      // Without a buffer the expected result is the character count alone.
      const char* expected_text = kExpectedText[page_index];
      int expected_char_count = strlen(expected_text);
      ASSERT_EQ(expected_char_count,
                FPDFText_GetBoundedText(textpage.get(), box.left, box.top,
                                        box.right, box.bottom, nullptr, 0));

      // With a buffer the expected result is one more than that count.
      memset(buffer, 0xbd, sizeof(buffer));
      ASSERT_EQ(expected_char_count + 1,
                FPDFText_GetBoundedText(textpage.get(), box.left, box.top,
                                        box.right, box.bottom, buffer, 128));
      EXPECT_TRUE(
          check_unsigned_shorts(expected_text, buffer, expected_char_count));
    }

    UnloadPage(page);
  }
}
1262 
// A leading control character in bug_1139.pdf must not appear in the
// extracted text nor prevent extraction.
TEST_F(FPDFTextEmbedderTest, Bug_1139) {
  ASSERT_TRUE(OpenDocument("bug_1139.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // The scoper closes the text page even if the fatal length check below
    // returns early, and this block ends before the page itself is unloaded.
    ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
    ASSERT_TRUE(text_page);

    // -1 for CountChars not including the \0, but +1 for the extra control
    // character.
    EXPECT_EQ(kHelloGoodbyeTextSize, FPDFText_CountChars(text_page.get()));

    // There is an extra control character at the beginning of the string, but
    // it should not appear in the output nor prevent extracting the text.
    unsigned short buffer[128];
    int num_chars = FPDFText_GetText(text_page.get(), 0, 128, buffer);
    ASSERT_EQ(kHelloGoodbyeTextSize, num_chars);
    EXPECT_TRUE(check_unsigned_shorts(kHelloGoodbyeText, buffer,
                                      kHelloGoodbyeTextSize));
  }

  UnloadPage(page);
}
1285 
// bug_642.pdf should expose exactly the text "ABCD".
TEST_F(FPDFTextEmbedderTest, Bug_642) {
  ASSERT_TRUE(OpenDocument("bug_642.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);
  {
    ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
    ASSERT_TRUE(text_page);

    constexpr char kText[] = "ABCD";
    constexpr size_t kTextSize = FX_ArraySize(kText);
    // CountChars does not count the \0, hence one less than |kTextSize|.
    constexpr int kCharCount = static_cast<int>(kTextSize) - 1;
    EXPECT_EQ(kCharCount, FPDFText_CountChars(text_page.get()));

    // Request every character; the returned length is expected to be one
    // more than the number requested.
    unsigned short buffer[kTextSize];
    const int length_with_nul =
        FPDFText_GetText(text_page.get(), 0, kCharCount, buffer);
    ASSERT_EQ(static_cast<int>(kTextSize), length_with_nul);
    EXPECT_TRUE(check_unsigned_shorts(kText, buffer, kTextSize));
  }

  UnloadPage(page);
}
1309 
// Checks FPDFText_GetCharAngle() on rotated_text.pdf for invalid arguments
// and for one character in each quadrant.
TEST_F(FPDFTextEmbedderTest, GetCharAngle) {
  ASSERT_TRUE(OpenDocument("rotated_text.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // Closed at the end of this block, before the page is unloaded.
    ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
    ASSERT_TRUE(text_page);

    // Array sizes of the substrings (each includes its NUL), used to step
    // from one probed character to the next.
    static constexpr int kSubstringsSize[] = {FX_ArraySize("Hello,"),
                                              FX_ArraySize(" world!\r\n"),
                                              FX_ArraySize("Goodbye,")};

    // -1 for CountChars not including the \0, but +1 for the extra control
    // character.
    EXPECT_EQ(kHelloGoodbyeTextSize, FPDFText_CountChars(text_page.get()));

    // Invalid arguments yield -1.
    EXPECT_FLOAT_EQ(-1.0f, FPDFText_GetCharAngle(nullptr, 0));
    EXPECT_FLOAT_EQ(-1.0f, FPDFText_GetCharAngle(text_page.get(), -1));
    EXPECT_FLOAT_EQ(-1.0f, FPDFText_GetCharAngle(text_page.get(),
                                                 kHelloGoodbyeTextSize + 1));

    // Test GetCharAngle for every quadrant
    const int second_index = kSubstringsSize[0];
    const int third_index = second_index + kSubstringsSize[1];
    const int fourth_index = third_index + kSubstringsSize[2];
    EXPECT_NEAR(FX_PI / 4.0, FPDFText_GetCharAngle(text_page.get(), 0), 0.001);
    EXPECT_NEAR(3 * FX_PI / 4.0,
                FPDFText_GetCharAngle(text_page.get(), second_index), 0.001);
    EXPECT_NEAR(5 * FX_PI / 4.0,
                FPDFText_GetCharAngle(text_page.get(), third_index), 0.001);
    EXPECT_NEAR(7 * FX_PI / 4.0,
                FPDFText_GetCharAngle(text_page.get(), fourth_index), 0.001);
  }

  UnloadPage(page);
}
1348 
// Checks FPDFText_GetFontWeight() on font_weight.pdf for invalid arguments
// and for both characters on the page.
TEST_F(FPDFTextEmbedderTest, GetFontWeight) {
  ASSERT_TRUE(OpenDocument("font_weight.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // Closed at the end of this block, before the page is unloaded.
    ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
    ASSERT_TRUE(text_page);

    EXPECT_EQ(2, FPDFText_CountChars(text_page.get()));

    // Null text page, negative index and out-of-range index all yield -1.
    EXPECT_EQ(-1, FPDFText_GetFontWeight(nullptr, 0));
    EXPECT_EQ(-1, FPDFText_GetFontWeight(text_page.get(), -1));
    EXPECT_EQ(-1, FPDFText_GetFontWeight(text_page.get(), 314));

    // The font used for this text only specifies /StemV (80); the weight
    // value that is returned should be calculated from that (80*5 == 400).
    EXPECT_EQ(400, FPDFText_GetFontWeight(text_page.get(), 0));

    // Using a /StemV value of 82, the estimate comes out to 410, even though
    // /FontWeight is 400.
    // TODO(crbug.com/pdfium/1420): Fix the return value here.
    EXPECT_EQ(410, FPDFText_GetFontWeight(text_page.get(), 1));
  }

  UnloadPage(page);
}
1375 
// Checks FPDFText_GetTextRenderMode() on text_render_mode.pdf for invalid
// arguments and for a filled and a stroked character.
TEST_F(FPDFTextEmbedderTest, GetTextRenderMode) {
  // Fatal, like every other test here: nothing below is meaningful without an
  // open document.
  ASSERT_TRUE(OpenDocument("text_render_mode.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // The scoper closes the text page even if a fatal assertion below returns
    // early, and this block ends before the page itself is unloaded.
    ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
    ASSERT_TRUE(text_page);

    ASSERT_EQ(12, FPDFText_CountChars(text_page.get()));

    // Null text page, negative index and out-of-range index.
    ASSERT_EQ(FPDF_TEXTRENDERMODE_UNKNOWN,
              FPDFText_GetTextRenderMode(nullptr, 0));
    ASSERT_EQ(FPDF_TEXTRENDERMODE_UNKNOWN,
              FPDFText_GetTextRenderMode(text_page.get(), -1));
    ASSERT_EQ(FPDF_TEXTRENDERMODE_UNKNOWN,
              FPDFText_GetTextRenderMode(text_page.get(), 314));

    ASSERT_EQ(FPDF_TEXTRENDERMODE_FILL,
              FPDFText_GetTextRenderMode(text_page.get(), 0));

    ASSERT_EQ(FPDF_TEXTRENDERMODE_STROKE,
              FPDFText_GetTextRenderMode(text_page.get(), 7));
  }

  UnloadPage(page);
}
1401 
// Checks FPDFText_GetFillColor() on text_color.pdf for invalid arguments and
// for the single character on the page.
TEST_F(FPDFTextEmbedderTest, GetFillColor) {
  ASSERT_TRUE(OpenDocument("text_color.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // The scoper closes the text page even if a fatal assertion below returns
    // early, and this block ends before the page itself is unloaded.
    ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
    ASSERT_TRUE(text_page);

    ASSERT_EQ(1, FPDFText_CountChars(text_page.get()));

    // Null text page, negative index, out-of-range index, and finally a valid
    // index with null output pointers must all fail.
    ASSERT_FALSE(
        FPDFText_GetFillColor(nullptr, 0, nullptr, nullptr, nullptr, nullptr));
    ASSERT_FALSE(FPDFText_GetFillColor(text_page.get(), -1, nullptr, nullptr,
                                       nullptr, nullptr));
    ASSERT_FALSE(FPDFText_GetFillColor(text_page.get(), 314, nullptr, nullptr,
                                       nullptr, nullptr));
    ASSERT_FALSE(FPDFText_GetFillColor(text_page.get(), 0, nullptr, nullptr,
                                       nullptr, nullptr));

    unsigned int r;
    unsigned int g;
    unsigned int b;
    unsigned int a;
    ASSERT_TRUE(FPDFText_GetFillColor(text_page.get(), 0, &r, &g, &b, &a));
    ASSERT_EQ(0xffu, r);
    ASSERT_EQ(0u, g);
    ASSERT_EQ(0u, b);
    ASSERT_EQ(0xffu, a);
  }

  UnloadPage(page);
}
1434 
// Checks FPDFText_GetStrokeColor() on text_color.pdf for invalid arguments
// and for the single character on the page.
TEST_F(FPDFTextEmbedderTest, GetStrokeColor) {
  ASSERT_TRUE(OpenDocument("text_color.pdf"));
  FPDF_PAGE page = LoadPage(0);
  ASSERT_TRUE(page);

  {
    // The scoper closes the text page even if a fatal assertion below returns
    // early, and this block ends before the page itself is unloaded.
    ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
    ASSERT_TRUE(text_page);

    ASSERT_EQ(1, FPDFText_CountChars(text_page.get()));

    // Null text page, negative index, out-of-range index, and finally a valid
    // index with null output pointers must all fail.
    ASSERT_FALSE(FPDFText_GetStrokeColor(nullptr, 0, nullptr, nullptr, nullptr,
                                         nullptr));
    ASSERT_FALSE(FPDFText_GetStrokeColor(text_page.get(), -1, nullptr, nullptr,
                                         nullptr, nullptr));
    ASSERT_FALSE(FPDFText_GetStrokeColor(text_page.get(), 314, nullptr,
                                         nullptr, nullptr, nullptr));
    ASSERT_FALSE(FPDFText_GetStrokeColor(text_page.get(), 0, nullptr, nullptr,
                                         nullptr, nullptr));

    unsigned int r;
    unsigned int g;
    unsigned int b;
    unsigned int a;
    ASSERT_TRUE(FPDFText_GetStrokeColor(text_page.get(), 0, &r, &g, &b, &a));
    ASSERT_EQ(0u, r);
    ASSERT_EQ(0xffu, g);
    ASSERT_EQ(0u, b);
    ASSERT_EQ(0xffu, a);
  }

  UnloadPage(page);
}
1467 
TEST_F(FPDFTextEmbedderTest,GetMatrix)1468 TEST_F(FPDFTextEmbedderTest, GetMatrix) {
1469   constexpr char kExpectedText[] = "A1\r\nA2\r\nA3";
1470   constexpr size_t kExpectedTextSize = FX_ArraySize(kExpectedText);
1471   constexpr FS_MATRIX kExpectedMatrices[] = {
1472       {12.0f, 0.0f, 0.0f, 10.0f, 66.0f, 90.0f},
1473       {12.0f, 0.0f, 0.0f, 10.0f, 66.0f, 90.0f},
1474       {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
1475       {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
1476       {12.0f, 0.0f, 0.0f, 10.0f, 38.0f, 60.0f},
1477       {12.0f, 0.0f, 0.0f, 10.0f, 38.0f, 60.0f},
1478       {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
1479       {1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f},
1480       {1.0f, 0.0f, 0.0f, 0.833333, 60.0f, 130.0f},
1481       {1.0f, 0.0f, 0.0f, 0.833333, 60.0f, 130.0f},
1482   };
1483   constexpr size_t kExpectedCount = FX_ArraySize(kExpectedMatrices);
1484   static_assert(kExpectedCount + 1 == kExpectedTextSize,
1485                 "Bad expected matrix size");
1486 
1487   // For a size 12 letter 'A'.
1488   constexpr double kExpectedCharWidth = 8.436;
1489   constexpr double kExpectedCharHeight = 6.77;
1490 
1491   ASSERT_TRUE(OpenDocument("font_matrix.pdf"));
1492   FPDF_PAGE page = LoadPage(0);
1493   ASSERT_TRUE(page);
1494 
1495   {
1496     ScopedFPDFTextPage text_page(FPDFText_LoadPage(page));
1497     ASSERT_TRUE(text_page);
1498     ASSERT_EQ(static_cast<int>(kExpectedCount),
1499               FPDFText_CountChars(text_page.get()));
1500 
1501     {
1502       // Check the characters.
1503       unsigned short buffer[kExpectedTextSize];
1504       ASSERT_EQ(static_cast<int>(kExpectedTextSize),
1505                 FPDFText_GetText(text_page.get(), 0, kExpectedCount, buffer));
1506       EXPECT_TRUE(
1507           check_unsigned_shorts(kExpectedText, buffer, kExpectedTextSize));
1508     }
1509 
1510     {
1511       // Check the character box size.
1512       double left;
1513       double right;
1514       double bottom;
1515       double top;
1516       ASSERT_TRUE(FPDFText_GetCharBox(text_page.get(), 0, &left, &right,
1517                                       &bottom, &top));
1518       EXPECT_NEAR(kExpectedCharWidth, right - left, 0.001);
1519       EXPECT_NEAR(kExpectedCharHeight, top - bottom, 0.001);
1520       ASSERT_TRUE(FPDFText_GetCharBox(text_page.get(), 4, &left, &right,
1521                                       &bottom, &top));
1522       EXPECT_NEAR(kExpectedCharWidth, right - left, 0.001);
1523       EXPECT_NEAR(kExpectedCharHeight, top - bottom, 0.001);
1524       ASSERT_TRUE(FPDFText_GetCharBox(text_page.get(), 8, &left, &right,
1525                                       &bottom, &top));
1526       EXPECT_NEAR(kExpectedCharWidth, right - left, 0.001);
1527       EXPECT_NEAR(kExpectedCharHeight, top - bottom, 0.001);
1528     }
1529 
1530     // Check the character matrix.
1531     FS_MATRIX matrix;
1532     for (size_t i = 0; i < kExpectedCount; ++i) {
1533       ASSERT_TRUE(FPDFText_GetMatrix(text_page.get(), i, &matrix)) << i;
1534       EXPECT_FLOAT_EQ(kExpectedMatrices[i].a, matrix.a) << i;
1535       EXPECT_FLOAT_EQ(kExpectedMatrices[i].b, matrix.b) << i;
1536       EXPECT_FLOAT_EQ(kExpectedMatrices[i].c, matrix.c) << i;
1537       EXPECT_FLOAT_EQ(kExpectedMatrices[i].d, matrix.d) << i;
1538       EXPECT_FLOAT_EQ(kExpectedMatrices[i].e, matrix.e) << i;
1539       EXPECT_FLOAT_EQ(kExpectedMatrices[i].f, matrix.f) << i;
1540     }
1541 
1542     // Check bad parameters.
1543     EXPECT_FALSE(FPDFText_GetMatrix(nullptr, 0, &matrix));
1544     EXPECT_FALSE(FPDFText_GetMatrix(text_page.get(), 10, &matrix));
1545     EXPECT_FALSE(FPDFText_GetMatrix(text_page.get(), -1, &matrix));
1546     EXPECT_FALSE(FPDFText_GetMatrix(text_page.get(), 0, nullptr));
1547   }
1548 
1549   UnloadPage(page);
1550 }
1551