1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "page.h"
18
19 #include <stddef.h>
20 #include <stdint.h>
21
22 #include <algorithm>
23 #include <limits>
24 #include <span>
25 #include <string>
26 #include <vector>
27
28 #include "cpp/fpdf_scopers.h"
29 #include "form_filler.h"
30 #include "form_widget_info.h"
31 #include "fpdf_annot.h"
32 #include "fpdf_doc.h"
33 #include "fpdf_text.h"
34 #include "fpdfview.h"
35 #include "logging.h"
36 #include "normalize.h"
37 #include "rect.h"
38 #include "utf.h"
39 #include "utils/annot_hider.h"
40 #include "utils/text.h"
41
42 #define LOG_TAG "page"
43
44 using std::vector;
45
46 namespace pdfClient {
47
48 static const int kBytesPerPixel = 4;
49
50 static const Rectangle_i kEmptyIntRectangle = IntRect(0, 0, 0, 0);
51
52 // The acceptable fatness / inaccuracy of a user's finger in points.
53 static const int kFingerTolerance = 10;
54
55 static const int RENDER_MODE_FOR_DISPLAY = 1;
56 static const int RENDER_MODE_FOR_PRINT = 2;
57
Page(FPDF_DOCUMENT doc,int page_num,FormFiller * form_filler)58 Page::Page(FPDF_DOCUMENT doc, int page_num, FormFiller* form_filler)
59 : document_(doc),
60 page_(FPDF_LoadPage(doc, page_num)),
61 form_filler_(form_filler),
62 invalid_rect_(kEmptyIntRectangle) {}
63
64 Page::Page(Page&& p) = default;
65
~Page()66 Page::~Page() {}
67
Width() const68 int Page::Width() const {
69 return FPDF_GetPageWidth(page_.get());
70 }
71
Height() const72 int Page::Height() const {
73 return FPDF_GetPageHeight(page_.get());
74 }
75
Dimensions() const76 Rectangle_i Page::Dimensions() const {
77 return IntRect(0, 0, Width(), Height());
78 }
79
Render(FPDF_BITMAP bitmap,FS_MATRIX transform,int clip_left,int clip_top,int clip_right,int clip_bottom,int render_mode,int show_annot_types,bool render_form_fields)80 void Page::Render(FPDF_BITMAP bitmap, FS_MATRIX transform, int clip_left, int clip_top,
81 int clip_right, int clip_bottom, int render_mode, int show_annot_types,
82 bool render_form_fields) {
83 std::unordered_set<int> types;
84 for (auto renderFlag_annot : renderFlagsAnnotsMap) {
85 if ((renderFlag_annot.first & show_annot_types) != 0) {
86 for (int annot_type : renderFlag_annot.second) {
87 types.insert(annot_type);
88 }
89 }
90 }
91 if (render_form_fields) types.insert(FPDF_ANNOT_WIDGET);
92 pdfClient_utils::AnnotHider annot_hider(page_.get(), types);
93 int renderFlags = FPDF_REVERSE_BYTE_ORDER;
94 if (render_mode == RENDER_MODE_FOR_DISPLAY) {
95 renderFlags |= FPDF_LCD_TEXT | FPDF_ANNOT;
96 } else if (render_mode == RENDER_MODE_FOR_PRINT) {
97 renderFlags |= FPDF_PRINTING;
98 }
99
100 FS_RECTF clip = {(float)clip_left, (float)clip_top, (float)clip_right, (float)clip_bottom};
101 FPDF_RenderPageBitmapWithMatrix(bitmap, page_.get(), &transform, &clip, renderFlags);
102
103 if (render_form_fields) {
104 form_filler_->RenderTile(page_.get(), bitmap, transform, clip, renderFlags);
105 }
106 }
107
ApplyPageTransform(const Point_d & input) const108 Point_i Page::ApplyPageTransform(const Point_d& input) const {
109 Point_i output;
110 FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, input.x, input.y, &output.x,
111 &output.y);
112 return output;
113 }
114
ApplyPageTransform(const Rectangle_d & input) const115 Rectangle_i Page::ApplyPageTransform(const Rectangle_d& input) const {
116 return ApplyPageTransform(OuterIntRect(input));
117 }
118
// Converts an integer page rectangle to device coordinates by transforming
// its (left, top) and (right, bottom) corners, then clips the result to the
// page's Dimensions().
Rectangle_i Page::ApplyPageTransform(const Rectangle_i& input) const {
    const int device_width = Width();
    const int device_height = Height();

    Point_i first_corner, second_corner;
    FPDF_PageToDevice(page_.get(), 0, 0, device_width, device_height, 0, input.left, input.top,
                      &first_corner.x, &first_corner.y);
    FPDF_PageToDevice(page_.get(), 0, 0, device_width, device_height, 0, input.right,
                      input.bottom, &second_corner.x, &second_corner.y);

    // Constrain the transformed rectangle within the page.
    return Intersect(IntRect(first_corner, second_corner), Dimensions());
}
131
UnapplyPageTransform(const Point_i & input) const132 Point_d Page::UnapplyPageTransform(const Point_i& input) const {
133 Point_d output;
134 FPDF_DeviceToPage(page_.get(), 0, 0, Width(), Height(), 0, input.x, input.y, &output.x,
135 &output.y);
136 return output;
137 }
138
NumChars()139 int Page::NumChars() {
140 return FPDFText_CountChars(text_page());
141 }
142
GetUnicode(int char_index)143 uint32_t Page::GetUnicode(int char_index) {
144 return FPDFText_GetUnicode(text_page(), char_index);
145 }
146
GetTextUtf8()147 std::string Page::GetTextUtf8() {
148 return GetTextUtf8(first_printable_char_index(), last_printable_char_index() + 1);
149 }
150
GetTextUtf8(const int start_index,const int stop_index)151 std::string Page::GetTextUtf8(const int start_index, const int stop_index) {
152 std::string result;
153 for (int i = start_index; i < stop_index; i++) {
154 AppendpdfClientCodepointAsUtf8(GetUnicode(i), &result);
155 }
156 return result;
157 }
158
GetAltTextUtf8(vector<std::string> * result) const159 void Page::GetAltTextUtf8(vector<std::string>* result) const {
160 ::pdfClient_utils::GetAltText(page_.get(), result);
161 }
162
FindMatchesUtf8(std::string_view utf8,vector<TextRange> * matches)163 int Page::FindMatchesUtf8(std::string_view utf8, vector<TextRange>* matches) {
164 std::u32string query(Utf8ToUtf32(utf8));
165 // Normalize characters of string for searching - ignore case and accents.
166 NormalizeStringForSearch(&query);
167 TextRange match;
168 int page_start = first_printable_char_index();
169 int page_stop = last_printable_char_index() + 1;
170 int num_matches = 0;
171 while (FindMatch(query, page_start, page_stop, &match)) {
172 if (matches != nullptr) {
173 matches->push_back(match);
174 }
175 num_matches++;
176 page_start = match.second;
177 }
178 return num_matches;
179 }
180
BoundsOfMatchesUtf8(std::string_view utf8,vector<Rectangle_i> * rects,vector<int> * match_to_rect,vector<int> * char_indexes)181 int Page::BoundsOfMatchesUtf8(std::string_view utf8, vector<Rectangle_i>* rects,
182 vector<int>* match_to_rect, vector<int>* char_indexes) {
183 vector<TextRange> matches;
184 int num_matches = FindMatchesUtf8(utf8, &matches);
185 int num_rects = 0;
186 int num_matches_with_rects = 0;
187 for (int i = 0; i < num_matches; i++) {
188 int start = matches[i].first, stop = matches[i].second;
189 int num_rects_for_match = GetTextBounds(start, stop, rects);
190 if (num_rects_for_match == 0) {
191 continue;
192 }
193 if (match_to_rect != nullptr) {
194 match_to_rect->push_back(num_rects);
195 }
196 if (char_indexes != nullptr) {
197 char_indexes->push_back(start);
198 }
199 num_rects += num_rects_for_match;
200 num_matches_with_rects++;
201 }
202 return num_matches_with_rects;
203 }
204
GetTextBounds(const int start_index,const int stop_index,vector<Rectangle_i> * rects)205 int Page::GetTextBounds(const int start_index, const int stop_index, vector<Rectangle_i>* rects) {
206 int num_rects = 0;
207 Rectangle_d rect = DoubleRect(0, 0, 0, 0);
208 for (int index = start_index; index < stop_index; index++) {
209 double x1, x2, y1, y2;
210 // This call doesn't apply the page transform - have to apply later.
211 FPDFText_GetCharBox(text_page(), index, &x1, &x2, &y1, &y2);
212 if (x1 != x2 && y1 != y2) {
213 if (IsEmpty(rect)) {
214 rect = DoubleRect(x1, y1, x2, y2);
215 } else {
216 rect = Union(rect, DoubleRect(x1, y1, x2, y2));
217 }
218 }
219 // Starting a new line - push current rect, start a new rect.
220 if (IsLineBreak(GetUnicode(index))) {
221 if (!IsEmpty(rect)) {
222 num_rects++;
223 rects->push_back(ApplyPageTransform(rect));
224 }
225 rect = DoubleRect(0, 0, 0, 0);
226 }
227 }
228 // Push the last current rect.
229 if (!IsEmpty(rect)) {
230 num_rects++;
231 rects->push_back(ApplyPageTransform(rect));
232 }
233 return num_rects;
234 }
235
SelectWordAt(const Point_i & point,SelectionBoundary * start,SelectionBoundary * stop)236 bool Page::SelectWordAt(const Point_i& point, SelectionBoundary* start, SelectionBoundary* stop) {
237 Point_d char_point = UnapplyPageTransform(point);
238 int char_index = FPDFText_GetCharIndexAtPos(text_page(), char_point.x, char_point.y,
239 kFingerTolerance, kFingerTolerance);
240 if (char_index < 0 || IsWordBreak(GetUnicode(char_index))) {
241 return false; // No word at the given point to select.
242 }
243 start->index = GetWordStartIndex(char_index);
244 stop->index = GetWordStopIndex(char_index);
245 ConstrainBoundary(start);
246 ConstrainBoundary(stop);
247 return true;
248 }
249
ConstrainBoundary(SelectionBoundary * boundary)250 void Page::ConstrainBoundary(SelectionBoundary* boundary) {
251 if (boundary->index < 0) {
252 // Index is not specified - find the nearest index to the given point.
253 *boundary = GetBoundaryAtPoint(boundary->point);
254 } else {
255 // Index is specified - find the point at that index.
256 int index = std::max(boundary->index, first_printable_char_index());
257 index = std::min(index, last_printable_char_index() + 1);
258 *boundary = GetBoundaryAtIndex(index);
259 }
260 }
261
GetFontSize(int index)262 int Page::GetFontSize(int index) {
263 return FPDFText_GetFontSize(text_page(), index);
264 }
265
GetLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const266 int Page::GetLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
267 vector<std::string>* urls) const {
268 return GetAnnotatedLinksUtf8(rects, link_to_rect, urls) +
269 GetInferredLinksUtf8(rects, link_to_rect, urls);
270 }
271
GetGotoLinks() const272 vector<GotoLink> Page::GetGotoLinks() const {
273 vector<GotoLink> links;
274
275 FPDF_LINK link = nullptr;
276 int pos = 0;
277 while (FPDFLink_Enumerate(page_.get(), &pos, &link)) {
278 if (!IsGotoLink(link)) {
279 continue;
280 }
281 // Get the bounds of the actual link
282 vector<Rectangle_i> goto_link_rects;
283 Rectangle_i rect = GetRect(link);
284 goto_link_rects.push_back(rect);
285
286 GotoLinkDest* goto_link_dest = new GotoLinkDest();
287
288 // Get and parse the destination
289 FPDF_DEST fpdf_dest = FPDFLink_GetDest(document_, link);
290 goto_link_dest->set_page_number(FPDFDest_GetDestPageIndex(document_, fpdf_dest));
291
292 FPDF_BOOL has_x_coord;
293 FPDF_BOOL has_y_coord;
294 FPDF_BOOL has_zoom;
295 FS_FLOAT x;
296 FS_FLOAT y;
297 FS_FLOAT zoom;
298 FPDF_BOOL success = FPDFDest_GetLocationInPage(fpdf_dest, &has_x_coord, &has_y_coord,
299 &has_zoom, &x, &y, &zoom);
300
301 if (!success) {
302 continue;
303 }
304 if (has_x_coord) {
305 auto point = DoublePoint(x, 0);
306 auto tPoint = ApplyPageTransform(point);
307 goto_link_dest->set_x(tPoint.x);
308 }
309 if (has_y_coord) {
310 auto point = DoublePoint(0, y);
311 auto tPoint = ApplyPageTransform(point);
312 goto_link_dest->set_y(tPoint.y);
313 }
314 if (has_zoom) {
315 goto_link_dest->set_zoom(zoom);
316 }
317
318 GotoLink goto_link = GotoLink{goto_link_rects, *goto_link_dest};
319
320 // Ensure that links are within page bounds
321 if (goto_link_dest->x >= 0 && goto_link_dest->y >= 0) {
322 links.push_back(goto_link);
323 } else {
324 LOGE("Goto Link out of bound (x=%f, y=%f). Page width=%d, height =%d",
325 goto_link_dest->x, goto_link_dest->y, Width(), Height());
326 }
327 }
328 return links;
329 }
330
InitializeFormFilling()331 void Page::InitializeFormFilling() {
332 form_filler_->NotifyAfterPageLoad(page_.get());
333 }
334
TerminateFormFilling()335 void Page::TerminateFormFilling() {
336 form_filler_->NotifyBeforePageClose(page_.get());
337 }
338
GetFormWidgetInfo(Point_i point)339 FormWidgetInfo Page::GetFormWidgetInfo(Point_i point) {
340 Point_d page_point = UnapplyPageTransform(point);
341 FormWidgetInfo result = form_filler_->GetFormWidgetInfo(page_.get(), page_point);
342 if (result.FoundWidget()) {
343 // widget_rect is in page coords, transform to device coords before
344 // returning to user.
345 Rectangle_i transformed_widget_rect = ApplyPageTransform(result.widget_rect());
346 result.set_widget_rect(transformed_widget_rect);
347 }
348
349 // Consume any rectangle that was invalidated by this action. Some
350 // info-gathering actions may cause temporary invalidation without
351 // actually doing anything that we need to redraw for.
352 ConsumeInvalidRect();
353 return result;
354 }
355
GetFormWidgetInfo(int annotation_index)356 FormWidgetInfo Page::GetFormWidgetInfo(int annotation_index) {
357 FormWidgetInfo result = form_filler_->GetFormWidgetInfo(page_.get(), annotation_index);
358 if (result.FoundWidget()) {
359 // widget_rect is in page coords; transform to device coords before
360 // returning to user.
361 Rectangle_i transformed_widget_rect = ApplyPageTransform(result.widget_rect());
362 result.set_widget_rect(transformed_widget_rect);
363 }
364
365 // Consume any rectangle that was invalidated by this action. Some
366 // info-gathering actions may cause temporary invalidation without
367 // actually doing anything that we need to redraw for.
368 ConsumeInvalidRect();
369 return result;
370 }
371
GetFormWidgetInfos(const std::unordered_set<int> & type_ids,std::vector<FormWidgetInfo> * widget_infos)372 void Page::GetFormWidgetInfos(const std::unordered_set<int>& type_ids,
373 std::vector<FormWidgetInfo>* widget_infos) {
374 form_filler_->GetFormWidgetInfos(page_.get(), type_ids, widget_infos);
375 for (FormWidgetInfo& widget_info : *widget_infos) {
376 // widget_rect is in page coords; transform to device coords before
377 // returning to user.
378 Rectangle_i transformed_widget_rect = ApplyPageTransform(widget_info.widget_rect());
379 widget_info.set_widget_rect(transformed_widget_rect);
380 }
381
382 // Consume any rectangles that were invalidated by this action. Some
383 // info-gathering actions may cause temporary invalidation without
384 // actually doing anything that we need to redraw for.
385 ConsumeInvalidRect();
386 }
387
ClickOnPoint(Point_i point)388 bool Page::ClickOnPoint(Point_i point) {
389 Point_d page_point = UnapplyPageTransform(point);
390 return form_filler_->ClickOnPoint(page_.get(), page_point);
391 }
SetFormFieldText(int annotation_index,std::string_view text)392 bool Page::SetFormFieldText(int annotation_index, std::string_view text) {
393 return form_filler_->SetText(page_.get(), annotation_index, text);
394 }
395
SetChoiceSelection(int annotation_index,std::span<const int> selected_indices)396 bool Page::SetChoiceSelection(int annotation_index, std::span<const int> selected_indices) {
397 return form_filler_->SetChoiceSelection(page_.get(), annotation_index, selected_indices);
398 }
NotifyInvalidRect(Rectangle_i rect)399 void Page::NotifyInvalidRect(Rectangle_i rect) {
400 if (rect.left < 0 || rect.top < 0 || rect.right < 0 || rect.bottom < 0 || IsEmpty(rect)) {
401 return;
402 }
403
404 Rectangle_i device_rect = ApplyPageTransform(rect);
405 // If invalid_rect_ is currently empty, avoid unioning so we don't extend
406 // |rect|'s top left corner to (0,0) for no reason.
407 if (IsEmpty(invalid_rect_)) {
408 invalid_rect_ = device_rect;
409 return;
410 }
411
412 invalid_rect_ = Union(invalid_rect_, device_rect);
413 }
414
HasInvalidRect()415 bool Page::HasInvalidRect() {
416 return !IsEmpty(invalid_rect_);
417 }
418
ConsumeInvalidRect()419 Rectangle_i Page::ConsumeInvalidRect() {
420 Rectangle_i copy = invalid_rect_;
421 invalid_rect_ = kEmptyIntRectangle;
422 return copy;
423 }
424
page()425 void* Page::page() {
426 return page_.get();
427 }
428
text_page()429 FPDF_TEXTPAGE Page::text_page() {
430 EnsureTextPageInitialized();
431 return text_page_.get();
432 }
433
first_printable_char_index()434 int Page::first_printable_char_index() {
435 EnsureTextPageInitialized();
436 return first_printable_char_index_;
437 }
438
last_printable_char_index()439 int Page::last_printable_char_index() {
440 EnsureTextPageInitialized();
441 return last_printable_char_index_;
442 }
443
EnsureTextPageInitialized()444 void Page::EnsureTextPageInitialized() {
445 if (text_page_) {
446 return;
447 }
448 text_page_.reset(FPDFText_LoadPage(page_.get()));
449
450 int num_chars = NumChars();
451
452 int i;
453 for (i = 0; i < num_chars && IsWordBreak(GetUnicode(i)); i++) {
454 }
455 first_printable_char_index_ = i;
456
457 for (i = num_chars - 1; i >= first_printable_char_index_ && IsWordBreak(GetUnicode(i)); i--) {
458 }
459 last_printable_char_index_ = i;
460 }
461
InPlaceSwapRedBlueChannels(void * pixels,const int num_pixels) const462 void Page::InPlaceSwapRedBlueChannels(void* pixels, const int num_pixels) const {
463 uint8_t* channels = static_cast<uint8_t*>(pixels);
464 uint8_t* channel1 = channels;
465 uint8_t* channel3 = channels + 2;
466
467 for (int i = 0; i < num_pixels; ++i, channel1 += kBytesPerPixel, channel3 += kBytesPerPixel) {
468 std::swap(*channel1, *channel3);
469 }
470 }
471
FindMatch(const std::u32string & query,const int page_start,const int page_stop,TextRange * match)472 bool Page::FindMatch(const std::u32string& query, const int page_start, const int page_stop,
473 TextRange* match) {
474 if (query.empty()) {
475 return false;
476 }
477
478 int max_match_start = page_stop - query.length();
479 for (int m = page_start; m <= max_match_start; m++) {
480 if (IsMatch(query, m, page_stop, match)) {
481 return true;
482 }
483 }
484 return false;
485 }
486
IsMatch(const std::u32string & query,const int match_start,const int page_stop,TextRange * match)487 bool Page::IsMatch(const std::u32string& query, const int match_start, const int page_stop,
488 TextRange* match) {
489 int page_index = match_start;
490 size_t query_index = 0;
491 uint32_t page_char = 0, prev_char = 0;
492 while (query_index < query.length()) {
493 prev_char = page_char;
494 page_char = GetUnicode(page_index);
495
496 if (NormalizeForSearch(page_char) == query[query_index]) {
497 // This codepoint matches (ignoring case and accents). Move to next.
498 query_index++;
499 page_index++;
500 } else if (IsSkippableForSearch(page_char, prev_char) && query_index > 0) {
501 // Don't increment query index - skip over skippable character.
502 page_index++;
503 if ((page_stop - page_index) < (query.length() - query_index)) {
504 return false; // Not enough room for query string before page_stop.
505 }
506 } else {
507 return false;
508 }
509 }
510 // Update match to contain page indices of match start and match stop.
511 match->first = match_start;
512 match->second = page_index;
513 return true;
514 }
515
GetBoundaryAtIndex(const int index)516 SelectionBoundary Page::GetBoundaryAtIndex(const int index) {
517 return GetBoundaryAtIndex(index, IsRtlAtIndex(index));
518 }
519
IsRtlAtIndex(const int index)520 bool Page::IsRtlAtIndex(const int index) {
521 int start_index = GetWordStartIndex(index);
522 int stop_index = GetWordStopIndex(index);
523 int word_length = stop_index - start_index;
524 if (word_length <= 1) {
525 // Can't tell directionality from a single character, guess LTR.
526 return false;
527 }
528 Rectangle_i start_bounds = GetCharBounds(start_index);
529 Rectangle_i stop_bounds = GetCharBounds(stop_index - 1);
530 return start_bounds.Center().x > stop_bounds.Center().x;
531 }
532
GetBoundaryAtIndex(const int index,bool is_rtl)533 SelectionBoundary Page::GetBoundaryAtIndex(const int index, bool is_rtl) {
534 // Normally we align the boundary on the start edge of next character:
535 int char_index = index;
536 bool use_end_edge = false;
537
538 // Printable characters have well defined bounding boxes, word-breaks (spaces
539 // and newlines) may not - so we use the end edge of the previous printable
540 // character instead if the next character is not printable.
541 if (index == NumChars() || IsWordBreak(GetUnicode(index))) {
542 char_index = index - 1;
543 use_end_edge = true;
544 }
545 bool use_right_edge = use_end_edge ^ is_rtl;
546
547 SelectionBoundary boundary(index, 0, 0, is_rtl);
548 Rectangle_i char_bounds = GetCharBounds(char_index);
549 boundary.point.x = use_right_edge ? char_bounds.right : char_bounds.left;
550 // Use the baseline (not the bottom) of the char as the y-value.
551 boundary.point.y = GetCharOrigin(char_index).y;
552 return boundary;
553 }
554
GetBoundaryAtPoint(const Point_i & point)555 SelectionBoundary Page::GetBoundaryAtPoint(const Point_i& point) {
556 SelectionBoundary best_boundary(0, point.x, point.y, false);
557 int best_distance_sq = std::numeric_limits<int>::max();
558
559 bool prev_char_is_word_char = false;
560 bool is_rtl = false;
561 for (int index = first_printable_char_index(); index <= last_printable_char_index() + 1;
562 index++) {
563 bool cur_char_is_word_char =
564 (index <= last_printable_char_index()) && !IsWordBreak(GetUnicode(index));
565 // Starting a new word:
566 if (cur_char_is_word_char && !prev_char_is_word_char) {
567 // Finding out RTL involves looking at each end of the word,
568 // so we only do it at the start of each word:
569 is_rtl = IsRtlAtIndex(index);
570 }
571 if (cur_char_is_word_char || prev_char_is_word_char) {
572 SelectionBoundary boundary = GetBoundaryAtIndex(index, is_rtl);
573 int dx = boundary.point.x - point.x;
574 int dy = boundary.point.y - point.y;
575 int distance_sq = dx * dx + dy * dy;
576 if (distance_sq < best_distance_sq) {
577 best_boundary = boundary;
578 best_distance_sq = distance_sq;
579 }
580 }
581 prev_char_is_word_char = cur_char_is_word_char;
582 }
583 return best_boundary;
584 }
585
GetWordStartIndex(const int index)586 int Page::GetWordStartIndex(const int index) {
587 int start_index = index;
588 while (start_index > 0 && !IsWordBreak(GetUnicode(start_index - 1))) {
589 --start_index; // Move start_index to the start of the word.
590 }
591 return start_index;
592 }
593
GetWordStopIndex(const int index)594 int Page::GetWordStopIndex(const int index) {
595 int stop_index = index;
596 int num_chars = NumChars();
597 while (stop_index < num_chars && !IsWordBreak(GetUnicode(stop_index))) {
598 ++stop_index; // Move stop_index to the end of the word.
599 }
600 return stop_index;
601 }
602
GetRawCharBounds(const int char_index)603 Rectangle_d Page::GetRawCharBounds(const int char_index) {
604 double x1, x2, y1, y2;
605 FPDFText_GetCharBox(text_page(), char_index, &x1, &x2, &y1, &y2);
606 return DoubleRect(x1, y1, x2, y2);
607 }
608
GetCharBounds(const int char_index)609 Rectangle_i Page::GetCharBounds(const int char_index) {
610 return ApplyPageTransform(GetRawCharBounds(char_index));
611 }
612
GetCharOrigin(const int char_index)613 Point_i Page::GetCharOrigin(const int char_index) {
614 double x = 0.0, y = 0.0;
615 FPDFText_GetCharOrigin(text_page(), char_index, &x, &y);
616 return ApplyPageTransform(DoublePoint(x, y));
617 }
618
GetAnnotatedLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const619 int Page::GetAnnotatedLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
620 vector<std::string>* urls) const {
621 FPDF_LINK link = nullptr;
622 int pos = 0;
623 int num_links_with_rect = 0;
624 while (FPDFLink_Enumerate(page_.get(), &pos, &link)) {
625 if (!IsUrlLink(link)) {
626 continue;
627 }
628
629 std::string url = GetUrlUtf8(link);
630 Rectangle_i rect = GetRect(link);
631 if (IsEmpty(rect)) {
632 continue;
633 }
634
635 link_to_rect->push_back(rects->size());
636 rects->push_back(rect);
637 urls->push_back(url);
638 num_links_with_rect++;
639 }
640 return num_links_with_rect;
641 }
642
GetInferredLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const643 int Page::GetInferredLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
644 vector<std::string>* urls) const {
645 // TODO(b/312730882): Infer links by looking for http:// and similar and for
646 // email addresses to use as mailto: links. There are some pdfClient methods for
647 // doing this, but these have some bugs which need patching or working around.
648 return 0;
649 }
650
GetUrlUtf8(FPDF_LINK link) const651 std::string Page::GetUrlUtf8(FPDF_LINK link) const {
652 FPDF_ACTION action = FPDFLink_GetAction(link);
653 // Allocate a string big enough to hold the URL.
654 std::string url(FPDFAction_GetURIPath(document_, action, nullptr, 0), '\0');
655 // Then write the URL to it.
656 FPDFAction_GetURIPath(document_, action, &url[0], url.length());
657 EraseTrailingNulls(&url);
658 return url;
659 }
660
GetRect(FPDF_LINK link) const661 Rectangle_i Page::GetRect(FPDF_LINK link) const {
662 FS_RECTF r;
663 if (!FPDFLink_GetAnnotRect(link, &r)) {
664 return Rectangle_i();
665 }
666
667 Rectangle_d rect_d = DoubleRect(r.left, r.top, r.right, r.bottom);
668 return ApplyPageTransform(rect_d);
669 }
670
IsGotoLink(FPDF_LINK link) const671 bool Page::IsGotoLink(FPDF_LINK link) const {
672 FPDF_ACTION action = FPDFLink_GetAction(link);
673 return action != nullptr && FPDFAction_GetType(action) == PDFACTION_GOTO;
674 }
675
IsUrlLink(FPDF_LINK link) const676 bool Page::IsUrlLink(FPDF_LINK link) const {
677 FPDF_ACTION action = FPDFLink_GetAction(link);
678 return action != nullptr && FPDFAction_GetType(action) == PDFACTION_URI;
679 }
680
681 } // namespace pdfClient