1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "page.h"
18 
19 #include <stddef.h>
20 #include <stdint.h>
21 
22 #include <algorithm>
23 #include <limits>
24 #include <span>
25 #include <string>
26 #include <vector>
27 
28 #include "cpp/fpdf_scopers.h"
29 #include "form_filler.h"
30 #include "form_widget_info.h"
31 #include "fpdf_annot.h"
32 #include "fpdf_doc.h"
33 #include "fpdf_text.h"
34 #include "fpdfview.h"
35 #include "logging.h"
36 #include "normalize.h"
37 #include "rect.h"
38 #include "utf.h"
39 #include "utils/annot_hider.h"
40 #include "utils/text.h"
41 
42 #define LOG_TAG "page"
43 
44 using std::vector;
45 
46 namespace pdfClient {
47 
48 static const int kBytesPerPixel = 4;
49 
50 static const Rectangle_i kEmptyIntRectangle = IntRect(0, 0, 0, 0);
51 
52 // The acceptable fatness / inaccuracy of a user's finger in points.
53 static const int kFingerTolerance = 10;
54 
55 static const int RENDER_MODE_FOR_DISPLAY = 1;
56 static const int RENDER_MODE_FOR_PRINT = 2;
57 
Page(FPDF_DOCUMENT doc,int page_num,FormFiller * form_filler)58 Page::Page(FPDF_DOCUMENT doc, int page_num, FormFiller* form_filler)
59     : document_(doc),
60       page_(FPDF_LoadPage(doc, page_num)),
61       form_filler_(form_filler),
62       invalid_rect_(kEmptyIntRectangle) {}
63 
64 Page::Page(Page&& p) = default;
65 
~Page()66 Page::~Page() {}
67 
Width() const68 int Page::Width() const {
69     return FPDF_GetPageWidth(page_.get());
70 }
71 
Height() const72 int Page::Height() const {
73     return FPDF_GetPageHeight(page_.get());
74 }
75 
Dimensions() const76 Rectangle_i Page::Dimensions() const {
77     return IntRect(0, 0, Width(), Height());
78 }
79 
Render(FPDF_BITMAP bitmap,FS_MATRIX transform,int clip_left,int clip_top,int clip_right,int clip_bottom,int render_mode,int show_annot_types,bool render_form_fields)80 void Page::Render(FPDF_BITMAP bitmap, FS_MATRIX transform, int clip_left, int clip_top,
81                   int clip_right, int clip_bottom, int render_mode, int show_annot_types,
82                   bool render_form_fields) {
83     std::unordered_set<int> types;
84     for (auto renderFlag_annot : renderFlagsAnnotsMap) {
85         if ((renderFlag_annot.first & show_annot_types) != 0) {
86             for (int annot_type : renderFlag_annot.second) {
87                 types.insert(annot_type);
88             }
89         }
90     }
91     if (render_form_fields) types.insert(FPDF_ANNOT_WIDGET);
92     pdfClient_utils::AnnotHider annot_hider(page_.get(), types);
93     int renderFlags = FPDF_REVERSE_BYTE_ORDER;
94     if (render_mode == RENDER_MODE_FOR_DISPLAY) {
95         renderFlags |= FPDF_LCD_TEXT | FPDF_ANNOT;
96     } else if (render_mode == RENDER_MODE_FOR_PRINT) {
97         renderFlags |= FPDF_PRINTING;
98     }
99 
100     FS_RECTF clip = {(float)clip_left, (float)clip_top, (float)clip_right, (float)clip_bottom};
101     FPDF_RenderPageBitmapWithMatrix(bitmap, page_.get(), &transform, &clip, renderFlags);
102 
103     if (render_form_fields) {
104         form_filler_->RenderTile(page_.get(), bitmap, transform, clip, renderFlags);
105     }
106 }
107 
ApplyPageTransform(const Point_d & input) const108 Point_i Page::ApplyPageTransform(const Point_d& input) const {
109     Point_i output;
110     FPDF_PageToDevice(page_.get(), 0, 0, Width(), Height(), 0, input.x, input.y, &output.x,
111                       &output.y);
112     return output;
113 }
114 
ApplyPageTransform(const Rectangle_d & input) const115 Rectangle_i Page::ApplyPageTransform(const Rectangle_d& input) const {
116     return ApplyPageTransform(OuterIntRect(input));
117 }
118 
// Maps both corners of |input| from page to device coordinates and returns the
// rectangle they span, clipped to the page's own dimensions.
Rectangle_i Page::ApplyPageTransform(const Rectangle_i& input) const {
    const int device_width = Width();
    const int device_height = Height();

    Point_i first_corner, second_corner;
    FPDF_PageToDevice(page_.get(), 0, 0, device_width, device_height, 0, input.left, input.top,
                      &first_corner.x, &first_corner.y);
    FPDF_PageToDevice(page_.get(), 0, 0, device_width, device_height, 0, input.right,
                      input.bottom, &second_corner.x, &second_corner.y);

    // Constrain the result within the page.
    return Intersect(IntRect(first_corner, second_corner), Dimensions());
}
131 
UnapplyPageTransform(const Point_i & input) const132 Point_d Page::UnapplyPageTransform(const Point_i& input) const {
133     Point_d output;
134     FPDF_DeviceToPage(page_.get(), 0, 0, Width(), Height(), 0, input.x, input.y, &output.x,
135                       &output.y);
136     return output;
137 }
138 
NumChars()139 int Page::NumChars() {
140     return FPDFText_CountChars(text_page());
141 }
142 
GetUnicode(int char_index)143 uint32_t Page::GetUnicode(int char_index) {
144     return FPDFText_GetUnicode(text_page(), char_index);
145 }
146 
GetTextUtf8()147 std::string Page::GetTextUtf8() {
148     return GetTextUtf8(first_printable_char_index(), last_printable_char_index() + 1);
149 }
150 
GetTextUtf8(const int start_index,const int stop_index)151 std::string Page::GetTextUtf8(const int start_index, const int stop_index) {
152     std::string result;
153     for (int i = start_index; i < stop_index; i++) {
154         AppendpdfClientCodepointAsUtf8(GetUnicode(i), &result);
155     }
156     return result;
157 }
158 
GetAltTextUtf8(vector<std::string> * result) const159 void Page::GetAltTextUtf8(vector<std::string>* result) const {
160     ::pdfClient_utils::GetAltText(page_.get(), result);
161 }
162 
FindMatchesUtf8(std::string_view utf8,vector<TextRange> * matches)163 int Page::FindMatchesUtf8(std::string_view utf8, vector<TextRange>* matches) {
164     std::u32string query(Utf8ToUtf32(utf8));
165     // Normalize characters of string for searching - ignore case and accents.
166     NormalizeStringForSearch(&query);
167     TextRange match;
168     int page_start = first_printable_char_index();
169     int page_stop = last_printable_char_index() + 1;
170     int num_matches = 0;
171     while (FindMatch(query, page_start, page_stop, &match)) {
172         if (matches != nullptr) {
173             matches->push_back(match);
174         }
175         num_matches++;
176         page_start = match.second;
177     }
178     return num_matches;
179 }
180 
BoundsOfMatchesUtf8(std::string_view utf8,vector<Rectangle_i> * rects,vector<int> * match_to_rect,vector<int> * char_indexes)181 int Page::BoundsOfMatchesUtf8(std::string_view utf8, vector<Rectangle_i>* rects,
182                               vector<int>* match_to_rect, vector<int>* char_indexes) {
183     vector<TextRange> matches;
184     int num_matches = FindMatchesUtf8(utf8, &matches);
185     int num_rects = 0;
186     int num_matches_with_rects = 0;
187     for (int i = 0; i < num_matches; i++) {
188         int start = matches[i].first, stop = matches[i].second;
189         int num_rects_for_match = GetTextBounds(start, stop, rects);
190         if (num_rects_for_match == 0) {
191             continue;
192         }
193         if (match_to_rect != nullptr) {
194             match_to_rect->push_back(num_rects);
195         }
196         if (char_indexes != nullptr) {
197             char_indexes->push_back(start);
198         }
199         num_rects += num_rects_for_match;
200         num_matches_with_rects++;
201     }
202     return num_matches_with_rects;
203 }
204 
GetTextBounds(const int start_index,const int stop_index,vector<Rectangle_i> * rects)205 int Page::GetTextBounds(const int start_index, const int stop_index, vector<Rectangle_i>* rects) {
206     int num_rects = 0;
207     Rectangle_d rect = DoubleRect(0, 0, 0, 0);
208     for (int index = start_index; index < stop_index; index++) {
209         double x1, x2, y1, y2;
210         // This call doesn't apply the page transform - have to apply later.
211         FPDFText_GetCharBox(text_page(), index, &x1, &x2, &y1, &y2);
212         if (x1 != x2 && y1 != y2) {
213             if (IsEmpty(rect)) {
214                 rect = DoubleRect(x1, y1, x2, y2);
215             } else {
216                 rect = Union(rect, DoubleRect(x1, y1, x2, y2));
217             }
218         }
219         // Starting a new line - push current rect, start a new rect.
220         if (IsLineBreak(GetUnicode(index))) {
221             if (!IsEmpty(rect)) {
222                 num_rects++;
223                 rects->push_back(ApplyPageTransform(rect));
224             }
225             rect = DoubleRect(0, 0, 0, 0);
226         }
227     }
228     // Push the last current rect.
229     if (!IsEmpty(rect)) {
230         num_rects++;
231         rects->push_back(ApplyPageTransform(rect));
232     }
233     return num_rects;
234 }
235 
SelectWordAt(const Point_i & point,SelectionBoundary * start,SelectionBoundary * stop)236 bool Page::SelectWordAt(const Point_i& point, SelectionBoundary* start, SelectionBoundary* stop) {
237     Point_d char_point = UnapplyPageTransform(point);
238     int char_index = FPDFText_GetCharIndexAtPos(text_page(), char_point.x, char_point.y,
239                                                 kFingerTolerance, kFingerTolerance);
240     if (char_index < 0 || IsWordBreak(GetUnicode(char_index))) {
241         return false;  // No word at the given point to select.
242     }
243     start->index = GetWordStartIndex(char_index);
244     stop->index = GetWordStopIndex(char_index);
245     ConstrainBoundary(start);
246     ConstrainBoundary(stop);
247     return true;
248 }
249 
ConstrainBoundary(SelectionBoundary * boundary)250 void Page::ConstrainBoundary(SelectionBoundary* boundary) {
251     if (boundary->index < 0) {
252         // Index is not specified - find the nearest index to the given point.
253         *boundary = GetBoundaryAtPoint(boundary->point);
254     } else {
255         // Index is specified - find the point at that index.
256         int index = std::max(boundary->index, first_printable_char_index());
257         index = std::min(index, last_printable_char_index() + 1);
258         *boundary = GetBoundaryAtIndex(index);
259     }
260 }
261 
GetFontSize(int index)262 int Page::GetFontSize(int index) {
263     return FPDFText_GetFontSize(text_page(), index);
264 }
265 
GetLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const266 int Page::GetLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
267                        vector<std::string>* urls) const {
268     return GetAnnotatedLinksUtf8(rects, link_to_rect, urls) +
269            GetInferredLinksUtf8(rects, link_to_rect, urls);
270 }
271 
GetGotoLinks() const272 vector<GotoLink> Page::GetGotoLinks() const {
273     vector<GotoLink> links;
274 
275     FPDF_LINK link = nullptr;
276     int pos = 0;
277     while (FPDFLink_Enumerate(page_.get(), &pos, &link)) {
278         if (!IsGotoLink(link)) {
279             continue;
280         }
281         // Get the bounds of the actual link
282         vector<Rectangle_i> goto_link_rects;
283         Rectangle_i rect = GetRect(link);
284         goto_link_rects.push_back(rect);
285 
286         GotoLinkDest* goto_link_dest = new GotoLinkDest();
287 
288         // Get and parse the destination
289         FPDF_DEST fpdf_dest = FPDFLink_GetDest(document_, link);
290         goto_link_dest->set_page_number(FPDFDest_GetDestPageIndex(document_, fpdf_dest));
291 
292         FPDF_BOOL has_x_coord;
293         FPDF_BOOL has_y_coord;
294         FPDF_BOOL has_zoom;
295         FS_FLOAT x;
296         FS_FLOAT y;
297         FS_FLOAT zoom;
298         FPDF_BOOL success = FPDFDest_GetLocationInPage(fpdf_dest, &has_x_coord, &has_y_coord,
299                                                        &has_zoom, &x, &y, &zoom);
300 
301         if (!success) {
302             continue;
303         }
304         if (has_x_coord) {
305             auto point = DoublePoint(x, 0);
306             auto tPoint = ApplyPageTransform(point);
307             goto_link_dest->set_x(tPoint.x);
308         }
309         if (has_y_coord) {
310             auto point = DoublePoint(0, y);
311             auto tPoint = ApplyPageTransform(point);
312             goto_link_dest->set_y(tPoint.y);
313         }
314         if (has_zoom) {
315             goto_link_dest->set_zoom(zoom);
316         }
317 
318         GotoLink goto_link = GotoLink{goto_link_rects, *goto_link_dest};
319 
320         // Ensure that links are within page bounds
321         if (goto_link_dest->x >= 0 && goto_link_dest->y >= 0) {
322             links.push_back(goto_link);
323         } else {
324             LOGE("Goto Link out of bound (x=%f, y=%f). Page width=%d, height =%d",
325                  goto_link_dest->x, goto_link_dest->y, Width(), Height());
326         }
327     }
328     return links;
329 }
330 
InitializeFormFilling()331 void Page::InitializeFormFilling() {
332     form_filler_->NotifyAfterPageLoad(page_.get());
333 }
334 
TerminateFormFilling()335 void Page::TerminateFormFilling() {
336     form_filler_->NotifyBeforePageClose(page_.get());
337 }
338 
GetFormWidgetInfo(Point_i point)339 FormWidgetInfo Page::GetFormWidgetInfo(Point_i point) {
340     Point_d page_point = UnapplyPageTransform(point);
341     FormWidgetInfo result = form_filler_->GetFormWidgetInfo(page_.get(), page_point);
342     if (result.FoundWidget()) {
343         // widget_rect is in page coords, transform to device coords before
344         // returning to user.
345         Rectangle_i transformed_widget_rect = ApplyPageTransform(result.widget_rect());
346         result.set_widget_rect(transformed_widget_rect);
347     }
348 
349     // Consume any rectangle that was invalidated by this action. Some
350     // info-gathering actions may cause temporary invalidation without
351     // actually doing anything that we need to redraw for.
352     ConsumeInvalidRect();
353     return result;
354 }
355 
GetFormWidgetInfo(int annotation_index)356 FormWidgetInfo Page::GetFormWidgetInfo(int annotation_index) {
357     FormWidgetInfo result = form_filler_->GetFormWidgetInfo(page_.get(), annotation_index);
358     if (result.FoundWidget()) {
359         // widget_rect is in page coords; transform to device coords before
360         // returning to user.
361         Rectangle_i transformed_widget_rect = ApplyPageTransform(result.widget_rect());
362         result.set_widget_rect(transformed_widget_rect);
363     }
364 
365     // Consume any rectangle that was invalidated by this action. Some
366     // info-gathering actions may cause temporary invalidation without
367     // actually doing anything that we need to redraw for.
368     ConsumeInvalidRect();
369     return result;
370 }
371 
GetFormWidgetInfos(const std::unordered_set<int> & type_ids,std::vector<FormWidgetInfo> * widget_infos)372 void Page::GetFormWidgetInfos(const std::unordered_set<int>& type_ids,
373                               std::vector<FormWidgetInfo>* widget_infos) {
374     form_filler_->GetFormWidgetInfos(page_.get(), type_ids, widget_infos);
375     for (FormWidgetInfo& widget_info : *widget_infos) {
376         // widget_rect is in page coords; transform to device coords before
377         // returning to user.
378         Rectangle_i transformed_widget_rect = ApplyPageTransform(widget_info.widget_rect());
379         widget_info.set_widget_rect(transformed_widget_rect);
380     }
381 
382     // Consume any rectangles that were invalidated by this action. Some
383     // info-gathering actions may cause temporary invalidation without
384     // actually doing anything that we need to redraw for.
385     ConsumeInvalidRect();
386 }
387 
ClickOnPoint(Point_i point)388 bool Page::ClickOnPoint(Point_i point) {
389     Point_d page_point = UnapplyPageTransform(point);
390     return form_filler_->ClickOnPoint(page_.get(), page_point);
391 }
SetFormFieldText(int annotation_index,std::string_view text)392 bool Page::SetFormFieldText(int annotation_index, std::string_view text) {
393     return form_filler_->SetText(page_.get(), annotation_index, text);
394 }
395 
SetChoiceSelection(int annotation_index,std::span<const int> selected_indices)396 bool Page::SetChoiceSelection(int annotation_index, std::span<const int> selected_indices) {
397     return form_filler_->SetChoiceSelection(page_.get(), annotation_index, selected_indices);
398 }
NotifyInvalidRect(Rectangle_i rect)399 void Page::NotifyInvalidRect(Rectangle_i rect) {
400     if (rect.left < 0 || rect.top < 0 || rect.right < 0 || rect.bottom < 0 || IsEmpty(rect)) {
401         return;
402     }
403 
404     Rectangle_i device_rect = ApplyPageTransform(rect);
405     // If invalid_rect_ is currently empty, avoid unioning so we don't extend
406     // |rect|'s top left corner to (0,0) for no reason.
407     if (IsEmpty(invalid_rect_)) {
408         invalid_rect_ = device_rect;
409         return;
410     }
411 
412     invalid_rect_ = Union(invalid_rect_, device_rect);
413 }
414 
HasInvalidRect()415 bool Page::HasInvalidRect() {
416     return !IsEmpty(invalid_rect_);
417 }
418 
ConsumeInvalidRect()419 Rectangle_i Page::ConsumeInvalidRect() {
420     Rectangle_i copy = invalid_rect_;
421     invalid_rect_ = kEmptyIntRectangle;
422     return copy;
423 }
424 
page()425 void* Page::page() {
426     return page_.get();
427 }
428 
text_page()429 FPDF_TEXTPAGE Page::text_page() {
430     EnsureTextPageInitialized();
431     return text_page_.get();
432 }
433 
first_printable_char_index()434 int Page::first_printable_char_index() {
435     EnsureTextPageInitialized();
436     return first_printable_char_index_;
437 }
438 
last_printable_char_index()439 int Page::last_printable_char_index() {
440     EnsureTextPageInitialized();
441     return last_printable_char_index_;
442 }
443 
EnsureTextPageInitialized()444 void Page::EnsureTextPageInitialized() {
445     if (text_page_) {
446         return;
447     }
448     text_page_.reset(FPDFText_LoadPage(page_.get()));
449 
450     int num_chars = NumChars();
451 
452     int i;
453     for (i = 0; i < num_chars && IsWordBreak(GetUnicode(i)); i++) {
454     }
455     first_printable_char_index_ = i;
456 
457     for (i = num_chars - 1; i >= first_printable_char_index_ && IsWordBreak(GetUnicode(i)); i--) {
458     }
459     last_printable_char_index_ = i;
460 }
461 
InPlaceSwapRedBlueChannels(void * pixels,const int num_pixels) const462 void Page::InPlaceSwapRedBlueChannels(void* pixels, const int num_pixels) const {
463     uint8_t* channels = static_cast<uint8_t*>(pixels);
464     uint8_t* channel1 = channels;
465     uint8_t* channel3 = channels + 2;
466 
467     for (int i = 0; i < num_pixels; ++i, channel1 += kBytesPerPixel, channel3 += kBytesPerPixel) {
468         std::swap(*channel1, *channel3);
469     }
470 }
471 
FindMatch(const std::u32string & query,const int page_start,const int page_stop,TextRange * match)472 bool Page::FindMatch(const std::u32string& query, const int page_start, const int page_stop,
473                      TextRange* match) {
474     if (query.empty()) {
475         return false;
476     }
477 
478     int max_match_start = page_stop - query.length();
479     for (int m = page_start; m <= max_match_start; m++) {
480         if (IsMatch(query, m, page_stop, match)) {
481             return true;
482         }
483     }
484     return false;
485 }
486 
IsMatch(const std::u32string & query,const int match_start,const int page_stop,TextRange * match)487 bool Page::IsMatch(const std::u32string& query, const int match_start, const int page_stop,
488                    TextRange* match) {
489     int page_index = match_start;
490     size_t query_index = 0;
491     uint32_t page_char = 0, prev_char = 0;
492     while (query_index < query.length()) {
493         prev_char = page_char;
494         page_char = GetUnicode(page_index);
495 
496         if (NormalizeForSearch(page_char) == query[query_index]) {
497             // This codepoint matches (ignoring case and accents). Move to next.
498             query_index++;
499             page_index++;
500         } else if (IsSkippableForSearch(page_char, prev_char) && query_index > 0) {
501             // Don't increment query index - skip over skippable character.
502             page_index++;
503             if ((page_stop - page_index) < (query.length() - query_index)) {
504                 return false;  // Not enough room for query string before page_stop.
505             }
506         } else {
507             return false;
508         }
509     }
510     // Update match to contain page indices of match start and match stop.
511     match->first = match_start;
512     match->second = page_index;
513     return true;
514 }
515 
GetBoundaryAtIndex(const int index)516 SelectionBoundary Page::GetBoundaryAtIndex(const int index) {
517     return GetBoundaryAtIndex(index, IsRtlAtIndex(index));
518 }
519 
IsRtlAtIndex(const int index)520 bool Page::IsRtlAtIndex(const int index) {
521     int start_index = GetWordStartIndex(index);
522     int stop_index = GetWordStopIndex(index);
523     int word_length = stop_index - start_index;
524     if (word_length <= 1) {
525         // Can't tell directionality from a single character, guess LTR.
526         return false;
527     }
528     Rectangle_i start_bounds = GetCharBounds(start_index);
529     Rectangle_i stop_bounds = GetCharBounds(stop_index - 1);
530     return start_bounds.Center().x > stop_bounds.Center().x;
531 }
532 
GetBoundaryAtIndex(const int index,bool is_rtl)533 SelectionBoundary Page::GetBoundaryAtIndex(const int index, bool is_rtl) {
534     // Normally we align the boundary on the start edge of next character:
535     int char_index = index;
536     bool use_end_edge = false;
537 
538     // Printable characters have well defined bounding boxes, word-breaks (spaces
539     // and newlines) may not - so we use the end edge of the previous printable
540     // character instead if the next character is not printable.
541     if (index == NumChars() || IsWordBreak(GetUnicode(index))) {
542         char_index = index - 1;
543         use_end_edge = true;
544     }
545     bool use_right_edge = use_end_edge ^ is_rtl;
546 
547     SelectionBoundary boundary(index, 0, 0, is_rtl);
548     Rectangle_i char_bounds = GetCharBounds(char_index);
549     boundary.point.x = use_right_edge ? char_bounds.right : char_bounds.left;
550     // Use the baseline (not the bottom) of the char as the y-value.
551     boundary.point.y = GetCharOrigin(char_index).y;
552     return boundary;
553 }
554 
GetBoundaryAtPoint(const Point_i & point)555 SelectionBoundary Page::GetBoundaryAtPoint(const Point_i& point) {
556     SelectionBoundary best_boundary(0, point.x, point.y, false);
557     int best_distance_sq = std::numeric_limits<int>::max();
558 
559     bool prev_char_is_word_char = false;
560     bool is_rtl = false;
561     for (int index = first_printable_char_index(); index <= last_printable_char_index() + 1;
562          index++) {
563         bool cur_char_is_word_char =
564                 (index <= last_printable_char_index()) && !IsWordBreak(GetUnicode(index));
565         // Starting a new word:
566         if (cur_char_is_word_char && !prev_char_is_word_char) {
567             // Finding out RTL involves looking at each end of the word,
568             // so we only do it at the start of each word:
569             is_rtl = IsRtlAtIndex(index);
570         }
571         if (cur_char_is_word_char || prev_char_is_word_char) {
572             SelectionBoundary boundary = GetBoundaryAtIndex(index, is_rtl);
573             int dx = boundary.point.x - point.x;
574             int dy = boundary.point.y - point.y;
575             int distance_sq = dx * dx + dy * dy;
576             if (distance_sq < best_distance_sq) {
577                 best_boundary = boundary;
578                 best_distance_sq = distance_sq;
579             }
580         }
581         prev_char_is_word_char = cur_char_is_word_char;
582     }
583     return best_boundary;
584 }
585 
GetWordStartIndex(const int index)586 int Page::GetWordStartIndex(const int index) {
587     int start_index = index;
588     while (start_index > 0 && !IsWordBreak(GetUnicode(start_index - 1))) {
589         --start_index;  // Move start_index to the start of the word.
590     }
591     return start_index;
592 }
593 
GetWordStopIndex(const int index)594 int Page::GetWordStopIndex(const int index) {
595     int stop_index = index;
596     int num_chars = NumChars();
597     while (stop_index < num_chars && !IsWordBreak(GetUnicode(stop_index))) {
598         ++stop_index;  // Move stop_index to the end of the word.
599     }
600     return stop_index;
601 }
602 
GetRawCharBounds(const int char_index)603 Rectangle_d Page::GetRawCharBounds(const int char_index) {
604     double x1, x2, y1, y2;
605     FPDFText_GetCharBox(text_page(), char_index, &x1, &x2, &y1, &y2);
606     return DoubleRect(x1, y1, x2, y2);
607 }
608 
GetCharBounds(const int char_index)609 Rectangle_i Page::GetCharBounds(const int char_index) {
610     return ApplyPageTransform(GetRawCharBounds(char_index));
611 }
612 
GetCharOrigin(const int char_index)613 Point_i Page::GetCharOrigin(const int char_index) {
614     double x = 0.0, y = 0.0;
615     FPDFText_GetCharOrigin(text_page(), char_index, &x, &y);
616     return ApplyPageTransform(DoublePoint(x, y));
617 }
618 
GetAnnotatedLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const619 int Page::GetAnnotatedLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
620                                 vector<std::string>* urls) const {
621     FPDF_LINK link = nullptr;
622     int pos = 0;
623     int num_links_with_rect = 0;
624     while (FPDFLink_Enumerate(page_.get(), &pos, &link)) {
625         if (!IsUrlLink(link)) {
626             continue;
627         }
628 
629         std::string url = GetUrlUtf8(link);
630         Rectangle_i rect = GetRect(link);
631         if (IsEmpty(rect)) {
632             continue;
633         }
634 
635         link_to_rect->push_back(rects->size());
636         rects->push_back(rect);
637         urls->push_back(url);
638         num_links_with_rect++;
639     }
640     return num_links_with_rect;
641 }
642 
GetInferredLinksUtf8(vector<Rectangle_i> * rects,vector<int> * link_to_rect,vector<std::string> * urls) const643 int Page::GetInferredLinksUtf8(vector<Rectangle_i>* rects, vector<int>* link_to_rect,
644                                vector<std::string>* urls) const {
645     // TODO(b/312730882): Infer links by looking for http:// and similar and for
646     // email addresses to use as mailto: links. There are some pdfClient methods for
647     // doing this, but these have some bugs which need patching or working around.
648     return 0;
649 }
650 
GetUrlUtf8(FPDF_LINK link) const651 std::string Page::GetUrlUtf8(FPDF_LINK link) const {
652     FPDF_ACTION action = FPDFLink_GetAction(link);
653     // Allocate a string big enough to hold the URL.
654     std::string url(FPDFAction_GetURIPath(document_, action, nullptr, 0), '\0');
655     // Then write the URL to it.
656     FPDFAction_GetURIPath(document_, action, &url[0], url.length());
657     EraseTrailingNulls(&url);
658     return url;
659 }
660 
GetRect(FPDF_LINK link) const661 Rectangle_i Page::GetRect(FPDF_LINK link) const {
662     FS_RECTF r;
663     if (!FPDFLink_GetAnnotRect(link, &r)) {
664         return Rectangle_i();
665     }
666 
667     Rectangle_d rect_d = DoubleRect(r.left, r.top, r.right, r.bottom);
668     return ApplyPageTransform(rect_d);
669 }
670 
IsGotoLink(FPDF_LINK link) const671 bool Page::IsGotoLink(FPDF_LINK link) const {
672     FPDF_ACTION action = FPDFLink_GetAction(link);
673     return action != nullptr && FPDFAction_GetType(action) == PDFACTION_GOTO;
674 }
675 
IsUrlLink(FPDF_LINK link) const676 bool Page::IsUrlLink(FPDF_LINK link) const {
677     FPDF_ACTION action = FPDFLink_GetAction(link);
678     return action != nullptr && FPDFAction_GetType(action) == PDFACTION_URI;
679 }
680 
681 }  // namespace pdfClient