/*
 * Copyright (C) 2024 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_
18 #define MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_
19 
#include <stdint.h>

#include <span>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "cpp/fpdf_scopers.h"
#include "form_filler.h"
#include "form_widget_info.h"
#include "fpdfview.h"
#include "rect.h"
34 
35 namespace pdfClient {
36 
// Render flags corresponding to each render flag defined in
// 'pdf/framework/java/android/graphics/pdf/RenderParams.java'.
// Each flag is a distinct single bit. They are `inline constexpr` so that all
// translation units including this header share one definition.
// LINT.IfChange
inline constexpr int FLAG_RENDER_TEXT_ANNOTATIONS = 1 << 1;
inline constexpr int FLAG_RENDER_HIGHLIGHT_ANNOTATIONS = 1 << 2;
// LINT.ThenChange(packages/providers/MediaProvider/pdf/framework/java/android/graphics/pdf/RenderParams.java)
43 
44 static const std::unordered_map<int, std::vector<int>> renderFlagsAnnotsMap = {
45         {FLAG_RENDER_TEXT_ANNOTATIONS, std::vector<int>{FPDF_ANNOT_TEXT, FPDF_ANNOT_FREETEXT}},
46         {FLAG_RENDER_HIGHLIGHT_ANNOTATIONS, std::vector<int>{FPDF_ANNOT_HIGHLIGHT}}};
47 
// A start index (inclusive, stored in |first|) and a stop index (exclusive,
// stored in |second|) into the string of codepoints that make up a range of
// text.
using TextRange = std::pair<int, int>;
51 
52 // A start index (inclusive) or stop index (exclusive) into the string of
53 // codepoints that make up a range of text, and a point on the boundary where
54 // the selection starts or stops.
55 struct SelectionBoundary {
56     int index;
57     Point_i point;
58     bool is_rtl;
59 
SelectionBoundarySelectionBoundary60     SelectionBoundary(int i, int x, int y, bool r) : index(i), is_rtl(r) { point = IntPoint(x, y); }
61 };
62 
// Destination of a goto link. Every field defaults to zero until set.
// NOTE(review): |x|, |y| and |zoom| presumably describe the position and zoom
// on the destination page - confirm units against the code that fills them.
struct GotoLinkDest {
    int page_number = 0;
    float x = 0.0f;
    float y = 0.0f;
    float zoom = 0.0f;

    // Stores |page_number| as the destination page number.
    void set_page_number(int page_number) { this->page_number = page_number; }

    // Stores |x| as the destination x value.
    void set_x(float x) { this->x = x; }

    // Stores |y| as the destination y value.
    void set_y(float y) { this->y = y; }

    // Stores |zoom| as the destination zoom value.
    void set_zoom(float zoom) { this->zoom = zoom; }
};
77 
78 struct GotoLink {
79     std::vector<Rectangle_i> rect;
80     GotoLinkDest dest;
81 };
82 
83 // Wrapper on a FPDF_PAGE that adds rendering functionality.
84 class Page {
85   public:
86     // FPDF_PAGE is opened when constructed.
87     Page(FPDF_DOCUMENT doc, int page_num, FormFiller* form_filler);
88 
89     // Move constructor.
90     Page(Page&& p);
91 
92     virtual ~Page();
93 
94     int Width() const;
95 
96     int Height() const;
97 
98     Rectangle_i Dimensions() const;
99 
100     // Render the page to the output bitmap, applying the appropriate transform, clip, and
101     // render mode as specified.
102     void Render(FPDF_BITMAP bitmap, FS_MATRIX transform, int clip_left, int clip_top,
103          int clip_right, int clip_bottom, int render_mode, int show_annot_types,
104          bool render_form_fields);
105 
106     // The page has a transform that must be applied to all characters and objects
107     // on the page. This transforms from the page's internal co-ordinate system
108     // to the external co-ordinate system from (0, 0) to (Width(), Height()).
109     Point_i ApplyPageTransform(const Point_d& input) const;
110     Rectangle_i ApplyPageTransform(const Rectangle_d& input) const;
111     Rectangle_i ApplyPageTransform(const Rectangle_i& input) const;
112 
113     // Transform from the external co-ordinate system (0, 0)-(Width(), Height())
114     // back into the page's internal co-ordinate system.
115     Point_d UnapplyPageTransform(const Point_i& input) const;
116 
117     int NumChars();
118 
119     uint32_t GetUnicode(int char_index);
120 
121     // Returns the entire text of the given page in UTF-8.
122     std::string GetTextUtf8();
123 
124     // Returns part of the text of the given page in UTF-8.
125     std::string GetTextUtf8(const int start_index, const int stop_index);
126 
127     // Appends each alt-text instance on the page to |result|.
128     void GetAltTextUtf8(std::vector<std::string>* result) const;
129 
130     // Searches for the given word on the given page and returns the number of
131     // matches. Ignores case and accents when searching.
132     // If matches vector is not NULL, it is filled with the start and end indices
133     // of each match - these are character indices according to FPDFText API.
134     int FindMatchesUtf8(std::string_view utf8, std::vector<TextRange>* matches);
135 
136     // Same as above, but finds the bounding boxes of the matches. Returns the
137     // number of matches and fills in the rects vector. Each match can take more
138     // than one rect to bound, so the match_to_rect vector is filled so that
139     // rects[match_to_rect[i]] is the first rectangle that belongs with match i.
140     // Matches for which we cannot find a single bounding rectangle are discarded.
141     // The char_indexes vector is filled with the char index that each match
142     // starts at - the beginning of its TextRange.
143     int BoundsOfMatchesUtf8(std::string_view utf8, std::vector<Rectangle_i>* rects,
144                             std::vector<int>* match_to_rect, std::vector<int>* char_indexes);
145 
146     // Appends 0 or more rectangles to the given vector that surround the text
147     // of the given page from the start index and the stop index.
148     // Returns the number of rectangles used to surround the text.
149     int GetTextBounds(const int start_index, const int stop_index, std::vector<Rectangle_i>* rects);
150 
151     // If there is a word at the given point, returns true and modifies the given
152     // boundaries to point to each end of the word - otherwise returns false.
153     bool SelectWordAt(const Point_i& point, SelectionBoundary* start, SelectionBoundary* stop);
154 
155     // Modifies the given selection boundary object in the following ways:
156     // - The resulting boundary will have an index that is within the range
157     // [0...n], where n is NumChars().
158     // - The resulting boundary will have a point that is at the outer corner
159     // of the char just inside the selection.
160     void ConstrainBoundary(SelectionBoundary* boundary);
161 
162     int GetFontSize(int index);
163     // Get the URLs and bounding rectangles for all links on the page.
164     int GetLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect,
165                      std::vector<std::string>* urls) const;
166 
167     // Returns the list of GotoLink for all GotoLinks on the page.
168     std::vector<GotoLink> GetGotoLinks() const;
169 
170     // Perform any operations required to prepare this page for form filling.
171     void InitializeFormFilling();
172 
173     // Perform any clean up operations after form filling is complete.
174     void TerminateFormFilling();
175 
176     // Obtain information about the form widget at |point| on the page, if any.
177     // |point| is in device coordinates.
178     FormWidgetInfo GetFormWidgetInfo(Point_i point);
179 
180     // Obtain information about the form widget with index |annotation_index| on
181     // the page, if any.
182     FormWidgetInfo GetFormWidgetInfo(int annotation_index);
183 
184     // Obtain form widget information for all form field annotations on the page,
185     // optionally restricting by |type_ids| and store in |widget_infos|. See
186     // fpdf_formfill.h for type constants. If |type_ids| is empty all form
187     // widgets on |page| will be added to |widget_infos|, if any.
188     void GetFormWidgetInfos(const std::unordered_set<int>& type_ids,
189                             std::vector<FormWidgetInfo>* widget_infos);
190 
191     // Perform a click at |point| on the page. Any focus in the document
192     // resulting from this operation will be killed before returning.  No-op if
193     // no widget present at |point| or widget cannot be edited. Returns true if
194     // click was performed. |point| is in device coordinates.
195     bool ClickOnPoint(Point_i point);
196 
197     // Set the value text of the widget at |annotation_index| on page. No-op if
198     // no widget present or widget cannot be edited. Returns true if text was
199     // set, false otherwise.
200     bool SetFormFieldText(int annotation_index, std::string_view text);
201 
202     // Set the |selected_indices| for the choice widget at |annotation_index| as
203     // selected and deselect all other indices. No-op if no widget present or
204     // widget cannot be edited. Returns true if indices were set, false otherwise.
205     bool SetChoiceSelection(int annotation_index, std::span<const int> selected_indices);
206 
207     // Informs the page that the |rect| of the page bitmap has been invalidated.
208     // This takes place following form filling operations. |Rect| must be in page
209     // coordinates.
210     void NotifyInvalidRect(Rectangle_i rect);
211 
212     // Return whether or not an area of the bitmap has been invalidated.
213     bool HasInvalidRect();
214 
215     // Returns the area of the page that has been invalidated and resets the
216     // field. Rect returned in device coordinates.
217     Rectangle_i ConsumeInvalidRect();
218 
219     // Returns FPDF_PAGE. This Page retains ownership. All operations that wish
220     // to access FPDF_PAGE should to call methods of this class instead of
221     // requesting the FPDF_PAGE directly through this method.
222     void* page();
223 
224   private:
225     // Convenience methods to access the variables dependent on an initialized
226     // ScopedFPDFTextPage. We lazy init text_page_ for efficiency because many
227     // page operations do not require it.
228     FPDF_TEXTPAGE text_page();
229     int first_printable_char_index();
230     int last_printable_char_index();
231 
232     // Check that text_page_ and first/last_printable_char_index_ have been
233     // initialized and do so if not.
234     void EnsureTextPageInitialized();
235 
236     // Android bitmaps are in ARGB order. pdfClient emits bitmaps which have red and
237     // blue swapped when treated as Android bitmaps - but this function fixes it.
238     // NOTE: This might rely on little-endian architecture.
239     void InPlaceSwapRedBlueChannels(void* pixels, const int num_pixels) const;
240 
241     // Looks for an instance of the given UTF32 string on the given page, starting
242     // not before the page_start index and ending before the page_stop index.
243     // If found, returns true and updates the TextRange. Case/accent insensitive.
244     bool FindMatch(const std::u32string& query, const int page_start, const int page_stop,
245                    TextRange* match);
246 
247     // Checks if the page matches the given UTF32 string at the given match_start
248     // index that ends before the page_stop index. If it matches, returns true
249     // and updates the TextRange. Case/accent insensitive.
250     bool IsMatch(const std::u32string& query, const int match_start, const int page_stop,
251                  TextRange* match);
252 
253     // Returns a SelectionBoundary at a particular index - 0 means before the char
254     // at index 0, 1 means after char 0 but before the char at index 1, and so on.
255     SelectionBoundary GetBoundaryAtIndex(const int index);
256 
257     // Returns whether text is flowing left or right at a particular index.
258     bool IsRtlAtIndex(const int index);
259 
260     // Returns a SelectionBoundary at a particular index, once we already know
261     // which way the text is flowing at that index.
262     SelectionBoundary GetBoundaryAtIndex(const int index, bool is_rtl);
263 
264     // Returns a SelectionBoundary as near as possible to the given point.
265     SelectionBoundary GetBoundaryAtPoint(const Point_i& point);
266 
267     // Given a boundary index to the middle or either end of a word, returns
268     // the boundary index of the start of that word - which is the index of the
269     // first char that is part of that word.
270     int GetWordStartIndex(const int index);
271 
272     // Given a boundary index to the middle or either end of a word, returns
273     // the boundary index of the stop of that word - which is the index of the
274     // first char that is immediately after that word, but not part of it.
275     int GetWordStopIndex(const int index);
276 
277     // Returns the rectangle that bounds the given char - page transform is not
278     // yet applied, must be applied later.
279     Rectangle_d GetRawCharBounds(int char_index);
280 
281     // Returns the rectangle that bounds the given char, with the page transform
282     // already applied.
283     Rectangle_i GetCharBounds(int char_index);
284 
285     // Returns the origin of the given char, with the page transform applied.
286     Point_i GetCharOrigin(int char_index);
287 
288     // Get the URLs and bounding rectangles for annotation links only - text
289     // that has been annotated to link to some URL.
290     int GetAnnotatedLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect,
291                               std::vector<std::string>* urls) const;
292 
293     // Get the URLs and bounding rectangles for inferred links only - text that
294     // we recognize as a potential link since it starts with http:// or similar.
295     int GetInferredLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect,
296                              std::vector<std::string>* urls) const;
297 
298     bool IsGotoLink(FPDF_LINK link) const;
299 
300     bool IsUrlLink(FPDF_LINK link) const;
301 
302     // Get the URL of the given link, in UTF-8.
303     std::string GetUrlUtf8(FPDF_LINK link) const;
304 
305     // Get the bounds of the given link, in page co-ordinates.
306     Rectangle_i GetRect(FPDF_LINK link) const;
307 
308     FPDF_DOCUMENT document_;  // Not owned.
309 
310     ScopedFPDFPage page_;
311 
312     FormFiller* const form_filler_;  // Not owned.
313 
314     // these variables lazily initialized, should be accessed via corresponding
315     // accessor methods
316     ScopedFPDFTextPage text_page_;
317     int first_printable_char_index_;
318     int last_printable_char_index_;
319 
320     // Rectangle representing an area of the bitmap for this page that has been
321     // reported as invalidated. Will be coalesced from all rectangles that are
322     // reported as invalidated since the last time this rectangle was consumed.
323     // Rectangles are invalidated due to form filling operations.
324     // Rectangle is in Device Coordinates.
325     Rectangle_i invalid_rect_;
326 };
327 
328 }  // namespace pdfClient
329 
330 #endif  // MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_