1 /* 2 * Copyright (C) 2024 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_ 18 #define MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_ 19 20 #include <stdint.h> 21 22 #include <span> 23 #include <string> 24 #include <unordered_map> 25 #include <unordered_set> 26 #include <utility> 27 #include <vector> 28 29 #include "cpp/fpdf_scopers.h" 30 #include "form_filler.h" 31 #include "form_widget_info.h" 32 #include "fpdfview.h" 33 #include "rect.h" 34 35 namespace pdfClient { 36 37 // Render Flags corresponding to each render flag defined in 38 // 'pdf/framework/java/android/graphics/pdf/RenderParams.java' 39 // LINT.IfChange 40 static const int FLAG_RENDER_TEXT_ANNOTATIONS = 1 << 1; 41 static const int FLAG_RENDER_HIGHLIGHT_ANNOTATIONS = 1 << 2; 42 // LINT.ThenChange(packages/providers/MediaProvider/pdf/framework/java/android/graphics/pdf/RenderParams.java) 43 44 static const std::unordered_map<int, std::vector<int>> renderFlagsAnnotsMap = { 45 {FLAG_RENDER_TEXT_ANNOTATIONS, std::vector<int>{FPDF_ANNOT_TEXT, FPDF_ANNOT_FREETEXT}}, 46 {FLAG_RENDER_HIGHLIGHT_ANNOTATIONS, std::vector<int>{FPDF_ANNOT_HIGHLIGHT}}}; 47 48 // A start index (inclusive) and a stop index (exclusive) into the string of 49 // codepoints that make up a range of text. 50 typedef std::pair<int, int> TextRange; 51 52 // A start index (inclusive) or stop index (exclusive) into the string of 53 // codepoints that make up a range of text, and a point on the boundary where 54 // the selection starts or stops. 55 struct SelectionBoundary { 56 int index; 57 Point_i point; 58 bool is_rtl; 59 SelectionBoundarySelectionBoundary60 SelectionBoundary(int i, int x, int y, bool r) : index(i), is_rtl(r) { point = IntPoint(x, y); } 61 }; 62 63 struct GotoLinkDest { 64 int page_number = 0; 65 float x = 0; 66 float y = 0; 67 float zoom = 0; 68 set_page_numberGotoLinkDest69 void set_page_number(int page_number) { this->page_number = page_number; } 70 set_xGotoLinkDest71 void set_x(float x) { this->x = x; } 72 set_yGotoLinkDest73 void set_y(float y) { this->y = y; } 74 set_zoomGotoLinkDest75 void set_zoom(float zoom) { this->zoom = zoom; } 76 }; 77 78 struct GotoLink { 79 std::vector<Rectangle_i> rect; 80 GotoLinkDest dest; 81 }; 82 83 // Wrapper on a FPDF_PAGE that adds rendering functionality. 84 class Page { 85 public: 86 // FPDF_PAGE is opened when constructed. 87 Page(FPDF_DOCUMENT doc, int page_num, FormFiller* form_filler); 88 89 // Move constructor. 90 Page(Page&& p); 91 92 virtual ~Page(); 93 94 int Width() const; 95 96 int Height() const; 97 98 Rectangle_i Dimensions() const; 99 100 // Render the page to the output bitmap, applying the appropriate transform, clip, and 101 // render mode as specified. 102 void Render(FPDF_BITMAP bitmap, FS_MATRIX transform, int clip_left, int clip_top, 103 int clip_right, int clip_bottom, int render_mode, int show_annot_types, 104 bool render_form_fields); 105 106 // The page has a transform that must be applied to all characters and objects 107 // on the page. This transforms from the page's internal co-ordinate system 108 // to the external co-ordinate system from (0, 0) to (Width(), Height()). 109 Point_i ApplyPageTransform(const Point_d& input) const; 110 Rectangle_i ApplyPageTransform(const Rectangle_d& input) const; 111 Rectangle_i ApplyPageTransform(const Rectangle_i& input) const; 112 113 // Transform from the external co-ordinate system (0, 0)-(Width(), Height()) 114 // back into the page's internal co-ordinate system. 115 Point_d UnapplyPageTransform(const Point_i& input) const; 116 117 int NumChars(); 118 119 uint32_t GetUnicode(int char_index); 120 121 // Returns the entire text of the given page in UTF-8. 122 std::string GetTextUtf8(); 123 124 // Returns part of the text of the given page in UTF-8. 125 std::string GetTextUtf8(const int start_index, const int stop_index); 126 127 // Appends each alt-text instance on the page to |result|. 128 void GetAltTextUtf8(std::vector<std::string>* result) const; 129 130 // Searches for the given word on the given page and returns the number of 131 // matches. Ignores case and accents when searching. 132 // If matches vector is not NULL, it is filled with the start and end indices 133 // of each match - these are character indices according to FPDFText API. 134 int FindMatchesUtf8(std::string_view utf8, std::vector<TextRange>* matches); 135 136 // Same as above, but finds the bounding boxes of the matches. Returns the 137 // number of matches and fills in the rects vector. Each match can take more 138 // than one rect to bound, so the match_to_rect vector is filled so that 139 // rects[match_to_rect[i]] is the first rectangle that belongs with match i. 140 // Matches for which we cannot find a single bounding rectangle are discarded. 141 // The char_indexes vector is filled with the char index that each match 142 // starts at - the beginning of its TextRange. 143 int BoundsOfMatchesUtf8(std::string_view utf8, std::vector<Rectangle_i>* rects, 144 std::vector<int>* match_to_rect, std::vector<int>* char_indexes); 145 146 // Appends 0 or more rectangles to the given vector that surround the text 147 // of the given page from the start index and the stop index. 148 // Returns the number of rectangles used to surround the text. 149 int GetTextBounds(const int start_index, const int stop_index, std::vector<Rectangle_i>* rects); 150 151 // If there is a word at the given point, returns true and modifies the given 152 // boundaries to point to each end of the word - otherwise returns false. 153 bool SelectWordAt(const Point_i& point, SelectionBoundary* start, SelectionBoundary* stop); 154 155 // Modifies the given selection boundary object in the following ways: 156 // - The resulting boundary will have an index that is within the range 157 // [0...n], where n is NumChars(). 158 // - The resulting boundary will have a point that is at the outer corner 159 // of the char just inside the selection. 160 void ConstrainBoundary(SelectionBoundary* boundary); 161 162 int GetFontSize(int index); 163 // Get the URLs and bounding rectangles for all links on the page. 164 int GetLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect, 165 std::vector<std::string>* urls) const; 166 167 // Returns the list of GotoLink for all GotoLinks on the page. 168 std::vector<GotoLink> GetGotoLinks() const; 169 170 // Perform any operations required to prepare this page for form filling. 171 void InitializeFormFilling(); 172 173 // Perform any clean up operations after form filling is complete. 174 void TerminateFormFilling(); 175 176 // Obtain information about the form widget at |point| on the page, if any. 177 // |point| is in device coordinates. 178 FormWidgetInfo GetFormWidgetInfo(Point_i point); 179 180 // Obtain information about the form widget with index |annotation_index| on 181 // the page, if any. 182 FormWidgetInfo GetFormWidgetInfo(int annotation_index); 183 184 // Obtain form widget information for all form field annotations on the page, 185 // optionally restricting by |type_ids| and store in |widget_infos|. See 186 // fpdf_formfill.h for type constants. If |type_ids| is empty all form 187 // widgets on |page| will be added to |widget_infos|, if any. 188 void GetFormWidgetInfos(const std::unordered_set<int>& type_ids, 189 std::vector<FormWidgetInfo>* widget_infos); 190 191 // Perform a click at |point| on the page. Any focus in the document 192 // resulting from this operation will be killed before returning. No-op if 193 // no widget present at |point| or widget cannot be edited. Returns true if 194 // click was performed. |point| is in device coordinates. 195 bool ClickOnPoint(Point_i point); 196 197 // Set the value text of the widget at |annotation_index| on page. No-op if 198 // no widget present or widget cannot be edited. Returns true if text was 199 // set, false otherwise. 200 bool SetFormFieldText(int annotation_index, std::string_view text); 201 202 // Set the |selected_indices| for the choice widget at |annotation_index| as 203 // selected and deselect all other indices. No-op if no widget present or 204 // widget cannot be edited. Returns true if indices were set, false otherwise. 205 bool SetChoiceSelection(int annotation_index, std::span<const int> selected_indices); 206 207 // Informs the page that the |rect| of the page bitmap has been invalidated. 208 // This takes place following form filling operations. |Rect| must be in page 209 // coordinates. 210 void NotifyInvalidRect(Rectangle_i rect); 211 212 // Return whether or not an area of the bitmap has been invalidated. 213 bool HasInvalidRect(); 214 215 // Returns the area of the page that has been invalidated and resets the 216 // field. Rect returned in device coordinates. 217 Rectangle_i ConsumeInvalidRect(); 218 219 // Returns FPDF_PAGE. This Page retains ownership. All operations that wish 220 // to access FPDF_PAGE should to call methods of this class instead of 221 // requesting the FPDF_PAGE directly through this method. 222 void* page(); 223 224 private: 225 // Convenience methods to access the variables dependent on an initialized 226 // ScopedFPDFTextPage. We lazy init text_page_ for efficiency because many 227 // page operations do not require it. 228 FPDF_TEXTPAGE text_page(); 229 int first_printable_char_index(); 230 int last_printable_char_index(); 231 232 // Check that text_page_ and first/last_printable_char_index_ have been 233 // initialized and do so if not. 234 void EnsureTextPageInitialized(); 235 236 // Android bitmaps are in ARGB order. pdfClient emits bitmaps which have red and 237 // blue swapped when treated as Android bitmaps - but this function fixes it. 238 // NOTE: This might rely on little-endian architecture. 239 void InPlaceSwapRedBlueChannels(void* pixels, const int num_pixels) const; 240 241 // Looks for an instance of the given UTF32 string on the given page, starting 242 // not before the page_start index and ending before the page_stop index. 243 // If found, returns true and updates the TextRange. Case/accent insensitive. 244 bool FindMatch(const std::u32string& query, const int page_start, const int page_stop, 245 TextRange* match); 246 247 // Checks if the page matches the given UTF32 string at the given match_start 248 // index that ends before the page_stop index. If it matches, returns true 249 // and updates the TextRange. Case/accent insensitive. 250 bool IsMatch(const std::u32string& query, const int match_start, const int page_stop, 251 TextRange* match); 252 253 // Returns a SelectionBoundary at a particular index - 0 means before the char 254 // at index 0, 1 means after char 0 but before the char at index 1, and so on. 255 SelectionBoundary GetBoundaryAtIndex(const int index); 256 257 // Returns whether text is flowing left or right at a particular index. 258 bool IsRtlAtIndex(const int index); 259 260 // Returns a SelectionBoundary at a particular index, once we already know 261 // which way the text is flowing at that index. 262 SelectionBoundary GetBoundaryAtIndex(const int index, bool is_rtl); 263 264 // Returns a SelectionBoundary as near as possible to the given point. 265 SelectionBoundary GetBoundaryAtPoint(const Point_i& point); 266 267 // Given a boundary index to the middle or either end of a word, returns 268 // the boundary index of the start of that word - which is the index of the 269 // first char that is part of that word. 270 int GetWordStartIndex(const int index); 271 272 // Given a boundary index to the middle or either end of a word, returns 273 // the boundary index of the stop of that word - which is the index of the 274 // first char that is immediately after that word, but not part of it. 275 int GetWordStopIndex(const int index); 276 277 // Returns the rectangle that bounds the given char - page transform is not 278 // yet applied, must be applied later. 279 Rectangle_d GetRawCharBounds(int char_index); 280 281 // Returns the rectangle that bounds the given char, with the page transform 282 // already applied. 283 Rectangle_i GetCharBounds(int char_index); 284 285 // Returns the origin of the given char, with the page transform applied. 286 Point_i GetCharOrigin(int char_index); 287 288 // Get the URLs and bounding rectangles for annotation links only - text 289 // that has been annotated to link to some URL. 290 int GetAnnotatedLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect, 291 std::vector<std::string>* urls) const; 292 293 // Get the URLs and bounding rectangles for inferred links only - text that 294 // we recognize as a potential link since it starts with http:// or similar. 295 int GetInferredLinksUtf8(std::vector<Rectangle_i>* rects, std::vector<int>* link_to_rect, 296 std::vector<std::string>* urls) const; 297 298 bool IsGotoLink(FPDF_LINK link) const; 299 300 bool IsUrlLink(FPDF_LINK link) const; 301 302 // Get the URL of the given link, in UTF-8. 303 std::string GetUrlUtf8(FPDF_LINK link) const; 304 305 // Get the bounds of the given link, in page co-ordinates. 306 Rectangle_i GetRect(FPDF_LINK link) const; 307 308 FPDF_DOCUMENT document_; // Not owned. 309 310 ScopedFPDFPage page_; 311 312 FormFiller* const form_filler_; // Not owned. 313 314 // these variables lazily initialized, should be accessed via corresponding 315 // accessor methods 316 ScopedFPDFTextPage text_page_; 317 int first_printable_char_index_; 318 int last_printable_char_index_; 319 320 // Rectangle representing an area of the bitmap for this page that has been 321 // reported as invalidated. Will be coalesced from all rectangles that are 322 // reported as invalidated since the last time this rectangle was consumed. 323 // Rectangles are invalidated due to form filling operations. 324 // Rectangle is in Device Coordinates. 325 Rectangle_i invalid_rect_; 326 }; 327 328 } // namespace pdfClient 329 330 #endif // MEDIAPROVIDER_PDF_JNI_PDFCLIENT_PAGE_H_