1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "public/fpdf_text.h"
8 
9 #include <algorithm>
10 #include <vector>
11 
12 #include "core/fpdfapi/page/cpdf_page.h"
13 #include "core/fpdfdoc/cpdf_viewerpreferences.h"
14 #include "core/fpdftext/cpdf_linkextract.h"
15 #include "core/fpdftext/cpdf_textpage.h"
16 #include "core/fpdftext/cpdf_textpagefind.h"
17 #include "fpdfsdk/fsdk_define.h"
18 #include "third_party/base/numerics/safe_conversions.h"
19 #include "third_party/base/stl_util.h"
20 
21 #ifdef PDF_ENABLE_XFA
22 #include "fpdfsdk/fpdfxfa/cpdfxfa_context.h"
23 #include "fpdfsdk/fpdfxfa/cpdfxfa_page.h"
24 #endif  // PDF_ENABLE_XFA
25 
26 #ifdef _WIN32
27 #include <tchar.h>
28 #endif
29 
30 namespace {
31 
CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page)32 CPDF_TextPage* CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page) {
33   return static_cast<CPDF_TextPage*>(text_page);
34 }
35 
CPDFTextPageFindFromFPDFSchHandle(FPDF_SCHHANDLE handle)36 CPDF_TextPageFind* CPDFTextPageFindFromFPDFSchHandle(FPDF_SCHHANDLE handle) {
37   return static_cast<CPDF_TextPageFind*>(handle);
38 }
39 
CPDFLinkExtractFromFPDFPageLink(FPDF_PAGELINK link)40 CPDF_LinkExtract* CPDFLinkExtractFromFPDFPageLink(FPDF_PAGELINK link) {
41   return static_cast<CPDF_LinkExtract*>(link);
42 }
43 
44 }  // namespace
45 
FPDFText_LoadPage(FPDF_PAGE page)46 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
47   CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page);
48   if (!pPDFPage)
49     return nullptr;
50 
51 #ifdef PDF_ENABLE_XFA
52   CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
53   CPDFXFA_Context* pContext = pPage->GetContext();
54   CPDF_ViewerPreferences viewRef(pContext->GetPDFDoc());
55 #else  // PDF_ENABLE_XFA
56   CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument);
57 #endif  // PDF_ENABLE_XFA
58 
59   CPDF_TextPage* textpage = new CPDF_TextPage(
60       pPDFPage, viewRef.IsDirectionR2L() ? FPDFText_Direction::Right
61                                          : FPDFText_Direction::Left);
62   textpage->ParseTextPage();
63   return textpage;
64 }
65 
FPDFText_ClosePage(FPDF_TEXTPAGE text_page)66 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
67   delete CPDFTextPageFromFPDFTextPage(text_page);
68 }
69 
FPDFText_CountChars(FPDF_TEXTPAGE text_page)70 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
71   if (!text_page)
72     return -1;
73 
74   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
75   return textpage->CountChars();
76 }
77 
FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,int index)78 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
79                                                    int index) {
80   if (!text_page)
81     return 0;
82 
83   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
84   if (index < 0 || index >= textpage->CountChars())
85     return 0;
86 
87   FPDF_CHAR_INFO charinfo;
88   textpage->GetCharInfo(index, &charinfo);
89   return charinfo.m_Unicode;
90 }
91 
FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,int index)92 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
93                                               int index) {
94   if (!text_page)
95     return 0;
96   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
97 
98   if (index < 0 || index >= textpage->CountChars())
99     return 0;
100 
101   FPDF_CHAR_INFO charinfo;
102   textpage->GetCharInfo(index, &charinfo);
103   return charinfo.m_FontSize;
104 }
105 
FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,int index,double * left,double * right,double * bottom,double * top)106 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
107                                            int index,
108                                            double* left,
109                                            double* right,
110                                            double* bottom,
111                                            double* top) {
112   if (!text_page)
113     return;
114   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
115 
116   if (index < 0 || index >= textpage->CountChars())
117     return;
118   FPDF_CHAR_INFO charinfo;
119   textpage->GetCharInfo(index, &charinfo);
120   *left = charinfo.m_CharBox.left;
121   *right = charinfo.m_CharBox.right;
122   *bottom = charinfo.m_CharBox.bottom;
123   *top = charinfo.m_CharBox.top;
124 }
125 
// Selection: map a page position to a character index.
FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,double x,double y,double xTolerance,double yTolerance)127 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
128                                                  double x,
129                                                  double y,
130                                                  double xTolerance,
131                                                  double yTolerance) {
132   if (!text_page)
133     return -3;
134 
135   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
136   return textpage->GetIndexAtPos(
137       CFX_PointF(static_cast<FX_FLOAT>(x), static_cast<FX_FLOAT>(y)),
138       CFX_SizeF(static_cast<FX_FLOAT>(xTolerance),
139                 static_cast<FX_FLOAT>(yTolerance)));
140 }
141 
FPDFText_GetText(FPDF_TEXTPAGE text_page,int start,int count,unsigned short * result)142 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page,
143                                        int start,
144                                        int count,
145                                        unsigned short* result) {
146   if (!text_page)
147     return 0;
148 
149   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
150   if (start >= textpage->CountChars())
151     return 0;
152 
153   CFX_WideString str = textpage->GetPageText(start, count);
154   if (str.GetLength() > count)
155     str = str.Left(count);
156 
157   CFX_ByteString cbUTF16str = str.UTF16LE_Encode();
158   FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()),
159                cbUTF16str.GetLength());
160   cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength());
161 
162   return cbUTF16str.GetLength() / sizeof(unsigned short);
163 }
164 
FPDFText_CountRects(FPDF_TEXTPAGE text_page,int start,int count)165 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page,
166                                           int start,
167                                           int count) {
168   if (!text_page)
169     return 0;
170 
171   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
172   return textpage->CountRects(start, count);
173 }
174 
FPDFText_GetRect(FPDF_TEXTPAGE text_page,int rect_index,double * left,double * top,double * right,double * bottom)175 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
176                                         int rect_index,
177                                         double* left,
178                                         double* top,
179                                         double* right,
180                                         double* bottom) {
181   if (!text_page)
182     return;
183 
184   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
185   CFX_FloatRect rect;
186   textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom);
187   *left = rect.left;
188   *top = rect.top;
189   *right = rect.right;
190   *bottom = rect.bottom;
191 }
192 
FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left,double top,double right,double bottom,unsigned short * buffer,int buflen)193 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
194                                               double left,
195                                               double top,
196                                               double right,
197                                               double bottom,
198                                               unsigned short* buffer,
199                                               int buflen) {
200   if (!text_page)
201     return 0;
202 
203   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
204   CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right,
205                      (FX_FLOAT)top);
206   CFX_WideString str = textpage->GetTextByRect(rect);
207 
208   if (buflen <= 0 || !buffer)
209     return str.GetLength();
210 
211   CFX_ByteString cbUTF16Str = str.UTF16LE_Encode();
212   int len = cbUTF16Str.GetLength() / sizeof(unsigned short);
213   int size = buflen > len ? len : buflen;
214   FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)),
215                size * sizeof(unsigned short));
216   cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short));
217 
218   return size;
219 }
220 
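// Search.
// NOTE(review): the original note here read "-1 for end"; presumably a
// start_index of -1 means the search starts from the end of the page --
// confirm against public/fpdf_text.h.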
FPDFText_FindStart(FPDF_TEXTPAGE text_page,FPDF_WIDESTRING findwhat,unsigned long flags,int start_index)223 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page,
224                                                     FPDF_WIDESTRING findwhat,
225                                                     unsigned long flags,
226                                                     int start_index) {
227   if (!text_page)
228     return nullptr;
229 
230   CPDF_TextPageFind* textpageFind =
231       new CPDF_TextPageFind(CPDFTextPageFromFPDFTextPage(text_page));
232   FX_STRSIZE len = CFX_WideString::WStringLength(findwhat);
233   textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags,
234                           start_index);
235   return textpageFind;
236 }
237 
FPDFText_FindNext(FPDF_SCHHANDLE handle)238 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) {
239   if (!handle)
240     return false;
241 
242   CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
243   return textpageFind->FindNext();
244 }
245 
FPDFText_FindPrev(FPDF_SCHHANDLE handle)246 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
247   if (!handle)
248     return false;
249 
250   CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
251   return textpageFind->FindPrev();
252 }
253 
FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle)254 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
255   if (!handle)
256     return 0;
257 
258   CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
259   return textpageFind->GetCurOrder();
260 }
261 
FPDFText_GetSchCount(FPDF_SCHHANDLE handle)262 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
263   if (!handle)
264     return 0;
265 
266   CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
267   return textpageFind->GetMatchedCount();
268 }
269 
FPDFText_FindClose(FPDF_SCHHANDLE handle)270 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) {
271   if (!handle)
272     return;
273 
274   CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
275   delete textpageFind;
276   handle = nullptr;
277 }
278 
// Web links extracted from a text page.
FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page)280 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
281   if (!text_page)
282     return nullptr;
283 
284   CPDF_LinkExtract* pageLink =
285       new CPDF_LinkExtract(CPDFTextPageFromFPDFTextPage(text_page));
286   pageLink->ExtractLinks();
287   return pageLink;
288 }
289 
FPDFLink_CountWebLinks(FPDF_PAGELINK link_page)290 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
291   if (!link_page)
292     return 0;
293 
294   CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
295   return pdfium::base::checked_cast<int>(pageLink->CountLinks());
296 }
297 
FPDFLink_GetURL(FPDF_PAGELINK link_page,int link_index,unsigned short * buffer,int buflen)298 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page,
299                                       int link_index,
300                                       unsigned short* buffer,
301                                       int buflen) {
302   CFX_WideString wsUrl(L"");
303   if (link_page && link_index >= 0) {
304     CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
305     wsUrl = pageLink->GetURL(link_index);
306   }
307   CFX_ByteString cbUTF16URL = wsUrl.UTF16LE_Encode();
308   int required = cbUTF16URL.GetLength() / sizeof(unsigned short);
309   if (!buffer || buflen <= 0)
310     return required;
311 
312   int size = std::min(required, buflen);
313   if (size > 0) {
314     int buf_size = size * sizeof(unsigned short);
315     FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(buf_size), buf_size);
316   }
317   return size;
318 }
319 
FPDFLink_CountRects(FPDF_PAGELINK link_page,int link_index)320 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page,
321                                           int link_index) {
322   if (!link_page || link_index < 0)
323     return 0;
324 
325   CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
326   return pdfium::CollectionSize<int>(pageLink->GetRects(link_index));
327 }
328 
FPDFLink_GetRect(FPDF_PAGELINK link_page,int link_index,int rect_index,double * left,double * top,double * right,double * bottom)329 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
330                                         int link_index,
331                                         int rect_index,
332                                         double* left,
333                                         double* top,
334                                         double* right,
335                                         double* bottom) {
336   if (!link_page || link_index < 0 || rect_index < 0)
337     return;
338 
339   CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
340   std::vector<CFX_FloatRect> rectArray = pageLink->GetRects(link_index);
341   if (rect_index >= pdfium::CollectionSize<int>(rectArray))
342     return;
343 
344   *left = rectArray[rect_index].left;
345   *right = rectArray[rect_index].right;
346   *top = rectArray[rect_index].top;
347   *bottom = rectArray[rect_index].bottom;
348 }
349 
FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page)350 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
351   delete CPDFLinkExtractFromFPDFPageLink(link_page);
352 }
353