1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "public/fpdf_text.h"
8 
9 #include "core/include/fpdfdoc/fpdf_doc.h"
10 #include "core/include/fpdftext/fpdf_text.h"
11 #include "fpdfsdk/include/fsdk_define.h"
12 
13 #ifdef PDF_ENABLE_XFA
14 #include "fpdfsdk/include/fpdfxfa/fpdfxfa_doc.h"
15 #include "fpdfsdk/include/fpdfxfa/fpdfxfa_page.h"
16 #endif  // PDF_ENABLE_XFA
17 
18 #ifdef _WIN32
19 #include <tchar.h>
20 #endif
21 
FPDFText_LoadPage(FPDF_PAGE page)22 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
23   CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page);
24   if (!pPDFPage)
25     return nullptr;
26 #ifdef PDF_ENABLE_XFA
27   CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
28   CPDFXFA_Document* pDoc = pPage->GetDocument();
29   CPDF_ViewerPreferences viewRef(pDoc->GetPDFDoc());
30 #else  // PDF_ENABLE_XFA
31   CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument);
32 #endif  // PDF_ENABLE_XFA
33   IPDF_TextPage* textpage =
34       IPDF_TextPage::CreateTextPage(pPDFPage, viewRef.IsDirectionR2L());
35   textpage->ParseTextPage();
36   return textpage;
37 }
FPDFText_ClosePage(FPDF_TEXTPAGE text_page)38 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
39   delete (IPDF_TextPage*)text_page;
40 }
FPDFText_CountChars(FPDF_TEXTPAGE text_page)41 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
42   if (!text_page)
43     return -1;
44   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
45   return textpage->CountChars();
46 }
47 
FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,int index)48 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
49                                                    int index) {
50   if (!text_page)
51     return -1;
52   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
53 
54   if (index < 0 || index >= textpage->CountChars())
55     return 0;
56 
57   FPDF_CHAR_INFO charinfo;
58   textpage->GetCharInfo(index, &charinfo);
59   return charinfo.m_Unicode;
60 }
61 
FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,int index)62 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
63                                               int index) {
64   if (!text_page)
65     return 0;
66   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
67 
68   if (index < 0 || index >= textpage->CountChars())
69     return 0;
70 
71   FPDF_CHAR_INFO charinfo;
72   textpage->GetCharInfo(index, &charinfo);
73   return charinfo.m_FontSize;
74 }
75 
FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,int index,double * left,double * right,double * bottom,double * top)76 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
77                                            int index,
78                                            double* left,
79                                            double* right,
80                                            double* bottom,
81                                            double* top) {
82   if (!text_page)
83     return;
84   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
85 
86   if (index < 0 || index >= textpage->CountChars())
87     return;
88   FPDF_CHAR_INFO charinfo;
89   textpage->GetCharInfo(index, &charinfo);
90   *left = charinfo.m_CharBox.left;
91   *right = charinfo.m_CharBox.right;
92   *bottom = charinfo.m_CharBox.bottom;
93   *top = charinfo.m_CharBox.top;
94 }
95 
// Hit testing: map a page position to a character index.
FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,double x,double y,double xTolerance,double yTolerance)97 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
98                                                  double x,
99                                                  double y,
100                                                  double xTolerance,
101                                                  double yTolerance) {
102   if (!text_page)
103     return -3;
104   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
105   return textpage->GetIndexAtPos((FX_FLOAT)x, (FX_FLOAT)y, (FX_FLOAT)xTolerance,
106                                  (FX_FLOAT)yTolerance);
107 }
108 
FPDFText_GetText(FPDF_TEXTPAGE text_page,int start,int count,unsigned short * result)109 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page,
110                                        int start,
111                                        int count,
112                                        unsigned short* result) {
113   if (!text_page)
114     return 0;
115   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
116 
117   if (start >= textpage->CountChars())
118     return 0;
119 
120   CFX_WideString str = textpage->GetPageText(start, count);
121   if (str.GetLength() > count)
122     str = str.Left(count);
123 
124   CFX_ByteString cbUTF16str = str.UTF16LE_Encode();
125   FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()),
126                cbUTF16str.GetLength());
127   cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength());
128 
129   return cbUTF16str.GetLength() / sizeof(unsigned short);
130 }
131 
FPDFText_CountRects(FPDF_TEXTPAGE text_page,int start,int count)132 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page,
133                                           int start,
134                                           int count) {
135   if (!text_page)
136     return 0;
137   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
138   return textpage->CountRects(start, count);
139 }
FPDFText_GetRect(FPDF_TEXTPAGE text_page,int rect_index,double * left,double * top,double * right,double * bottom)140 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
141                                         int rect_index,
142                                         double* left,
143                                         double* top,
144                                         double* right,
145                                         double* bottom) {
146   if (!text_page)
147     return;
148   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
149   CFX_FloatRect rect;
150   textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom);
151   *left = rect.left;
152   *top = rect.top;
153   *right = rect.right;
154   *bottom = rect.bottom;
155 }
156 
FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left,double top,double right,double bottom,unsigned short * buffer,int buflen)157 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
158                                               double left,
159                                               double top,
160                                               double right,
161                                               double bottom,
162                                               unsigned short* buffer,
163                                               int buflen) {
164   if (!text_page)
165     return 0;
166   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
167   CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right,
168                      (FX_FLOAT)top);
169   CFX_WideString str = textpage->GetTextByRect(rect);
170 
171   if (buflen <= 0 || !buffer) {
172     return str.GetLength();
173   }
174 
175   CFX_ByteString cbUTF16Str = str.UTF16LE_Encode();
176   int len = cbUTF16Str.GetLength() / sizeof(unsigned short);
177   int size = buflen > len ? len : buflen;
178   FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)),
179                size * sizeof(unsigned short));
180   cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short));
181 
182   return size;
183 }
184 
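// Text search.
// NOTE(review): the original note "-1 for end" presumably refers to
// start_index, i.e. passing -1 starts the search from the end of the page --
// confirm against IPDF_TextPageFind::FindFirst().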
FPDFText_FindStart(FPDF_TEXTPAGE text_page,FPDF_WIDESTRING findwhat,unsigned long flags,int start_index)187 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page,
188                                                     FPDF_WIDESTRING findwhat,
189                                                     unsigned long flags,
190                                                     int start_index) {
191   if (!text_page)
192     return NULL;
193   IPDF_TextPageFind* textpageFind = NULL;
194   textpageFind = IPDF_TextPageFind::CreatePageFind((IPDF_TextPage*)text_page);
195   FX_STRSIZE len = CFX_WideString::WStringLength(findwhat);
196   textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags,
197                           start_index);
198   return textpageFind;
199 }
FPDFText_FindNext(FPDF_SCHHANDLE handle)200 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) {
201   if (!handle)
202     return FALSE;
203   IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
204   return textpageFind->FindNext();
205 }
FPDFText_FindPrev(FPDF_SCHHANDLE handle)206 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
207   if (!handle)
208     return FALSE;
209   IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
210   return textpageFind->FindPrev();
211 }
FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle)212 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
213   if (!handle)
214     return 0;
215   IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
216   return textpageFind->GetCurOrder();
217 }
FPDFText_GetSchCount(FPDF_SCHHANDLE handle)218 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
219   if (!handle)
220     return 0;
221   IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
222   return textpageFind->GetMatchedCount();
223 }
FPDFText_FindClose(FPDF_SCHHANDLE handle)224 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) {
225   if (!handle)
226     return;
227   IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
228   delete textpageFind;
229   handle = NULL;
230 }
231 
// Web links: extract URLs and their rectangles from a text page.
FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page)233 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
234   if (!text_page)
235     return NULL;
236   IPDF_LinkExtract* pageLink = NULL;
237   pageLink = IPDF_LinkExtract::CreateLinkExtract();
238   pageLink->ExtractLinks((IPDF_TextPage*)text_page);
239   return pageLink;
240 }
FPDFLink_CountWebLinks(FPDF_PAGELINK link_page)241 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
242   if (!link_page)
243     return 0;
244   IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
245   return pageLink->CountLinks();
246 }
FPDFLink_GetURL(FPDF_PAGELINK link_page,int link_index,unsigned short * buffer,int buflen)247 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page,
248                                       int link_index,
249                                       unsigned short* buffer,
250                                       int buflen) {
251   if (!link_page)
252     return 0;
253   IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
254   CFX_WideString url = pageLink->GetURL(link_index);
255 
256   CFX_ByteString cbUTF16URL = url.UTF16LE_Encode();
257   int len = cbUTF16URL.GetLength() / sizeof(unsigned short);
258   if (!buffer || buflen <= 0)
259     return len;
260   int size = len < buflen ? len : buflen;
261   if (size > 0) {
262     FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(size * sizeof(unsigned short)),
263                  size * sizeof(unsigned short));
264     cbUTF16URL.ReleaseBuffer(size * sizeof(unsigned short));
265   }
266   return size;
267 }
FPDFLink_CountRects(FPDF_PAGELINK link_page,int link_index)268 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page,
269                                           int link_index) {
270   if (!link_page)
271     return 0;
272   IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
273   CFX_RectArray rectArray;
274   pageLink->GetRects(link_index, rectArray);
275   return rectArray.GetSize();
276 }
FPDFLink_GetRect(FPDF_PAGELINK link_page,int link_index,int rect_index,double * left,double * top,double * right,double * bottom)277 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
278                                         int link_index,
279                                         int rect_index,
280                                         double* left,
281                                         double* top,
282                                         double* right,
283                                         double* bottom) {
284   if (!link_page)
285     return;
286   IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
287   CFX_RectArray rectArray;
288   pageLink->GetRects(link_index, rectArray);
289   if (rect_index >= 0 && rect_index < rectArray.GetSize()) {
290     CFX_FloatRect rect = rectArray.GetAt(rect_index);
291     *left = rect.left;
292     *right = rect.right;
293     *top = rect.top;
294     *bottom = rect.bottom;
295   }
296 }
FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page)297 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
298   delete (IPDF_LinkExtract*)link_page;
299 }
300