1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "public/fpdf_text.h"
8
9 #include "core/include/fpdfdoc/fpdf_doc.h"
10 #include "core/include/fpdftext/fpdf_text.h"
11 #include "fpdfsdk/include/fsdk_define.h"
12
13 #ifdef PDF_ENABLE_XFA
14 #include "fpdfsdk/include/fpdfxfa/fpdfxfa_doc.h"
15 #include "fpdfsdk/include/fpdfxfa/fpdfxfa_page.h"
16 #endif // PDF_ENABLE_XFA
17
18 #ifdef _WIN32
19 #include <tchar.h>
20 #endif
21
FPDFText_LoadPage(FPDF_PAGE page)22 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
23 CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page);
24 if (!pPDFPage)
25 return nullptr;
26 #ifdef PDF_ENABLE_XFA
27 CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
28 CPDFXFA_Document* pDoc = pPage->GetDocument();
29 CPDF_ViewerPreferences viewRef(pDoc->GetPDFDoc());
30 #else // PDF_ENABLE_XFA
31 CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument);
32 #endif // PDF_ENABLE_XFA
33 IPDF_TextPage* textpage =
34 IPDF_TextPage::CreateTextPage(pPDFPage, viewRef.IsDirectionR2L());
35 textpage->ParseTextPage();
36 return textpage;
37 }
FPDFText_ClosePage(FPDF_TEXTPAGE text_page)38 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
39 delete (IPDF_TextPage*)text_page;
40 }
FPDFText_CountChars(FPDF_TEXTPAGE text_page)41 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
42 if (!text_page)
43 return -1;
44 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
45 return textpage->CountChars();
46 }
47
FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,int index)48 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
49 int index) {
50 if (!text_page)
51 return -1;
52 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
53
54 if (index < 0 || index >= textpage->CountChars())
55 return 0;
56
57 FPDF_CHAR_INFO charinfo;
58 textpage->GetCharInfo(index, &charinfo);
59 return charinfo.m_Unicode;
60 }
61
FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,int index)62 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
63 int index) {
64 if (!text_page)
65 return 0;
66 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
67
68 if (index < 0 || index >= textpage->CountChars())
69 return 0;
70
71 FPDF_CHAR_INFO charinfo;
72 textpage->GetCharInfo(index, &charinfo);
73 return charinfo.m_FontSize;
74 }
75
FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,int index,double * left,double * right,double * bottom,double * top)76 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
77 int index,
78 double* left,
79 double* right,
80 double* bottom,
81 double* top) {
82 if (!text_page)
83 return;
84 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
85
86 if (index < 0 || index >= textpage->CountChars())
87 return;
88 FPDF_CHAR_INFO charinfo;
89 textpage->GetCharInfo(index, &charinfo);
90 *left = charinfo.m_CharBox.left;
91 *right = charinfo.m_CharBox.right;
92 *bottom = charinfo.m_CharBox.bottom;
93 *top = charinfo.m_CharBox.top;
94 }
95
96 // select
FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,double x,double y,double xTolerance,double yTolerance)97 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
98 double x,
99 double y,
100 double xTolerance,
101 double yTolerance) {
102 if (!text_page)
103 return -3;
104 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
105 return textpage->GetIndexAtPos((FX_FLOAT)x, (FX_FLOAT)y, (FX_FLOAT)xTolerance,
106 (FX_FLOAT)yTolerance);
107 }
108
FPDFText_GetText(FPDF_TEXTPAGE text_page,int start,int count,unsigned short * result)109 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page,
110 int start,
111 int count,
112 unsigned short* result) {
113 if (!text_page)
114 return 0;
115 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
116
117 if (start >= textpage->CountChars())
118 return 0;
119
120 CFX_WideString str = textpage->GetPageText(start, count);
121 if (str.GetLength() > count)
122 str = str.Left(count);
123
124 CFX_ByteString cbUTF16str = str.UTF16LE_Encode();
125 FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()),
126 cbUTF16str.GetLength());
127 cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength());
128
129 return cbUTF16str.GetLength() / sizeof(unsigned short);
130 }
131
FPDFText_CountRects(FPDF_TEXTPAGE text_page,int start,int count)132 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page,
133 int start,
134 int count) {
135 if (!text_page)
136 return 0;
137 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
138 return textpage->CountRects(start, count);
139 }
FPDFText_GetRect(FPDF_TEXTPAGE text_page,int rect_index,double * left,double * top,double * right,double * bottom)140 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
141 int rect_index,
142 double* left,
143 double* top,
144 double* right,
145 double* bottom) {
146 if (!text_page)
147 return;
148 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
149 CFX_FloatRect rect;
150 textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom);
151 *left = rect.left;
152 *top = rect.top;
153 *right = rect.right;
154 *bottom = rect.bottom;
155 }
156
FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left,double top,double right,double bottom,unsigned short * buffer,int buflen)157 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
158 double left,
159 double top,
160 double right,
161 double bottom,
162 unsigned short* buffer,
163 int buflen) {
164 if (!text_page)
165 return 0;
166 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
167 CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right,
168 (FX_FLOAT)top);
169 CFX_WideString str = textpage->GetTextByRect(rect);
170
171 if (buflen <= 0 || !buffer) {
172 return str.GetLength();
173 }
174
175 CFX_ByteString cbUTF16Str = str.UTF16LE_Encode();
176 int len = cbUTF16Str.GetLength() / sizeof(unsigned short);
177 int size = buflen > len ? len : buflen;
178 FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)),
179 size * sizeof(unsigned short));
180 cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short));
181
182 return size;
183 }
184
185 // Search
186 //-1 for end
FPDFText_FindStart(FPDF_TEXTPAGE text_page,FPDF_WIDESTRING findwhat,unsigned long flags,int start_index)187 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page,
188 FPDF_WIDESTRING findwhat,
189 unsigned long flags,
190 int start_index) {
191 if (!text_page)
192 return NULL;
193 IPDF_TextPageFind* textpageFind = NULL;
194 textpageFind = IPDF_TextPageFind::CreatePageFind((IPDF_TextPage*)text_page);
195 FX_STRSIZE len = CFX_WideString::WStringLength(findwhat);
196 textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags,
197 start_index);
198 return textpageFind;
199 }
FPDFText_FindNext(FPDF_SCHHANDLE handle)200 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) {
201 if (!handle)
202 return FALSE;
203 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
204 return textpageFind->FindNext();
205 }
FPDFText_FindPrev(FPDF_SCHHANDLE handle)206 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
207 if (!handle)
208 return FALSE;
209 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
210 return textpageFind->FindPrev();
211 }
FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle)212 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
213 if (!handle)
214 return 0;
215 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
216 return textpageFind->GetCurOrder();
217 }
FPDFText_GetSchCount(FPDF_SCHHANDLE handle)218 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
219 if (!handle)
220 return 0;
221 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
222 return textpageFind->GetMatchedCount();
223 }
FPDFText_FindClose(FPDF_SCHHANDLE handle)224 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) {
225 if (!handle)
226 return;
227 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
228 delete textpageFind;
229 handle = NULL;
230 }
231
232 // web link
FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page)233 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
234 if (!text_page)
235 return NULL;
236 IPDF_LinkExtract* pageLink = NULL;
237 pageLink = IPDF_LinkExtract::CreateLinkExtract();
238 pageLink->ExtractLinks((IPDF_TextPage*)text_page);
239 return pageLink;
240 }
FPDFLink_CountWebLinks(FPDF_PAGELINK link_page)241 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
242 if (!link_page)
243 return 0;
244 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
245 return pageLink->CountLinks();
246 }
FPDFLink_GetURL(FPDF_PAGELINK link_page,int link_index,unsigned short * buffer,int buflen)247 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page,
248 int link_index,
249 unsigned short* buffer,
250 int buflen) {
251 if (!link_page)
252 return 0;
253 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
254 CFX_WideString url = pageLink->GetURL(link_index);
255
256 CFX_ByteString cbUTF16URL = url.UTF16LE_Encode();
257 int len = cbUTF16URL.GetLength() / sizeof(unsigned short);
258 if (!buffer || buflen <= 0)
259 return len;
260 int size = len < buflen ? len : buflen;
261 if (size > 0) {
262 FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(size * sizeof(unsigned short)),
263 size * sizeof(unsigned short));
264 cbUTF16URL.ReleaseBuffer(size * sizeof(unsigned short));
265 }
266 return size;
267 }
FPDFLink_CountRects(FPDF_PAGELINK link_page,int link_index)268 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page,
269 int link_index) {
270 if (!link_page)
271 return 0;
272 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
273 CFX_RectArray rectArray;
274 pageLink->GetRects(link_index, rectArray);
275 return rectArray.GetSize();
276 }
FPDFLink_GetRect(FPDF_PAGELINK link_page,int link_index,int rect_index,double * left,double * top,double * right,double * bottom)277 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
278 int link_index,
279 int rect_index,
280 double* left,
281 double* top,
282 double* right,
283 double* bottom) {
284 if (!link_page)
285 return;
286 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
287 CFX_RectArray rectArray;
288 pageLink->GetRects(link_index, rectArray);
289 if (rect_index >= 0 && rect_index < rectArray.GetSize()) {
290 CFX_FloatRect rect = rectArray.GetAt(rect_index);
291 *left = rect.left;
292 *right = rect.right;
293 *top = rect.top;
294 *bottom = rect.bottom;
295 }
296 }
FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page)297 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
298 delete (IPDF_LinkExtract*)link_page;
299 }
300