// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

7 #ifndef CORE_INCLUDE_FPDFTEXT_FPDF_TEXT_H_
8 #define CORE_INCLUDE_FPDFTEXT_FPDF_TEXT_H_
9 
10 #include "../fpdfapi/fpdf_page.h"
11 #include "../fpdfapi/fpdf_pageobj.h"
12 #include "../fpdfapi/fpdf_parser.h"
13 
14 class CPDF_PageObjects;
15 class IPDF_LinkExtract;
16 class IPDF_ReflowedPage;
17 class IPDF_TextPage;
18 class IPDF_TextPageFind;
19 
// PDF2TXT_* option bits. Every value is a distinct power of two, so several
// options can be combined with bitwise OR into a single mask.
// NOTE(review): presumably these are the values accepted by the `flags`
// argument of the PDF_GetPageText* / PDF_GetTextStream_Unicode functions in
// this header -- confirm against the implementation.
#define PDF2TXT_AUTO_ROTATE         1
#define PDF2TXT_AUTO_WIDTH          2
#define PDF2TXT_KEEP_COLUMN         4
#define PDF2TXT_USE_OCR             8
#define PDF2TXT_INCLUDE_INVISIBLE   16
25 void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
26                      int iMinWidth, FX_DWORD flags);
27 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
28                              int iMinWidth, FX_DWORD flags);
29 void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
30                                FX_DWORD flags);
31 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary* pPage);
// CHAR_* classification codes.
// NOTE(review): presumably the values stored in FPDF_CHAR_INFO::m_Flag --
// confirm against the implementation.
// The negative value is parenthesized so the macro always expands to a
// single primary expression regardless of the surrounding operators.
#define CHAR_ERROR      (-1)
#define CHAR_NORMAL     0
#define CHAR_GENERATED  1
#define CHAR_UNUNICODE  2
36 typedef struct {
37     FX_WCHAR			m_Unicode;
38     FX_WCHAR			m_Charcode;
39     FX_INT32			m_Flag;
40     FX_FLOAT			m_FontSize;
41     FX_FLOAT			m_OriginX;
42     FX_FLOAT			m_OriginY;
43     CFX_FloatRect		m_CharBox;
44     CPDF_TextObject*	m_pTextObj;
45     CFX_AffineMatrix	m_Matrix;
46 } FPDF_CHAR_INFO;
47 typedef	CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray;
// Text layout codes.
// NOTE(review): the suffixes presumably abbreviate reading order
// (e.g. LRTB = left-to-right, top-to-bottom) -- confirm.
#define FPDFTEXT_LRTB   0
#define FPDFTEXT_RLTB   1
#define FPDFTEXT_TBRL   2

// Direction codes. Opposite directions are arithmetic negations of each
// other (LEFT == -RIGHT, UP == -DOWN).
// NOTE(review): presumably the `direction` argument of
// IPDF_TextPage::GetOrderByDirection() / GetWordBreak() -- confirm.
// Negative values are parenthesized so each macro expands to a single
// primary expression.
#define FPDFTEXT_LEFT   (-1)
#define FPDFTEXT_RIGHT  1
#define FPDFTEXT_UP     (-2)
#define FPDFTEXT_DOWN   2

// Writing-mode codes. The spelling "UNKNOW" is kept as-is because it is
// the public macro name.
#define FPDFTEXT_WRITINGMODE_UNKNOW 0
#define FPDFTEXT_WRITINGMODE_LRTB   1
#define FPDFTEXT_WRITINGMODE_RLTB   2
#define FPDFTEXT_WRITINGMODE_TBRL   3
59 class CPDFText_ParseOptions
60 {
61 public:
62 
63     CPDFText_ParseOptions();
64     FX_BOOL			m_bGetCharCodeOnly;
65     FX_BOOL			m_bNormalizeObjs;
66     FX_BOOL			m_bOutputHyphen;
67 };
68 class IPDF_TextPage
69 {
70 public:
71 
~IPDF_TextPage()72     virtual ~IPDF_TextPage() {}
73     static IPDF_TextPage*	CreateTextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions);
74     static IPDF_TextPage*	CreateTextPage(const CPDF_Page* pPage, int flags = 0);
75     static IPDF_TextPage*	CreateTextPage(const CPDF_PageObjects* pObjs, int flags = 0);
76     static IPDF_TextPage*	CreateReflowTextPage(IPDF_ReflowedPage* pRefPage);
77 
78     virtual void			NormalizeObjects(FX_BOOL bNormalize) = 0;
79 
80     virtual FX_BOOL			ParseTextPage() = 0;
81 
82 
83     virtual FX_BOOL			IsParsered() const = 0;
84 public:
85 
86     virtual int CharIndexFromTextIndex(int TextIndex) const = 0;
87 
88     virtual int TextIndexFromCharIndex(int CharIndex) const = 0;
89 
90 
91     virtual int				CountChars() const = 0;
92 
93     virtual	void			GetCharInfo(int index, FPDF_CHAR_INFO & info) const = 0;
94 
95     virtual void			GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const = 0;
96 
97 
98 
99     virtual int				GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0;
100 
101     virtual int				GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0;
102 
103     virtual	int				GetOrderByDirection(int index, int direction) const = 0;
104 
105     virtual CFX_WideString	GetTextByRect(const CFX_FloatRect& rect) const = 0;
106 
107     virtual void			GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray& resRectArray) const = 0;
108 
109 
110     virtual int				CountRects(int start, int nCount) = 0;
111 
112     virtual	void			GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top, FX_FLOAT& right, FX_FLOAT &bottom) const = 0;
113 
114     virtual FX_BOOL			GetBaselineRotate(int rectIndex, int& Rotate) = 0;
115 
116     virtual FX_BOOL			GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) = 0;
117 
118     virtual	int				CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE) = 0;
119 
120     virtual	void			GetBoundedSegment(int index, int& start, int& count) const = 0;
121 
122 
123     virtual int				GetWordBreak(int index, int direction) const = 0;
124 
125     virtual CFX_WideString	GetPageText(int start = 0, int nCount = -1 ) const = 0;
126 };
// Search option bits. Each value is a distinct single bit, so they can be
// combined with bitwise OR.
// NOTE(review): presumably the `flags` argument of
// IPDF_TextPageFind::FindFirst() -- confirm.
#define FPDFTEXT_MATCHCASE      0x00000001
#define FPDFTEXT_MATCHWHOLEWORD 0x00000002
#define FPDFTEXT_CONSECUTIVE    0x00000004
130 class IPDF_TextPageFind
131 {
132 public:
133 
~IPDF_TextPageFind()134     virtual	~IPDF_TextPageFind() {}
135 
136     static	IPDF_TextPageFind*	CreatePageFind(const IPDF_TextPage* pTextPage);
137 public:
138 
139     virtual	FX_BOOL				FindFirst(const CFX_WideString& findwhat, int flags, int startPos = 0) = 0;
140 
141     virtual	FX_BOOL				FindNext() = 0;
142 
143     virtual	FX_BOOL				FindPrev() = 0;
144 
145     virtual void				GetRectArray(CFX_RectArray& rects) const = 0;
146 
147     virtual int					GetCurOrder() const = 0;
148 
149     virtual int					GetMatchedCount() const = 0;
150 };
151 class IPDF_LinkExtract
152 {
153 public:
154 
~IPDF_LinkExtract()155     virtual	~IPDF_LinkExtract() {}
156 
157     static	IPDF_LinkExtract*	CreateLinkExtract();
158 
159     virtual FX_BOOL				ExtractLinks(const IPDF_TextPage* pTextPage) = 0;
160 public:
161 
162     virtual int					CountLinks() const = 0;
163 
164     virtual CFX_WideString		GetURL(int index) const = 0;
165 
166     virtual	void				GetBoundedSegment(int index, int& start, int& count) const = 0;
167 
168     virtual void				GetRects(int index, CFX_RectArray& rects) const = 0;
169 };
170 
171 #endif  // CORE_INCLUDE_FPDFTEXT_FPDF_TEXT_H_
172