// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <algorithm>
#include <cstring>
#include <iterator>
#include <limits>
#include <map>
#include <memory>
#include <sstream>
#include <utility>
#include <vector>

#include "core/fpdfapi/font/cpdf_font.h"
#include "core/fpdfapi/font/cpdf_type1font.h"
#include "core/fpdfapi/page/cpdf_docpagedata.h"
#include "core/fpdfapi/page/cpdf_textobject.h"
#include "core/fpdfapi/page/cpdf_textstate.h"
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/cpdf_string.h"
#include "core/fpdftext/cpdf_textpage.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxge/cfx_fontmgr.h"
#include "core/fxge/fx_font.h"
#include "fpdfsdk/cpdfsdk_helpers.h"
#include "public/fpdf_edit.h"
#include "third_party/base/ptr_util.h"

// These checks are here because core/ and public/ cannot depend on each other.
static_assert(static_cast(TextRenderingMode::MODE_UNKNOWN) == FPDF_TEXTRENDERMODE_UNKNOWN, "TextRenderingMode::MODE_UNKNOWN value mismatch"); static_assert(static_cast(TextRenderingMode::MODE_FILL) == FPDF_TEXTRENDERMODE_FILL, "TextRenderingMode::MODE_FILL value mismatch"); static_assert(static_cast(TextRenderingMode::MODE_STROKE) == FPDF_TEXTRENDERMODE_STROKE, "TextRenderingMode::MODE_STROKE value mismatch"); static_assert(static_cast(TextRenderingMode::MODE_FILL_STROKE) == FPDF_TEXTRENDERMODE_FILL_STROKE, "TextRenderingMode::MODE_FILL_STROKE value mismatch"); static_assert(static_cast(TextRenderingMode::MODE_INVISIBLE) == FPDF_TEXTRENDERMODE_INVISIBLE, "TextRenderingMode::MODE_INVISIBLE value mismatch"); static_assert(static_cast(TextRenderingMode::MODE_FILL_CLIP) == FPDF_TEXTRENDERMODE_FILL_CLIP, "TextRenderingMode::MODE_FILL_CLIP value mismatch"); static_assert(static_cast(TextRenderingMode::MODE_STROKE_CLIP) == FPDF_TEXTRENDERMODE_STROKE_CLIP, "TextRenderingMode::MODE_STROKE_CLIP value mismatch"); static_assert(static_cast(TextRenderingMode::MODE_FILL_STROKE_CLIP) == FPDF_TEXTRENDERMODE_FILL_STROKE_CLIP, "TextRenderingMode::MODE_FILL_STROKE_CLIP value mismatch"); static_assert(static_cast(TextRenderingMode::MODE_CLIP) == FPDF_TEXTRENDERMODE_CLIP, "TextRenderingMode::MODE_CLIP value mismatch"); static_assert(static_cast(TextRenderingMode::MODE_LAST) == FPDF_TEXTRENDERMODE_LAST, "TextRenderingMode::MODE_LAST value mismatch"); namespace { CPDF_Dictionary* LoadFontDesc(CPDF_Document* pDoc, const ByteString& font_name, CFX_Font* pFont, pdfium::span span, int font_type) { CPDF_Dictionary* pFontDesc = pDoc->NewIndirect(); pFontDesc->SetNewFor("Type", "FontDescriptor"); pFontDesc->SetNewFor("FontName", font_name); int flags = 0; if (FXFT_Is_Face_fixedwidth(pFont->GetFaceRec())) flags |= FXFONT_FIXED_PITCH; if (font_name.Contains("Serif")) flags |= FXFONT_SERIF; if (FXFT_Is_Face_Italic(pFont->GetFaceRec())) flags |= FXFONT_ITALIC; if 
(FXFT_Is_Face_Bold(pFont->GetFaceRec())) flags |= FXFONT_FORCE_BOLD; // TODO(npm): How do I know if a font is symbolic, script, allcap, smallcap flags |= FXFONT_NONSYMBOLIC; pFontDesc->SetNewFor("Flags", flags); FX_RECT bbox; pFont->GetBBox(&bbox); pFontDesc->SetRectFor("FontBBox", CFX_FloatRect(bbox)); // TODO(npm): calculate italic angle correctly pFontDesc->SetNewFor("ItalicAngle", pFont->IsItalic() ? -12 : 0); pFontDesc->SetNewFor("Ascent", pFont->GetAscent()); pFontDesc->SetNewFor("Descent", pFont->GetDescent()); // TODO(npm): calculate the capheight, stemV correctly pFontDesc->SetNewFor("CapHeight", pFont->GetAscent()); pFontDesc->SetNewFor("StemV", pFont->IsBold() ? 120 : 70); CPDF_Stream* pStream = pDoc->NewIndirect(); pStream->SetData(span); // TODO(npm): Lengths for Type1 fonts. if (font_type == FPDF_FONT_TRUETYPE) { pStream->GetDict()->SetNewFor("Length1", static_cast(span.size())); } ByteString fontFile = font_type == FPDF_FONT_TYPE1 ? "FontFile" : "FontFile2"; pFontDesc->SetNewFor(fontFile, pDoc, pStream->GetObjNum()); return pFontDesc; } const char ToUnicodeStart[] = "/CIDInit /ProcSet findresource begin\n" "12 dict begin\n" "begincmap\n" "/CIDSystemInfo\n" "<> def\n" "/CMapName /Adobe-Identity-H def\n" "CMapType 2 def\n" "1 begincodespacerange\n" "<0000> \n" "endcodespacerange\n"; const char ToUnicodeEnd[] = "endcmap\n" "CMapName currentdict /CMap defineresource pop\n" "end\n" "end\n"; void AddCharcode(std::ostringstream* pBuffer, uint32_t number) { ASSERT(number <= 0xFFFF); *pBuffer << "<"; char ans[4]; FXSYS_IntToFourHexChars(number, ans); for (size_t i = 0; i < 4; ++i) *pBuffer << ans[i]; *pBuffer << ">"; } // PDF spec 1.7 Section 5.9.2: "Unicode character sequences as expressed in // UTF-16BE encoding." 
See https://en.wikipedia.org/wiki/UTF-16#Description void AddUnicode(std::ostringstream* pBuffer, uint32_t unicode) { if (unicode >= 0xD800 && unicode <= 0xDFFF) unicode = 0; char ans[8]; *pBuffer << "<"; size_t numChars = FXSYS_ToUTF16BE(unicode, ans); for (size_t i = 0; i < numChars; ++i) *pBuffer << ans[i]; *pBuffer << ">"; } // Loads the charcode to unicode mapping into a stream CPDF_Stream* LoadUnicode(CPDF_Document* pDoc, const std::map& to_unicode) { // A map charcode->unicode std::map char_to_uni; // A map to vector v of unicode characters of size (end // - start + 1). This abbreviates: start->v[0], start+1->v[1], etc. PDF spec // 1.7 Section 5.9.2 says that only the last byte of the unicode may change. std::map, std::vector> map_range_vector; // A map -> unicode // This abbreviates: start->unicode, start+1->unicode+1, etc. // PDF spec 1.7 Section 5.9.2 says that only the last byte of the unicode may // change. std::map, uint32_t> map_range; // Calculate the maps for (auto iter = to_unicode.begin(); iter != to_unicode.end(); ++iter) { uint32_t firstCharcode = iter->first; uint32_t firstUnicode = iter->second; if (std::next(iter) == to_unicode.end() || firstCharcode + 1 != std::next(iter)->first) { char_to_uni[firstCharcode] = firstUnicode; continue; } ++iter; uint32_t curCharcode = iter->first; uint32_t curUnicode = iter->second; if (curCharcode % 256 == 0) { char_to_uni[firstCharcode] = firstUnicode; char_to_uni[curCharcode] = curUnicode; continue; } const size_t maxExtra = 255 - (curCharcode % 256); auto next_it = std::next(iter); if (firstUnicode + 1 != curUnicode) { // Consecutive charcodes mapping to non-consecutive unicodes std::vector unicodes; unicodes.push_back(firstUnicode); unicodes.push_back(curUnicode); for (size_t i = 0; i < maxExtra; ++i) { if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first) break; ++iter; ++curCharcode; unicodes.push_back(iter->second); next_it = std::next(iter); } ASSERT(iter->first - firstCharcode + 1 == 
unicodes.size()); map_range_vector[std::make_pair(firstCharcode, iter->first)] = unicodes; continue; } // Consecutive charcodes mapping to consecutive unicodes for (size_t i = 0; i < maxExtra; ++i) { if (next_it == to_unicode.end() || curCharcode + 1 != next_it->first || curUnicode + 1 != next_it->second) { break; } ++iter; ++curCharcode; ++curUnicode; next_it = std::next(iter); } map_range[std::make_pair(firstCharcode, curCharcode)] = firstUnicode; } std::ostringstream buffer; buffer << ToUnicodeStart; // Add maps to buffer buffer << static_cast(char_to_uni.size()) << " beginbfchar\n"; for (const auto& iter : char_to_uni) { AddCharcode(&buffer, iter.first); buffer << " "; AddUnicode(&buffer, iter.second); buffer << "\n"; } buffer << "endbfchar\n" << static_cast(map_range_vector.size() + map_range.size()) << " beginbfrange\n"; for (const auto& iter : map_range_vector) { const std::pair& charcodeRange = iter.first; AddCharcode(&buffer, charcodeRange.first); buffer << " "; AddCharcode(&buffer, charcodeRange.second); buffer << " ["; const std::vector& unicodes = iter.second; for (size_t i = 0; i < unicodes.size(); ++i) { uint32_t uni = unicodes[i]; AddUnicode(&buffer, uni); if (i != unicodes.size() - 1) buffer << " "; } buffer << "]\n"; } for (const auto& iter : map_range) { const std::pair& charcodeRange = iter.first; AddCharcode(&buffer, charcodeRange.first); buffer << " "; AddCharcode(&buffer, charcodeRange.second); buffer << " "; AddUnicode(&buffer, iter.second); buffer << "\n"; } buffer << "endbfrange\n"; buffer << ToUnicodeEnd; // TODO(npm): Encrypt / Compress? CPDF_Stream* stream = pDoc->NewIndirect(); stream->SetDataFromStringstream(&buffer); return stream; } RetainPtr LoadSimpleFont(CPDF_Document* pDoc, std::unique_ptr pFont, pdfium::span span, int font_type) { CPDF_Dictionary* pFontDict = pDoc->NewIndirect(); pFontDict->SetNewFor("Type", "Font"); pFontDict->SetNewFor( "Subtype", font_type == FPDF_FONT_TYPE1 ? 
"Type1" : "TrueType"); ByteString name = pFont->GetBaseFontName(font_type == FPDF_FONT_TYPE1); if (name.IsEmpty()) name = CFX_Font::kUntitledFontName; pFontDict->SetNewFor("BaseFont", name); uint32_t dwGlyphIndex; uint32_t dwCurrentChar = FT_Get_First_Char(pFont->GetFaceRec(), &dwGlyphIndex); static constexpr uint32_t kMaxSimpleFontChar = 0xFF; if (dwCurrentChar > kMaxSimpleFontChar || dwGlyphIndex == 0) return nullptr; pFontDict->SetNewFor("FirstChar", static_cast(dwCurrentChar)); CPDF_Array* widthsArray = pDoc->NewIndirect(); while (true) { uint32_t width = std::min(pFont->GetGlyphWidth(dwGlyphIndex), static_cast(std::numeric_limits::max())); widthsArray->AddNew(static_cast(width)); uint32_t nextChar = FT_Get_Next_Char(pFont->GetFaceRec(), dwCurrentChar, &dwGlyphIndex); // Simple fonts have 1-byte charcodes only. if (nextChar > kMaxSimpleFontChar || dwGlyphIndex == 0) break; for (uint32_t i = dwCurrentChar + 1; i < nextChar; i++) widthsArray->AddNew(0); dwCurrentChar = nextChar; } pFontDict->SetNewFor("LastChar", static_cast(dwCurrentChar)); pFontDict->SetNewFor("Widths", pDoc, widthsArray->GetObjNum()); CPDF_Dictionary* pFontDesc = LoadFontDesc(pDoc, name, pFont.get(), span, font_type); pFontDict->SetNewFor("FontDescriptor", pDoc, pFontDesc->GetObjNum()); return CPDF_DocPageData::FromDocument(pDoc)->GetFont(pFontDict); } RetainPtr LoadCompositeFont(CPDF_Document* pDoc, std::unique_ptr pFont, pdfium::span span, int font_type) { CPDF_Dictionary* pFontDict = pDoc->NewIndirect(); pFontDict->SetNewFor("Type", "Font"); pFontDict->SetNewFor("Subtype", "Type0"); // TODO(npm): Get the correct encoding, if it's not identity. ByteString encoding = "Identity-H"; pFontDict->SetNewFor("Encoding", encoding); ByteString name = pFont->GetBaseFontName(font_type == FPDF_FONT_TYPE1); if (name.IsEmpty()) name = CFX_Font::kUntitledFontName; pFontDict->SetNewFor( "BaseFont", font_type == FPDF_FONT_TYPE1 ? 
name + "-" + encoding : name); CPDF_Dictionary* pCIDFont = pDoc->NewIndirect(); pCIDFont->SetNewFor("Type", "Font"); pCIDFont->SetNewFor("Subtype", font_type == FPDF_FONT_TYPE1 ? "CIDFontType0" : "CIDFontType2"); pCIDFont->SetNewFor("BaseFont", name); // TODO(npm): Maybe use FT_Get_CID_Registry_Ordering_Supplement to get the // CIDSystemInfo CPDF_Dictionary* pCIDSystemInfo = pDoc->NewIndirect(); pCIDSystemInfo->SetNewFor("Registry", "Adobe", false); pCIDSystemInfo->SetNewFor("Ordering", "Identity", false); pCIDSystemInfo->SetNewFor("Supplement", 0); pCIDFont->SetNewFor("CIDSystemInfo", pDoc, pCIDSystemInfo->GetObjNum()); CPDF_Dictionary* pFontDesc = LoadFontDesc(pDoc, name, pFont.get(), span, font_type); pCIDFont->SetNewFor("FontDescriptor", pDoc, pFontDesc->GetObjNum()); uint32_t dwGlyphIndex; uint32_t dwCurrentChar = FT_Get_First_Char(pFont->GetFaceRec(), &dwGlyphIndex); static constexpr uint32_t kMaxUnicode = 0x10FFFF; // If it doesn't have a single char, just fail if (dwGlyphIndex == 0 || dwCurrentChar > kMaxUnicode) return nullptr; std::map to_unicode; std::map widths; while (true) { if (dwCurrentChar > kMaxUnicode) break; if (!pdfium::ContainsKey(widths, dwGlyphIndex)) widths[dwGlyphIndex] = pFont->GetGlyphWidth(dwGlyphIndex); to_unicode[dwGlyphIndex] = dwCurrentChar; dwCurrentChar = FT_Get_Next_Char(pFont->GetFaceRec(), dwCurrentChar, &dwGlyphIndex); if (dwGlyphIndex == 0) break; } CPDF_Array* widthsArray = pDoc->NewIndirect(); for (auto it = widths.begin(); it != widths.end(); ++it) { int ch = it->first; int w = it->second; if (std::next(it) == widths.end()) { // Only one char left, use format c [w] auto oneW = pdfium::MakeRetain(); oneW->AddNew(w); widthsArray->AddNew(ch); widthsArray->Add(oneW); break; } ++it; int next_ch = it->first; int next_w = it->second; if (next_ch == ch + 1 && next_w == w) { // The array can have a group c_first c_last w: all CIDs in the range from // c_first to c_last will have width w widthsArray->AddNew(ch); ch = next_ch; while 
(true) { auto next_it = std::next(it); if (next_it == widths.end() || next_it->first != it->first + 1 || next_it->second != it->second) { break; } ++it; ch = it->first; } widthsArray->AddNew(ch); widthsArray->AddNew(w); continue; } // Otherwise we can have a group of the form c [w1 w2 ...]: c has width // w1, c+1 has width w2, etc. widthsArray->AddNew(ch); auto curWidthArray = pdfium::MakeRetain(); curWidthArray->AddNew(w); curWidthArray->AddNew(next_w); while (true) { auto next_it = std::next(it); if (next_it == widths.end() || next_it->first != it->first + 1) break; ++it; curWidthArray->AddNew(static_cast(it->second)); } widthsArray->Add(curWidthArray); } pCIDFont->SetNewFor("W", pDoc, widthsArray->GetObjNum()); // TODO(npm): Support vertical writing auto* pDescendant = pFontDict->SetNewFor("DescendantFonts"); pDescendant->AddNew(pDoc, pCIDFont->GetObjNum()); CPDF_Stream* toUnicodeStream = LoadUnicode(pDoc, to_unicode); pFontDict->SetNewFor("ToUnicode", pDoc, toUnicodeStream->GetObjNum()); return CPDF_DocPageData::FromDocument(pDoc)->GetFont(pFontDict); } CPDF_TextObject* CPDFTextObjectFromFPDFPageObject(FPDF_PAGEOBJECT page_object) { auto* obj = CPDFPageObjectFromFPDFPageObject(page_object); return obj ? obj->AsText() : nullptr; } } // namespace FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV FPDFPageObj_NewTextObj(FPDF_DOCUMENT document, FPDF_BYTESTRING font, float font_size) { CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); if (!pDoc) return nullptr; RetainPtr pFont = CPDF_Font::GetStockFont(pDoc, ByteStringView(font)); if (!pFont) return nullptr; auto pTextObj = pdfium::MakeUnique(); pTextObj->m_TextState.SetFont(pFont); pTextObj->m_TextState.SetFontSize(font_size); pTextObj->DefaultStates(); // Caller takes ownership. 
return FPDFPageObjectFromCPDFPageObject(pTextObj.release()); } FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_SetText(FPDF_PAGEOBJECT text_object, FPDF_WIDESTRING text) { CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text_object); if (!pTextObj) return false; WideString encodedText = WideStringFromFPDFWideString(text); ByteString byteText; for (wchar_t wc : encodedText) { pTextObj->GetFont()->AppendChar( &byteText, pTextObj->GetFont()->CharCodeFromUnicode(wc)); } pTextObj->SetText(byteText); return true; } FPDF_EXPORT FPDF_FONT FPDF_CALLCONV FPDFText_LoadFont(FPDF_DOCUMENT document, const uint8_t* data, uint32_t size, int font_type, FPDF_BOOL cid) { CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); if (!pDoc || !data || size == 0 || (font_type != FPDF_FONT_TYPE1 && font_type != FPDF_FONT_TRUETYPE)) { return nullptr; } auto span = pdfium::make_span(data, size); auto pFont = pdfium::MakeUnique(); // TODO(npm): Maybe use FT_Get_X11_Font_Format to check format? Otherwise, we // are allowing giving any font that can be loaded on freetype and setting it // as any font type. if (!pFont->LoadEmbedded(span, false)) return nullptr; // Caller takes ownership. return FPDFFontFromCPDFFont( cid ? LoadCompositeFont(pDoc, std::move(pFont), span, font_type).Leak() : LoadSimpleFont(pDoc, std::move(pFont), span, font_type).Leak()); } FPDF_EXPORT FPDF_FONT FPDF_CALLCONV FPDFText_LoadStandardFont(FPDF_DOCUMENT document, FPDF_BYTESTRING font) { CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); if (!pDoc) return nullptr; // Caller takes ownership. 
return FPDFFontFromCPDFFont( CPDF_Font::GetStockFont(pDoc, ByteStringView(font)).Leak()); } FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFTextObj_GetMatrix(FPDF_PAGEOBJECT text, FS_MATRIX* matrix) { if (!matrix) return false; CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text); if (!pTextObj) return false; *matrix = FSMatrixFromCFXMatrix(pTextObj->GetTextMatrix()); return true; } FPDF_EXPORT float FPDF_CALLCONV FPDFTextObj_GetFontSize(FPDF_PAGEOBJECT text) { CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text); return pTextObj ? pTextObj->GetFontSize() : 0.0f; } FPDF_EXPORT unsigned long FPDF_CALLCONV FPDFTextObj_GetFontName(FPDF_PAGEOBJECT text, void* buffer, unsigned long length) { CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text); if (!pTextObj) return 0; RetainPtr pPdfFont = pTextObj->GetFont(); CFX_Font* pFont = pPdfFont->GetFont(); ByteString name = pFont->GetFamilyName(); unsigned long dwStringLen = name.GetLength() + 1; if (buffer && length >= dwStringLen) memcpy(buffer, name.c_str(), dwStringLen); return dwStringLen; } FPDF_EXPORT unsigned long FPDF_CALLCONV FPDFTextObj_GetText(FPDF_PAGEOBJECT text_object, FPDF_TEXTPAGE text_page, void* buffer, unsigned long length) { CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text_object); if (!pTextObj) return 0; CPDF_TextPage* pTextPage = CPDFTextPageFromFPDFTextPage(text_page); if (!pTextPage) return 0; WideString text = pTextPage->GetTextByObject(pTextObj); return Utf16EncodeMaybeCopyAndReturnLength(text, buffer, length); } FPDF_EXPORT void FPDF_CALLCONV FPDFFont_Close(FPDF_FONT font) { // Take back ownership from caller and release. 
RetainPtr().Unleak(CPDFFontFromFPDFFont(font)); } FPDF_EXPORT FPDF_PAGEOBJECT FPDF_CALLCONV FPDFPageObj_CreateTextObj(FPDF_DOCUMENT document, FPDF_FONT font, float font_size) { CPDF_Document* pDoc = CPDFDocumentFromFPDFDocument(document); CPDF_Font* pFont = CPDFFontFromFPDFFont(font); if (!pDoc || !pFont) return nullptr; auto pTextObj = pdfium::MakeUnique(); pTextObj->m_TextState.SetFont( CPDF_DocPageData::FromDocument(pDoc)->GetFont(pFont->GetFontDict())); pTextObj->m_TextState.SetFontSize(font_size); pTextObj->DefaultStates(); return FPDFPageObjectFromCPDFPageObject(pTextObj.release()); } FPDF_EXPORT FPDF_TEXT_RENDERMODE FPDF_CALLCONV FPDFTextObj_GetTextRenderMode(FPDF_PAGEOBJECT text) { CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text); if (!pTextObj) return FPDF_TEXTRENDERMODE_UNKNOWN; return static_cast(pTextObj->m_TextState.GetTextMode()); } FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFTextObj_SetTextRenderMode(FPDF_PAGEOBJECT text, FPDF_TEXT_RENDERMODE render_mode) { if (render_mode <= FPDF_TEXTRENDERMODE_UNKNOWN || render_mode > FPDF_TEXTRENDERMODE_LAST) { return false; } CPDF_TextObject* pTextObj = CPDFTextObjectFromFPDFPageObject(text); if (!pTextObj) return false; pTextObj->m_TextState.SetTextMode( static_cast(render_mode)); return true; }