diff options
Diffstat (limited to 'external/pdfium/0012-svx-import-processed-PDF-text.patch.2')
-rw-r--r-- | external/pdfium/0012-svx-import-processed-PDF-text.patch.2 | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/external/pdfium/0012-svx-import-processed-PDF-text.patch.2 b/external/pdfium/0012-svx-import-processed-PDF-text.patch.2 new file mode 100644 index 000000000000..cae9ec808aba --- /dev/null +++ b/external/pdfium/0012-svx-import-processed-PDF-text.patch.2 @@ -0,0 +1,148 @@ +From 7e8ecec81f102993e3fe73256415dcf049c09e29 Mon Sep 17 00:00:00 2001 +From: Ashod Nakashian <ashod.nakashian@collabora.co.uk> +Date: Tue, 5 Jun 2018 11:35:39 +0200 +Subject: [PATCH 12/14] svx: import processed PDF text + +--- + pdfium/core/fpdftext/cpdf_textpage.cpp | 29 ++++++++++++++++++++++++ + pdfium/core/fpdftext/cpdf_textpage.h | 2 ++ + pdfium/fpdfsdk/fpdf_editpage.cpp | 41 ++++++++++++++++++++++++++++++++++ + pdfium/public/fpdf_edit.h | 13 +++++++++++ + 4 files changed, 85 insertions(+) + +diff --git a/pdfium/core/fpdftext/cpdf_textpage.cpp b/pdfium/core/fpdftext/cpdf_textpage.cpp +index 5690698..4d7c48a 100644 +--- a/pdfium/core/fpdftext/cpdf_textpage.cpp ++++ b/pdfium/core/fpdftext/cpdf_textpage.cpp +@@ -1464,3 +1464,32 @@ Optional<PAGECHAR_INFO> CPDF_TextPage::GenerateCharInfo(wchar_t unicode) { + info.m_Origin.x, info.m_Origin.y); + return info; + } ++ ++WideString CPDF_TextPage::GetTextObjectText(CPDF_TextObject* pTextObj) ++{ ++ if (!m_bIsParsed) ++ return WideString(); ++ ++ float posy = 0; ++ bool IsContainPreChar = false; ++ bool IsAddLineFeed = false; ++ WideString strText; ++ for (const auto& charinfo : m_CharList) { ++ if (charinfo.m_pTextObj == pTextObj) { ++ IsContainPreChar = true; ++ IsAddLineFeed = false; ++ if (charinfo.m_Unicode) ++ strText += charinfo.m_Unicode; ++ } else if (charinfo.m_Unicode == 32) { ++ if (IsContainPreChar && charinfo.m_Unicode) { ++ strText += charinfo.m_Unicode; ++ IsContainPreChar = false; ++ IsAddLineFeed = false; ++ } ++ } else { ++ IsContainPreChar = false; ++ IsAddLineFeed = true; ++ } ++ } ++ return strText; ++} +diff --git a/pdfium/core/fpdftext/cpdf_textpage.h b/pdfium/core/fpdftext/cpdf_textpage.h +index 43a0312..7d5d5ec 100644 +--- a/pdfium/core/fpdftext/cpdf_textpage.h ++++ b/pdfium/core/fpdftext/cpdf_textpage.h +@@ -105,6 +105,8 @@ class CPDF_TextPage { + WideString GetPageText(int start, int count) const; + WideString GetAllPageText() const { return GetPageText(0, CountChars()); } + ++ WideString GetTextObjectText(CPDF_TextObject* pTextObj); ++ + int CountRects(int start, int nCount); + bool GetRect(int rectIndex, CFX_FloatRect* pRect) const; + +diff --git a/pdfium/fpdfsdk/fpdf_editpage.cpp b/pdfium/fpdfsdk/fpdf_editpage.cpp +index f4a1688..f34d3b5 100644 +--- a/pdfium/fpdfsdk/fpdf_editpage.cpp ++++ b/pdfium/fpdfsdk/fpdf_editpage.cpp +@@ -27,6 +27,7 @@ + #include "core/fpdfapi/parser/cpdf_string.h" + #include "core/fpdfdoc/cpdf_annot.h" + #include "core/fpdfdoc/cpdf_annotlist.h" ++#include "core/fpdftext/cpdf_textpage.h" + #include "fpdfsdk/cpdfsdk_helpers.h" + #include "public/fpdf_formfill.h" + #include "third_party/base/logging.h" +@@ -732,6 +733,46 @@ FPDF_EXPORT int FPDF_CALLCONV FPDFTextObj_GetText(FPDF_PAGEOBJECT text_object, + return ret_count; + } + ++FPDF_EXPORT int FPDF_CALLCONV ++FPDFTextObj_GetTextProcessed(FPDF_PAGEOBJECT text_object, ++ FPDF_TEXTPAGE page, ++ int char_start, ++ int char_count, ++ unsigned short* result) ++{ ++ if (!page || !text_object || char_start < 0 || char_count < 0 || !result) ++ return 0; ++ ++ CPDF_TextObject* pTxtObj = CPDFTextObjectFromFPDFPageObject(text_object); ++ CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(page); ++ int char_available = textpage->CountChars() - char_start; ++ if (char_available <= 0) ++ return 0; ++ ++ char_count = std::min(char_count, char_available); ++ if (char_count == 0) { ++ // Writing out "", which has a character count of 1 due to the NUL. ++ *result = '\0'; ++ return 1; ++ } ++ ++ WideString str = textpage->GetTextObjectText(pTxtObj); ++ ++ if (str.GetLength() > static_cast<size_t>(char_count)) ++ str = str.Left(static_cast<size_t>(char_count)); ++ ++ // UFT16LE_Encode doesn't handle surrogate pairs properly, so it is expected ++ // the number of items to stay the same. ++ ByteString byte_str = str.UTF16LE_Encode(); ++ size_t byte_str_len = byte_str.GetLength(); ++ constexpr size_t kBytesPerCharacter = sizeof(unsigned short); ++ int ret_count = byte_str_len / kBytesPerCharacter; ++ ++ ASSERT(ret_count <= char_count + 1); // +1 to account for the NUL terminator. ++ memcpy(result, byte_str.GetBuffer(byte_str_len), byte_str_len); ++ return ret_count; ++} ++ + FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV + FPDFTextObj_GetColor(FPDF_PAGEOBJECT text_object, + unsigned int* R, +diff --git a/pdfium/public/fpdf_edit.h b/pdfium/public/fpdf_edit.h +index f249e64..e14b2a5 100644 +--- a/pdfium/public/fpdf_edit.h ++++ b/pdfium/public/fpdf_edit.h +@@ -1065,6 +1065,19 @@ FPDFTextObj_GetText(FPDF_PAGEOBJECT text_object, + int char_count, + unsigned short* result); + ++// Get the processed text of a text object. ++// ++// text_object - Handle of text object returned by FPDFPageObj_NewTextObj ++// or FPDFPageObj_NewTextObjEx. ++// Return Value: ++// The number of characters (not bytes) written in result. ++FPDF_EXPORT int FPDF_CALLCONV ++FPDFTextObj_GetTextProcessed(FPDF_PAGEOBJECT text_object, ++ FPDF_TEXTPAGE page, ++ int char_start, ++ int char_count, ++ unsigned short* result); ++ + // Get the stroke RGBA of a text. Range of values: 0 - 255. + // + // path - the handle to the path object. +-- +2.16.3 + |