summaryrefslogtreecommitdiffstats
path: root/external
diff options
context:
space:
mode:
Diffstat (limited to 'external')
-rw-r--r-- external/pdfium/0012-svx-import-processed-PDF-text.patch.2 148
-rw-r--r-- external/pdfium/UnpackedTarball_pdfium.mk 1
2 files changed, 149 insertions, 0 deletions
diff --git a/external/pdfium/0012-svx-import-processed-PDF-text.patch.2 b/external/pdfium/0012-svx-import-processed-PDF-text.patch.2
new file mode 100644
index 000000000000..cae9ec808aba
--- /dev/null
+++ b/external/pdfium/0012-svx-import-processed-PDF-text.patch.2
@@ -0,0 +1,148 @@
+From 7e8ecec81f102993e3fe73256415dcf049c09e29 Mon Sep 17 00:00:00 2001
+From: Ashod Nakashian <ashod.nakashian@collabora.co.uk>
+Date: Tue, 5 Jun 2018 11:35:39 +0200
+Subject: [PATCH 12/14] svx: import processed PDF text
+
+---
+ pdfium/core/fpdftext/cpdf_textpage.cpp | 29 ++++++++++++++++++++++++
+ pdfium/core/fpdftext/cpdf_textpage.h | 2 ++
+ pdfium/fpdfsdk/fpdf_editpage.cpp | 41 ++++++++++++++++++++++++++++++++++
+ pdfium/public/fpdf_edit.h | 13 +++++++++++
+ 4 files changed, 85 insertions(+)
+
+diff --git a/pdfium/core/fpdftext/cpdf_textpage.cpp b/pdfium/core/fpdftext/cpdf_textpage.cpp
+index 5690698..4d7c48a 100644
+--- a/pdfium/core/fpdftext/cpdf_textpage.cpp
++++ b/pdfium/core/fpdftext/cpdf_textpage.cpp
+@@ -1464,3 +1464,32 @@ Optional<PAGECHAR_INFO> CPDF_TextPage::GenerateCharInfo(wchar_t unicode) {
+ info.m_Origin.x, info.m_Origin.y);
+ return info;
+ }
++
++// Returns the text of |pTextObj| as processed by this text page: every
++// non-NUL character in m_CharList attributed to |pTextObj|, plus at most one
++// space not attributed to it after each run of such characters. Returns an
++// empty string if the page has not been parsed.
++WideString CPDF_TextPage::GetTextObjectText(CPDF_TextObject* pTextObj)
++{
++  if (!m_bIsParsed)
++    return WideString();
++
++  constexpr wchar_t kSpace = L' ';
++  // True while the previously visited character belonged to |pTextObj|.
++  bool bPrevCharInObject = false;
++  WideString strText;
++  for (const auto& charinfo : m_CharList) {
++    if (charinfo.m_pTextObj == pTextObj) {
++      bPrevCharInObject = true;
++      if (charinfo.m_Unicode)
++        strText += charinfo.m_Unicode;
++    } else if (charinfo.m_Unicode == kSpace && bPrevCharInObject) {
++      // Keep one space that directly follows the object's characters.
++      strText += charinfo.m_Unicode;
++      bPrevCharInObject = false;
++    } else {
++      bPrevCharInObject = false;
++    }
++  }
++  return strText;
++}
+diff --git a/pdfium/core/fpdftext/cpdf_textpage.h b/pdfium/core/fpdftext/cpdf_textpage.h
+index 43a0312..7d5d5ec 100644
+--- a/pdfium/core/fpdftext/cpdf_textpage.h
++++ b/pdfium/core/fpdftext/cpdf_textpage.h
+@@ -105,6 +105,8 @@ class CPDF_TextPage {
+ WideString GetPageText(int start, int count) const;
+ WideString GetAllPageText() const { return GetPageText(0, CountChars()); }
+
++ WideString GetTextObjectText(CPDF_TextObject* pTextObj);
++
+ int CountRects(int start, int nCount);
+ bool GetRect(int rectIndex, CFX_FloatRect* pRect) const;
+
+diff --git a/pdfium/fpdfsdk/fpdf_editpage.cpp b/pdfium/fpdfsdk/fpdf_editpage.cpp
+index f4a1688..f34d3b5 100644
+--- a/pdfium/fpdfsdk/fpdf_editpage.cpp
++++ b/pdfium/fpdfsdk/fpdf_editpage.cpp
+@@ -27,6 +27,7 @@
+ #include "core/fpdfapi/parser/cpdf_string.h"
+ #include "core/fpdfdoc/cpdf_annot.h"
+ #include "core/fpdfdoc/cpdf_annotlist.h"
++#include "core/fpdftext/cpdf_textpage.h"
+ #include "fpdfsdk/cpdfsdk_helpers.h"
+ #include "public/fpdf_formfill.h"
+ #include "third_party/base/logging.h"
+@@ -732,6 +733,46 @@ FPDF_EXPORT int FPDF_CALLCONV FPDFTextObj_GetText(FPDF_PAGEOBJECT text_object,
+ return ret_count;
+ }
+
++// Writes |text_object|'s text, as processed by |page|, to |result| as
++// NUL-terminated UTF-16LE: skips |char_start| characters, copies at most
++// |char_count| (buffer needs one more). Returns units written incl. NUL, or 0.
++FPDF_EXPORT int FPDF_CALLCONV
++FPDFTextObj_GetTextProcessed(FPDF_PAGEOBJECT text_object, FPDF_TEXTPAGE page,
++                             int char_start, int char_count,
++                             unsigned short* result)
++{
++  if (!page || !text_object || char_start < 0 || char_count < 0 || !result)
++    return 0;
++  CPDF_TextObject* pTxtObj = CPDFTextObjectFromFPDFPageObject(text_object);
++  CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(page);
++  if (!pTxtObj || !textpage)  // NOTE: presumably null for non-text objects.
++    return 0;
++
++  const int char_available = textpage->CountChars() - char_start;
++  if (char_available <= 0)
++    return 0;
++  char_count = std::min(char_count, char_available);
++  if (char_count == 0) {
++    *result = '\0';  // Writing out "", which counts as 1 due to the NUL.
++    return 1;
++  }
++
++  // Cut the requested window; only ask Mid() for a non-empty, in-range span.
++  WideString str = textpage->GetTextObjectText(pTxtObj);
++  const size_t start = static_cast<size_t>(char_start);
++  const size_t avail = str.GetLength() > start ? str.GetLength() - start : 0;
++  const size_t count = std::min(avail, static_cast<size_t>(char_count));
++  str = count ? str.Mid(start, count) : WideString();
++
++  // UTF16LE_Encode doesn't pair surrogates, so the item count is unchanged.
++  ByteString byte_str = str.UTF16LE_Encode();
++  const size_t byte_str_len = byte_str.GetLength();
++  const int ret_count = static_cast<int>(byte_str_len / sizeof(unsigned short));
++  ASSERT(ret_count <= char_count + 1);  // +1 to account for the NUL terminator.
++  memcpy(result, byte_str.GetBuffer(byte_str_len), byte_str_len);
++  return ret_count;
++}
++
+ FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
+ FPDFTextObj_GetColor(FPDF_PAGEOBJECT text_object,
+ unsigned int* R,
+diff --git a/pdfium/public/fpdf_edit.h b/pdfium/public/fpdf_edit.h
+index f249e64..e14b2a5 100644
+--- a/pdfium/public/fpdf_edit.h
++++ b/pdfium/public/fpdf_edit.h
+@@ -1065,6 +1065,19 @@ FPDFTextObj_GetText(FPDF_PAGEOBJECT text_object,
+ int char_count,
+ unsigned short* result);
+
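++// Get the processed text of a text object as NUL-terminated UTF-16LE.
++// text_object - Handle of text object returned by FPDFPageObj_NewTextObj
++// or FPDFPageObj_NewTextObjEx.
++// page - Handle of the text page whose characters are searched for it.
++// char_start, char_count - Range to get; result needs char_count + 1 units.
++// Return Value: The number of characters (not bytes) written in result.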
++FPDF_EXPORT int FPDF_CALLCONV
++FPDFTextObj_GetTextProcessed(FPDF_PAGEOBJECT text_object,
++ FPDF_TEXTPAGE page,
++ int char_start,
++ int char_count,
++ unsigned short* result);
++
+ // Get the stroke RGBA of a text. Range of values: 0 - 255.
+ //
+ // path - the handle to the path object.
+--
+2.16.3
+
diff --git a/external/pdfium/UnpackedTarball_pdfium.mk b/external/pdfium/UnpackedTarball_pdfium.mk
index 6880ac0b670b..5525e9ddf65d 100644
--- a/external/pdfium/UnpackedTarball_pdfium.mk
+++ b/external/pdfium/UnpackedTarball_pdfium.mk
@@ -25,6 +25,7 @@ pdfium_patches += 0008-svx-correct-the-positioning-of-PDF-Paths-and-the-str.patc
pdfium_patches += 0009-svx-support-color-text-for-imported-PDFs.patch.2
pdfium_patches += 0010-svx-support-importing-forms-from-PDFs.patch.2
pdfium_patches += 0011-svx-correctly-possition-form-objects-from-PDF.patch.2
+pdfium_patches += 0012-svx-import-processed-PDF-text.patch.2
$(eval $(call gb_UnpackedTarball_UnpackedTarball,pdfium))