summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDennis Francis <dennis.francis@collabora.com>2022-10-18 16:14:37 +0530
committerXisco Fauli <xiscofauli@libreoffice.org>2022-11-02 11:13:43 +0100
commitc830ca306d55888c3f4222b7247bc0f9e6947bba (patch)
tree02f6cd625357ac1c4bf476e4923140d5ea88732d
parentupdate credits (diff)
downloadcore-c830ca306d55888c3f4222b7247bc0f9e6947bba.tar.gz
core-c830ca306d55888c3f4222b7247bc0f9e6947bba.zip
vcl: re-exporting broken pdfs -> empty pages
Certain pdf documents, when loaded in LO_IMPORT_USE_PDFIUM=1 mode, even if pdf-version < v1.6, sometimes have missing objects that are referred to by other objects, for instance for determining their stream length. As a result parsing fails and results in a pdf with empty pages. A round trip through pdfium and exporting to v1.6 seems to cure the issue. Possibly it does some repairing work to determine the length of the stream in an independent pass through the file. Conflicts: vcl/source/filter/ipdf/pdfread.cxx Change-Id: Id09f67eddab4163ed12a3a3f3a73baf92e2912aa Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141856 Tested-by: Jenkins Reviewed-by: Dennis Francis <dennis.francis@collabora.com> (cherry picked from commit 3f9e8ac6172f5b1dfd2869ee1c6aea4f24d3f480) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/142137 Reviewed-by: Xisco Fauli <xiscofauli@libreoffice.org>
-rw-r--r--include/vcl/filter/pdfdocument.hxx2
-rw-r--r--vcl/Library_vcl.mk1
-rw-r--r--vcl/inc/pdf/ExternalPDFStreams.hxx2
-rw-r--r--vcl/inc/pdf/pdfcompat.hxx42
-rw-r--r--vcl/source/filter/ipdf/pdfcompat.cxx114
-rw-r--r--vcl/source/filter/ipdf/pdfdocument.cxx14
-rw-r--r--vcl/source/filter/ipdf/pdfread.cxx110
7 files changed, 183 insertions, 102 deletions
diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx
index dd03029227d2..fbe0be89cdc6 100644
--- a/include/vcl/filter/pdfdocument.hxx
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -576,6 +576,8 @@ public:
//@{
/// Read elements from the start of the stream till its end.
bool Read(SvStream& rStream);
+ /// Calls Read() first and, if it fails, tries to fix up the stream and then retries.
+ bool ReadWithPossibleFixup(SvStream& rStream);
void SetSignatureLine(std::vector<sal_Int8>&& rSignatureLine);
void SetSignaturePage(size_t nPage);
/// Sign the read document with xCertificate in the edit buffer.
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index 6910927bfde0..1d18d2325ad8 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -451,6 +451,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
vcl/source/filter/ipict/ipict \
vcl/source/filter/ipsd/ipsd \
vcl/source/filter/ipict/shape \
+ vcl/source/filter/ipdf/pdfcompat \
vcl/source/filter/ipdf/pdfread \
vcl/source/filter/ipdf/pdfdocument \
vcl/source/filter/iras/iras \
diff --git a/vcl/inc/pdf/ExternalPDFStreams.hxx b/vcl/inc/pdf/ExternalPDFStreams.hxx
index 7840217630c8..45b15f7a74bc 100644
--- a/vcl/inc/pdf/ExternalPDFStreams.hxx
+++ b/vcl/inc/pdf/ExternalPDFStreams.hxx
@@ -42,7 +42,7 @@ struct VCL_DLLPUBLIC ExternalPDFStream
aPDFStream.WriteBytes(maDataContainer.getData(), maDataContainer.getSize());
aPDFStream.Seek(0);
auto pPDFDocument = std::make_shared<filter::PDFDocument>();
- if (!pPDFDocument->Read(aPDFStream))
+ if (!pPDFDocument->ReadWithPossibleFixup(aPDFStream))
{
SAL_WARN("vcl.pdfwriter",
"PDFWriterImpl::writeReferenceXObject: reading the PDF document failed");
diff --git a/vcl/inc/pdf/pdfcompat.hxx b/vcl/inc/pdf/pdfcompat.hxx
new file mode 100644
index 000000000000..8f629b3bc8ee
--- /dev/null
+++ b/vcl/inc/pdf/pdfcompat.hxx
@@ -0,0 +1,42 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include <config_features.h>
+#include <tools/gen.hxx>
+#include <tools/stream.hxx>
+#include <tools/UnitConversion.hxx>
+#include <vcl/graph.hxx>
+
+namespace vcl::pdf
+{
+/// Convert points to inches, then scale by the given resolution (fResolutionDPI).
+inline double pointToPixel(const double fPoint, const double fResolutionDPI)
+{
+ return o3tl::convert(fPoint, o3tl::Length::pt, o3tl::Length::in) * fResolutionDPI;
+}
+
+/// Decide if PDF data is old enough to be compatible.
+bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize);
+
+/// Converts to highest supported format version (currently 1.6).
+/// Usually used to deal with missing referenced objects in the
+/// source pdf stream.
+bool convertToHighestSupported(SvStream& rInStream, SvStream& rOutStream);
+
+/// Takes care of transparently downgrading the version of the PDF stream in
+/// case it's too new for our PDF export.
+bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream);
+
+BinaryDataContainer createBinaryDataContainer(SvStream& rStream);
+
+} // end of vcl::pdf namespace
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/filter/ipdf/pdfcompat.cxx b/vcl/source/filter/ipdf/pdfcompat.cxx
new file mode 100644
index 000000000000..62413e585be9
--- /dev/null
+++ b/vcl/source/filter/ipdf/pdfcompat.cxx
@@ -0,0 +1,114 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <pdf/pdfcompat.hxx>
+
+#include <o3tl/string_view.hxx>
+#include <vcl/filter/PDFiumLibrary.hxx>
+#include <sal/log.hxx>
+
+namespace vcl::pdf
+{
+/// Decide if PDF data is old enough to be compatible.
+bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
+{
+ if (nSize < 8)
+ return false;
+
+ // %PDF-x.y
+ sal_uInt8 aFirstBytes[8];
+ rInStream.Seek(nPos);
+ sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
+ if (nRead < 8)
+ return false;
+
+ if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
+ || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
+ return false;
+
+ sal_Int32 nMajor = o3tl::toInt32(std::string_view(reinterpret_cast<char*>(&aFirstBytes[5]), 1));
+ sal_Int32 nMinor = o3tl::toInt32(std::string_view(reinterpret_cast<char*>(&aFirstBytes[7]), 1));
+ return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
+}
+
+/// Converts to highest supported format version (1.6).
+/// Usually used to deal with missing referenced objects in source
+/// pdf stream.
+bool convertToHighestSupported(SvStream& rInStream, SvStream& rOutStream)
+{
+ sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
+ sal_uInt64 nSize = STREAM_SEEK_TO_END;
+ rInStream.Seek(nPos);
+ // Convert to PDF-1.6.
+ auto pPdfium = vcl::pdf::PDFiumLibrary::get();
+ if (!pPdfium)
+ return false;
+
+ // Read input into a buffer.
+ SvMemoryStream aInBuffer;
+ aInBuffer.WriteStream(rInStream, nSize);
+
+ SvMemoryStream aSaved;
+ {
+ // Load the buffer using pdfium.
+ std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
+ = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize(), OString());
+ if (!pPdfDocument)
+ return false;
+
+ // 16 means PDF-1.6.
+ if (!pPdfDocument->saveWithVersion(aSaved, 16))
+ return false;
+ }
+
+ aSaved.Seek(STREAM_SEEK_TO_BEGIN);
+ rOutStream.WriteStream(aSaved);
+
+ return rOutStream.good();
+}
+
+/// Takes care of transparently downgrading the version of the PDF stream in
+/// case it's too new for our PDF export.
+bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
+{
+ sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
+ sal_uInt64 nSize = STREAM_SEEK_TO_END;
+ bool bCompatible = isCompatible(rInStream, nPos, nSize);
+ rInStream.Seek(nPos);
+ if (bCompatible)
+ // Not converting.
+ rOutStream.WriteStream(rInStream, nSize);
+ else
+ convertToHighestSupported(rInStream, rOutStream);
+
+ return rOutStream.good();
+}
+
+BinaryDataContainer createBinaryDataContainer(SvStream& rStream)
+{
+ // Save the original PDF stream for later use.
+ SvMemoryStream aMemoryStream;
+ if (!getCompatibleStream(rStream, aMemoryStream))
+ return {};
+
+ const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
+
+ auto aPdfData = std::make_unique<std::vector<sal_uInt8>>(nStreamLength);
+
+ aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
+ aMemoryStream.ReadBytes(aPdfData->data(), aPdfData->size());
+ if (aMemoryStream.GetError())
+ return {};
+
+ return { std::move(aPdfData) };
+}
+
+} // end vcl::pdf namespace
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index 493826e38f8f..a93083ce85a8 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -8,6 +8,8 @@
*/
#include <vcl/filter/pdfdocument.hxx>
+#include <pdf/pdfcompat.hxx>
+#include <config_features.h>
#include <map>
#include <memory>
@@ -1349,6 +1351,18 @@ void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
m_aIDObjects[nID] = pObject;
}
+bool PDFDocument::ReadWithPossibleFixup(SvStream& rStream)
+{
+ if (Read(rStream))
+ return true;
+
+ // Read failed, try a roundtrip through pdfium and then retry.
+ rStream.Seek(0);
+ SvMemoryStream aStandardizedStream;
+ vcl::pdf::convertToHighestSupported(rStream, aStandardizedStream);
+ return Read(aStandardizedStream);
+}
+
bool PDFDocument::Read(SvStream& rStream)
{
// Check file magic.
diff --git a/vcl/source/filter/ipdf/pdfread.cxx b/vcl/source/filter/ipdf/pdfread.cxx
index e99682e0c0e4..c6bc4fd5b282 100644
--- a/vcl/source/filter/ipdf/pdfread.cxx
+++ b/vcl/source/filter/ipdf/pdfread.cxx
@@ -8,8 +8,7 @@
*/
#include <vcl/pdfread.hxx>
-
-#include <tools/UnitConversion.hxx>
+#include <pdf/pdfcompat.hxx>
#include <pdf/PdfConfig.hxx>
#include <vcl/graph.hxx>
@@ -23,99 +22,6 @@
using namespace com::sun::star;
-namespace
-{
-/// Convert to inch, then assume 96 DPI.
-inline double pointToPixel(const double fPoint, const double fResolutionDPI)
-{
- return o3tl::convert(fPoint, o3tl::Length::pt, o3tl::Length::in) * fResolutionDPI;
-}
-
-/// Decide if PDF data is old enough to be compatible.
-bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
-{
- if (nSize < 8)
- return false;
-
- // %PDF-x.y
- sal_uInt8 aFirstBytes[8];
- rInStream.Seek(nPos);
- sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
- if (nRead < 8)
- return false;
-
- if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
- || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
- return false;
-
- sal_Int32 nMajor = o3tl::toInt32(std::string_view(reinterpret_cast<char*>(&aFirstBytes[5]), 1));
- sal_Int32 nMinor = o3tl::toInt32(std::string_view(reinterpret_cast<char*>(&aFirstBytes[7]), 1));
- return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
-}
-
-/// Takes care of transparently downgrading the version of the PDF stream in
-/// case it's too new for our PDF export.
-bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
-{
- sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
- sal_uInt64 nSize = STREAM_SEEK_TO_END;
- bool bCompatible = isCompatible(rInStream, nPos, nSize);
- rInStream.Seek(nPos);
- if (bCompatible)
- // Not converting.
- rOutStream.WriteStream(rInStream, nSize);
- else
- {
- // Downconvert to PDF-1.6.
- auto pPdfium = vcl::pdf::PDFiumLibrary::get();
- if (!pPdfium)
- return false;
-
- // Read input into a buffer.
- SvMemoryStream aInBuffer;
- aInBuffer.WriteStream(rInStream, nSize);
-
- SvMemoryStream aSaved;
- {
- // Load the buffer using pdfium.
- std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
- = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize(), OString());
- if (!pPdfDocument)
- return false;
-
- // 16 means PDF-1.6.
- if (!pPdfDocument->saveWithVersion(aSaved, 16))
- return false;
- }
-
- aSaved.Seek(STREAM_SEEK_TO_BEGIN);
- rOutStream.WriteStream(aSaved);
- }
-
- return rOutStream.good();
-}
-
-BinaryDataContainer createBinaryDataContainer(SvStream& rStream)
-{
- // Save the original PDF stream for later use.
- SvMemoryStream aMemoryStream;
- if (!getCompatibleStream(rStream, aMemoryStream))
- return {};
-
- const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
-
- auto aPdfData = std::make_unique<std::vector<sal_uInt8>>(nStreamLength);
-
- aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
- aMemoryStream.ReadBytes(aPdfData->data(), aPdfData->size());
- if (aMemoryStream.GetError())
- return {};
-
- return { std::move(aPdfData) };
-}
-
-} // end anonymous namespace
-
namespace vcl
{
size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& rBitmaps,
@@ -160,10 +66,12 @@ size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& r
// Returned unit is points, convert that to pixel.
- const size_t nPageWidth = std::round(pointToPixel(nPageWidthPoints, fResolutionDPI)
- * PDF_INSERT_MAGIC_SCALE_FACTOR);
- const size_t nPageHeight = std::round(pointToPixel(nPageHeightPoints, fResolutionDPI)
- * PDF_INSERT_MAGIC_SCALE_FACTOR);
+ const size_t nPageWidth
+ = std::round(vcl::pdf::pointToPixel(nPageWidthPoints, fResolutionDPI)
+ * PDF_INSERT_MAGIC_SCALE_FACTOR);
+ const size_t nPageHeight
+ = std::round(vcl::pdf::pointToPixel(nPageHeightPoints, fResolutionDPI)
+ * PDF_INSERT_MAGIC_SCALE_FACTOR);
std::unique_ptr<vcl::pdf::PDFiumBitmap> pPdfBitmap
= pPdfium->createBitmap(nPageWidth, nPageHeight, /*nAlpha=*/1);
if (!pPdfBitmap)
@@ -222,7 +130,7 @@ size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& r
bool importPdfVectorGraphicData(SvStream& rStream,
std::shared_ptr<VectorGraphicData>& rVectorGraphicData)
{
- BinaryDataContainer aDataContainer = createBinaryDataContainer(rStream);
+ BinaryDataContainer aDataContainer = vcl::pdf::createBinaryDataContainer(rStream);
if (aDataContainer.isEmpty())
{
SAL_WARN("vcl.filter", "ImportPDF: empty PDF data array");
@@ -433,7 +341,7 @@ size_t ImportPDFUnloaded(const OUString& rURL, std::vector<PDFGraphicResult>& rG
::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | StreamMode::SHARE_DENYNONE));
// Save the original PDF stream for later use.
- BinaryDataContainer aDataContainer = createBinaryDataContainer(*xStream);
+ BinaryDataContainer aDataContainer = vcl::pdf::createBinaryDataContainer(*xStream);
if (aDataContainer.isEmpty())
return 0;