summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDennis Francis <dennis.francis@collabora.com>2022-10-18 16:14:37 +0530
committerMiklos Vajna <vmiklos@collabora.com>2022-10-25 15:57:57 +0200
commit7b5e440dc7b7b710d695a00c51c3b061bc5729ba (patch)
treea863b4e611aff501344b820c01cf0039f2587064
parentsw: fix crash in SwViewShellImp::AddPendingLOKInvalidation() (diff)
downloadcore-7b5e440dc7b7b710d695a00c51c3b061bc5729ba.tar.gz
core-7b5e440dc7b7b710d695a00c51c3b061bc5729ba.zip
vcl: re-exporting broken pdfs -> empty pages
Certain PDF documents, when loaded in LO_IMPORT_USE_PDFIUM=1 mode, sometimes have missing objects that are referred to by other objects (for instance to determine a stream's length), even if their PDF version is < 1.6. As a result, parsing fails and the re-export produces a PDF with empty pages. A round trip through pdfium and exporting to v1.6 seems to cure the issue. Possibly pdfium does some repair work to determine the length of the stream in an independent pass through the file. Change-Id: Id09f67eddab4163ed12a3a3f3a73baf92e2912aa Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141495 Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com> Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
-rw-r--r--include/vcl/filter/pdfdocument.hxx2
-rw-r--r--vcl/Library_vcl.mk1
-rw-r--r--vcl/inc/pdf/ExternalPDFStreams.hxx2
-rw-r--r--vcl/inc/pdf/pdfcompat.hxx45
-rw-r--r--vcl/source/filter/ipdf/pdfcompat.cxx129
-rw-r--r--vcl/source/filter/ipdf/pdfdocument.cxx19
-rw-r--r--vcl/source/filter/ipdf/pdfread.cxx112
7 files changed, 202 insertions, 108 deletions
diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx
index 7f7cc8dfb641..6ab6adc2468a 100644
--- a/include/vcl/filter/pdfdocument.hxx
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -575,6 +575,8 @@ public:
//@{
/// Read elements from the start of the stream till its end.
bool Read(SvStream& rStream);
+ /// Calls Read() first and if it fails it tries to fixup and then retry.
+ bool ReadWithPossibleFixup(SvStream& rStream);
void SetSignatureLine(const std::vector<sal_Int8>& rSignatureLine);
void SetSignaturePage(size_t nPage);
/// Sign the read document with xCertificate in the edit buffer.
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index 9c6e7220330e..8c19497ff6cd 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -428,6 +428,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
vcl/source/filter/GraphicFormatDetector \
vcl/source/filter/igif/decode \
vcl/source/filter/igif/gifread \
+ vcl/source/filter/ipdf/pdfcompat \
vcl/source/filter/ipdf/pdfread \
vcl/source/filter/ipdf/pdfdocument \
vcl/source/filter/ixbm/xbmread \
diff --git a/vcl/inc/pdf/ExternalPDFStreams.hxx b/vcl/inc/pdf/ExternalPDFStreams.hxx
index 0a1997fe7dc7..71448910ac0f 100644
--- a/vcl/inc/pdf/ExternalPDFStreams.hxx
+++ b/vcl/inc/pdf/ExternalPDFStreams.hxx
@@ -41,7 +41,7 @@ struct VCL_DLLPUBLIC ExternalPDFStream
aPDFStream.WriteBytes(maData.data(), maData.size());
aPDFStream.Seek(0);
auto pPDFDocument = std::make_shared<filter::PDFDocument>();
- if (!pPDFDocument->Read(aPDFStream))
+ if (!pPDFDocument->ReadWithPossibleFixup(aPDFStream))
{
SAL_WARN("vcl.pdfwriter",
"PDFWriterImpl::writeReferenceXObject: reading the PDF document failed");
diff --git a/vcl/inc/pdf/pdfcompat.hxx b/vcl/inc/pdf/pdfcompat.hxx
new file mode 100644
index 000000000000..29de3901a436
--- /dev/null
+++ b/vcl/inc/pdf/pdfcompat.hxx
@@ -0,0 +1,45 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include <config_features.h>
+#include <tools/gen.hxx>
+#include <tools/stream.hxx>
+#include <vcl/vectorgraphicdata.hxx>
+
+namespace vcl::pdf
+{
+#if HAVE_FEATURE_PDFIUM
+
+/// Convert points (1/72 inch) to pixels at the given resolution (fResolutionDPI).
+inline double pointToPixel(const double fPoint, const double fResolutionDPI)
+{
+ return fPoint * fResolutionDPI / 72.;
+}
+
+/// Decide if PDF data is old enough to be compatible.
+bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize);
+
+/// Converts to highest supported format version (currently 1.6).
+/// Usually used to deal with missing referenced objects in the
+/// source pdf stream.
+bool convertToHighestSupported(SvStream& rInStream, SvStream& rOutStream);
+
+#endif // HAVE_FEATURE_PDFIUM
+
+/// Takes care of transparently downgrading the version of the PDF stream in
+/// case it's too new for our PDF export.
+bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream);
+
+VectorGraphicDataArray createVectorGraphicDataArray(SvStream& rStream);
+
+} // end of vcl::pdf namespace
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/filter/ipdf/pdfcompat.cxx b/vcl/source/filter/ipdf/pdfcompat.cxx
new file mode 100644
index 000000000000..3136e2940249
--- /dev/null
+++ b/vcl/source/filter/ipdf/pdfcompat.cxx
@@ -0,0 +1,129 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <pdf/pdfcompat.hxx>
+
+#if HAVE_FEATURE_PDFIUM
+#include <fpdfview.h>
+#include <fpdf_edit.h>
+#include <tools/UnitConversion.hxx>
+#endif
+
+#include <vcl/filter/PDFiumLibrary.hxx>
+#include <sal/log.hxx>
+
+namespace vcl::pdf
+{
+#if HAVE_FEATURE_PDFIUM
+
+/// Returns true if the data at nPos starts with a "%PDF-x.y" header whose version is at most 1.6.
+bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
+{
+ if (nSize < 8)
+ return false;
+
+ // Header layout is "%PDF-x.y": only the single characters at offsets 5 (major) and 7 (minor) are parsed.
+ sal_uInt8 aFirstBytes[8];
+ rInStream.Seek(nPos);
+ sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
+ if (nRead < 8)
+ return false;
+
+ if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
+ || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
+ return false;
+
+ sal_Int32 nMajor = OString(aFirstBytes[5]).toInt32();
+ sal_Int32 nMinor = OString(aFirstBytes[7]).toInt32();
+ return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
+}
+
+/// Round-trips all of rInStream through pdfium and writes it to rOutStream as PDF-1.6.
+/// Usually used to deal with missing referenced objects in the source pdf stream.
+/// Returns false if pdfium is unavailable, loading or saving fails, or rOutStream is not good.
+bool convertToHighestSupported(SvStream& rInStream, SvStream& rOutStream)
+{
+ sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
+ sal_uInt64 nSize = STREAM_SEEK_TO_END;
+ rInStream.Seek(nPos);
+ // Convert to PDF-1.6.
+ auto pPdfium = vcl::pdf::PDFiumLibrary::get();
+ if (!pPdfium)
+ return false;
+
+ // Read input into a buffer.
+ SvMemoryStream aInBuffer;
+ aInBuffer.WriteStream(rInStream, nSize);
+
+ SvMemoryStream aSaved;
+ {
+ // Load the buffer using pdfium.
+ std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
+ = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize());
+ if (!pPdfDocument)
+ return false;
+
+ // 16 means PDF-1.6.
+ if (!pPdfDocument->saveWithVersion(aSaved, 16))
+ return false;
+ }
+
+ aSaved.Seek(STREAM_SEEK_TO_BEGIN);
+ rOutStream.WriteStream(aSaved);
+
+ return rOutStream.good();
+}
+
+/// Copies rInStream to rOutStream, converting it to PDF-1.6 via pdfium when its header
+/// version is too new for our PDF export. Returns false if conversion or writing fails.
+bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
+{
+    sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
+    sal_uInt64 nSize = STREAM_SEEK_TO_END;
+    bool bCompatible = isCompatible(rInStream, nPos, nSize);
+    rInStream.Seek(nPos);
+    if (bCompatible)
+        // Not converting.
+        rOutStream.WriteStream(rInStream, nSize);
+    else if (!convertToHighestSupported(rInStream, rOutStream))
+        return false; // Conversion failed: an untouched (still good) rOutStream is not success.
+
+    return rOutStream.good();
+}
+#else
+bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
+{
+ rInStream.Seek(STREAM_SEEK_TO_BEGIN);
+ rOutStream.WriteStream(rInStream, STREAM_SEEK_TO_END);
+ return rOutStream.good();
+}
+#endif // HAVE_FEATURE_PDFIUM
+
+VectorGraphicDataArray createVectorGraphicDataArray(SvStream& rStream)
+{
+ // Save the original PDF stream for later use.
+ SvMemoryStream aMemoryStream;
+ if (!getCompatibleStream(rStream, aMemoryStream))
+ return VectorGraphicDataArray();
+
+ const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
+
+ VectorGraphicDataArray aPdfData(nStreamLength);
+
+ aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
+ aMemoryStream.ReadBytes(aPdfData.begin(), nStreamLength);
+ if (aMemoryStream.GetError())
+ return VectorGraphicDataArray();
+
+ return aPdfData;
+}
+
+} // end vcl::pdf namespace
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index 8a3ee8924d04..7569deede0f5 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -8,6 +8,8 @@
*/
#include <vcl/filter/pdfdocument.hxx>
+#include <pdf/pdfcompat.hxx>
+#include <config_features.h>
#include <map>
#include <memory>
@@ -29,6 +31,7 @@
#include <o3tl/safeint.hxx>
#include <pdf/objectcopier.hxx>
+#include <vcl/pdfread.hxx>
using namespace com::sun::star;
@@ -1350,6 +1353,22 @@ void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject)
m_aIDObjects[nID] = pObject;
}
+bool PDFDocument::ReadWithPossibleFixup(SvStream& rStream)
+{
+#if HAVE_FEATURE_PDFIUM
+    if (Read(rStream))
+        return true;
+
+    // Read failed: roundtrip through pdfium, and retry only if that conversion succeeded.
+    rStream.Seek(0);
+    SvMemoryStream aStandardizedStream;
+    return vcl::pdf::convertToHighestSupported(rStream, aStandardizedStream)
+           && Read(aStandardizedStream);
+#else
+    return Read(rStream);
+#endif
+}
+
bool PDFDocument::Read(SvStream& rStream)
{
// Check file magic.
diff --git a/vcl/source/filter/ipdf/pdfread.cxx b/vcl/source/filter/ipdf/pdfread.cxx
index cda9f9d2304b..05b9a966fd02 100644
--- a/vcl/source/filter/ipdf/pdfread.cxx
+++ b/vcl/source/filter/ipdf/pdfread.cxx
@@ -8,6 +8,7 @@
*/
#include <vcl/pdfread.hxx>
+#include <pdf/pdfcompat.hxx>
#include <config_features.h>
@@ -28,109 +29,6 @@
using namespace com::sun::star;
-namespace
-{
-#if HAVE_FEATURE_PDFIUM
-
-/// Convert to inch, then assume 96 DPI.
-inline double pointToPixel(const double fPoint, const double fResolutionDPI)
-{
- return fPoint * fResolutionDPI / 72.;
-}
-
-/// Decide if PDF data is old enough to be compatible.
-bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
-{
- if (nSize < 8)
- return false;
-
- // %PDF-x.y
- sal_uInt8 aFirstBytes[8];
- rInStream.Seek(nPos);
- sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
- if (nRead < 8)
- return false;
-
- if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
- || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
- return false;
-
- sal_Int32 nMajor = OString(aFirstBytes[5]).toInt32();
- sal_Int32 nMinor = OString(aFirstBytes[7]).toInt32();
- return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
-}
-
-/// Takes care of transparently downgrading the version of the PDF stream in
-/// case it's too new for our PDF export.
-bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
-{
- sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
- sal_uInt64 nSize = STREAM_SEEK_TO_END;
- bool bCompatible = isCompatible(rInStream, nPos, nSize);
- rInStream.Seek(nPos);
- if (bCompatible)
- // Not converting.
- rOutStream.WriteStream(rInStream, nSize);
- else
- {
- // Downconvert to PDF-1.6.
- auto pPdfium = vcl::pdf::PDFiumLibrary::get();
- if (!pPdfium)
- return false;
-
- // Read input into a buffer.
- SvMemoryStream aInBuffer;
- aInBuffer.WriteStream(rInStream, nSize);
-
- SvMemoryStream aSaved;
- {
- // Load the buffer using pdfium.
- std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
- = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize());
- if (!pPdfDocument)
- return false;
-
- // 16 means PDF-1.6.
- if (!pPdfDocument->saveWithVersion(aSaved, 16))
- return false;
- }
-
- aSaved.Seek(STREAM_SEEK_TO_BEGIN);
- rOutStream.WriteStream(aSaved);
- }
-
- return rOutStream.good();
-}
-#else
-bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
-{
- rInStream.Seek(STREAM_SEEK_TO_BEGIN);
- rOutStream.WriteStream(rInStream, STREAM_SEEK_TO_END);
- return rOutStream.good();
-}
-#endif // HAVE_FEATURE_PDFIUM
-
-VectorGraphicDataArray createVectorGraphicDataArray(SvStream& rStream)
-{
- // Save the original PDF stream for later use.
- SvMemoryStream aMemoryStream;
- if (!getCompatibleStream(rStream, aMemoryStream))
- return VectorGraphicDataArray();
-
- const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
-
- VectorGraphicDataArray aPdfData(nStreamLength);
-
- aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
- aMemoryStream.ReadBytes(aPdfData.begin(), nStreamLength);
- if (aMemoryStream.GetError())
- return VectorGraphicDataArray();
-
- return aPdfData;
-}
-
-} // end anonymous namespace
-
namespace vcl
{
size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& rBitmaps,
@@ -168,8 +66,8 @@ size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& r
}
// Returned unit is points, convert that to pixel.
- const size_t nPageWidth = pointToPixel(nPageWidthPoints, fResolutionDPI);
- const size_t nPageHeight = pointToPixel(nPageHeightPoints, fResolutionDPI);
+ const size_t nPageWidth = vcl::pdf::pointToPixel(nPageWidthPoints, fResolutionDPI);
+ const size_t nPageHeight = vcl::pdf::pointToPixel(nPageHeightPoints, fResolutionDPI);
std::unique_ptr<vcl::pdf::PDFiumBitmap> pPdfBitmap
= pPdfium->createBitmap(nPageWidth, nPageHeight, /*alpha=*/1);
if (!pPdfBitmap)
@@ -236,7 +134,7 @@ size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& r
bool ImportPDF(SvStream& rStream, Graphic& rGraphic)
{
- VectorGraphicDataArray aPdfDataArray = createVectorGraphicDataArray(rStream);
+ VectorGraphicDataArray aPdfDataArray = vcl::pdf::createVectorGraphicDataArray(rStream);
if (!aPdfDataArray.hasElements())
{
SAL_WARN("vcl.filter", "ImportPDF: empty PDF data array");
@@ -437,7 +335,7 @@ size_t ImportPDFUnloaded(const OUString& rURL, std::vector<PDFGraphicResult>& rG
::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | StreamMode::SHARE_DENYNONE));
// Save the original PDF stream for later use.
- VectorGraphicDataArray aPdfDataArray = createVectorGraphicDataArray(*xStream);
+ VectorGraphicDataArray aPdfDataArray = vcl::pdf::createVectorGraphicDataArray(*xStream);
if (!aPdfDataArray.hasElements())
return 0;