diff options
author | Dennis Francis <dennis.francis@collabora.com> | 2022-10-18 16:14:37 +0530 |
---|---|---|
committer | Miklos Vajna <vmiklos@collabora.com> | 2022-10-25 15:57:57 +0200 |
commit | 7b5e440dc7b7b710d695a00c51c3b061bc5729ba (patch) | |
tree | a863b4e611aff501344b820c01cf0039f2587064 | |
parent | sw: fix crash in SwViewShellImp::AddPendingLOKInvalidation() (diff) | |
download | core-7b5e440dc7b7b710d695a00c51c3b061bc5729ba.tar.gz core-7b5e440dc7b7b710d695a00c51c3b061bc5729ba.zip |
vcl: re-exporting broken pdfs -> empty pages
Certain PDF documents, when loaded in LO_IMPORT_USE_PDFIUM=1 mode, even if
their PDF version is < 1.6, sometimes have missing objects that are referred
to by other objects, for instance to determine a stream's length. As a result,
parsing fails and the export produces a PDF with empty pages. A round trip
through pdfium, exporting to v1.6, seems to cure the issue. Possibly
pdfium does some repair work to determine the length of the stream in an
independent pass through the file.
Change-Id: Id09f67eddab4163ed12a3a3f3a73baf92e2912aa
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141495
Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com>
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
-rw-r--r-- | include/vcl/filter/pdfdocument.hxx | 2 | ||||
-rw-r--r-- | vcl/Library_vcl.mk | 1 | ||||
-rw-r--r-- | vcl/inc/pdf/ExternalPDFStreams.hxx | 2 | ||||
-rw-r--r-- | vcl/inc/pdf/pdfcompat.hxx | 45 | ||||
-rw-r--r-- | vcl/source/filter/ipdf/pdfcompat.cxx | 129 | ||||
-rw-r--r-- | vcl/source/filter/ipdf/pdfdocument.cxx | 19 | ||||
-rw-r--r-- | vcl/source/filter/ipdf/pdfread.cxx | 112 |
7 files changed, 202 insertions, 108 deletions
diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx index 7f7cc8dfb641..6ab6adc2468a 100644 --- a/include/vcl/filter/pdfdocument.hxx +++ b/include/vcl/filter/pdfdocument.hxx @@ -575,6 +575,8 @@ public: //@{ /// Read elements from the start of the stream till its end. bool Read(SvStream& rStream); + /// Calls Read() first and if it fails it tries to fixup and then retry. + bool ReadWithPossibleFixup(SvStream& rStream); void SetSignatureLine(const std::vector<sal_Int8>& rSignatureLine); void SetSignaturePage(size_t nPage); /// Sign the read document with xCertificate in the edit buffer. diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk index 9c6e7220330e..8c19497ff6cd 100644 --- a/vcl/Library_vcl.mk +++ b/vcl/Library_vcl.mk @@ -428,6 +428,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\ vcl/source/filter/GraphicFormatDetector \ vcl/source/filter/igif/decode \ vcl/source/filter/igif/gifread \ + vcl/source/filter/ipdf/pdfcompat \ vcl/source/filter/ipdf/pdfread \ vcl/source/filter/ipdf/pdfdocument \ vcl/source/filter/ixbm/xbmread \ diff --git a/vcl/inc/pdf/ExternalPDFStreams.hxx b/vcl/inc/pdf/ExternalPDFStreams.hxx index 0a1997fe7dc7..71448910ac0f 100644 --- a/vcl/inc/pdf/ExternalPDFStreams.hxx +++ b/vcl/inc/pdf/ExternalPDFStreams.hxx @@ -41,7 +41,7 @@ struct VCL_DLLPUBLIC ExternalPDFStream aPDFStream.WriteBytes(maData.data(), maData.size()); aPDFStream.Seek(0); auto pPDFDocument = std::make_shared<filter::PDFDocument>(); - if (!pPDFDocument->Read(aPDFStream)) + if (!pPDFDocument->ReadWithPossibleFixup(aPDFStream)) { SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: reading the PDF document failed"); diff --git a/vcl/inc/pdf/pdfcompat.hxx b/vcl/inc/pdf/pdfcompat.hxx new file mode 100644 index 000000000000..29de3901a436 --- /dev/null +++ b/vcl/inc/pdf/pdfcompat.hxx @@ -0,0 +1,45 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice 
project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#pragma once + +#include <config_features.h> +#include <tools/gen.hxx> +#include <tools/stream.hxx> +#include <vcl/vectorgraphicdata.hxx> + +namespace vcl::pdf +{ +#if HAVE_FEATURE_PDFIUM + +/// Convert to inch, then assume 96 DPI. +inline double pointToPixel(const double fPoint, const double fResolutionDPI) +{ + return fPoint * fResolutionDPI / 72.; +} + +/// Decide if PDF data is old enough to be compatible. +bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize); + +/// Converts to highest supported format version (currently 1.6). +/// Usually used to deal with missing referenced objects in the +/// source pdf stream. +bool convertToHighestSupported(SvStream& rInStream, SvStream& rOutStream); + +#endif // HAVE_FEATURE_PDFIUM + +/// Takes care of transparently downgrading the version of the PDF stream in +/// case it's too new for our PDF export. +bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream); + +VectorGraphicDataArray createVectorGraphicDataArray(SvStream& rStream); + +} // end of vcl::filter::ipdf namespace + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/vcl/source/filter/ipdf/pdfcompat.cxx b/vcl/source/filter/ipdf/pdfcompat.cxx new file mode 100644 index 000000000000..3136e2940249 --- /dev/null +++ b/vcl/source/filter/ipdf/pdfcompat.cxx @@ -0,0 +1,129 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include <pdf/pdfcompat.hxx> + +#if HAVE_FEATURE_PDFIUM +#include <fpdfview.h> +#include <fpdf_edit.h> +#include <tools/UnitConversion.hxx> +#endif + +#include <vcl/filter/PDFiumLibrary.hxx> +#include <sal/log.hxx> + +namespace vcl::pdf +{ +#if HAVE_FEATURE_PDFIUM + +/// Decide if PDF data is old enough to be compatible. +bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize) +{ + if (nSize < 8) + return false; + + // %PDF-x.y + sal_uInt8 aFirstBytes[8]; + rInStream.Seek(nPos); + sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8); + if (nRead < 8) + return false; + + if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D' + || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-') + return false; + + sal_Int32 nMajor = OString(aFirstBytes[5]).toInt32(); + sal_Int32 nMinor = OString(aFirstBytes[7]).toInt32(); + return !(nMajor > 1 || (nMajor == 1 && nMinor > 6)); +} + +/// Converts to highest supported format version (1.6). +/// Usually used to deal with missing referenced objects in source +/// pdf stream. +bool convertToHighestSupported(SvStream& rInStream, SvStream& rOutStream) +{ + sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN; + sal_uInt64 nSize = STREAM_SEEK_TO_END; + rInStream.Seek(nPos); + // Convert to PDF-1.6. + auto pPdfium = vcl::pdf::PDFiumLibrary::get(); + if (!pPdfium) + return false; + + // Read input into a buffer. + SvMemoryStream aInBuffer; + aInBuffer.WriteStream(rInStream, nSize); + + SvMemoryStream aSaved; + { + // Load the buffer using pdfium. + std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument + = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize()); + if (!pPdfDocument) + return false; + + // 16 means PDF-1.6. 
+ if (!pPdfDocument->saveWithVersion(aSaved, 16)) + return false; + } + + aSaved.Seek(STREAM_SEEK_TO_BEGIN); + rOutStream.WriteStream(aSaved); + + return rOutStream.good(); +} + +/// Takes care of transparently downgrading the version of the PDF stream in +/// case it's too new for our PDF export. +bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream) +{ + sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN; + sal_uInt64 nSize = STREAM_SEEK_TO_END; + bool bCompatible = isCompatible(rInStream, nPos, nSize); + rInStream.Seek(nPos); + if (bCompatible) + // Not converting. + rOutStream.WriteStream(rInStream, nSize); + else + convertToHighestSupported(rInStream, rOutStream); + + return rOutStream.good(); +} +#else +bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream) +{ + rInStream.Seek(STREAM_SEEK_TO_BEGIN); + rOutStream.WriteStream(rInStream, STREAM_SEEK_TO_END); + return rOutStream.good(); +} +#endif // HAVE_FEATURE_PDFIUM + +VectorGraphicDataArray createVectorGraphicDataArray(SvStream& rStream) +{ + // Save the original PDF stream for later use. 
+ SvMemoryStream aMemoryStream; + if (!getCompatibleStream(rStream, aMemoryStream)) + return VectorGraphicDataArray(); + + const sal_uInt32 nStreamLength = aMemoryStream.TellEnd(); + + VectorGraphicDataArray aPdfData(nStreamLength); + + aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN); + aMemoryStream.ReadBytes(aPdfData.begin(), nStreamLength); + if (aMemoryStream.GetError()) + return VectorGraphicDataArray(); + + return aPdfData; +} + +} // end vcl::filter::ipdf namespace + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx index 8a3ee8924d04..7569deede0f5 100644 --- a/vcl/source/filter/ipdf/pdfdocument.cxx +++ b/vcl/source/filter/ipdf/pdfdocument.cxx @@ -8,6 +8,8 @@ */ #include <vcl/filter/pdfdocument.hxx> +#include <pdf/pdfcompat.hxx> +#include <config_features.h> #include <map> #include <memory> @@ -29,6 +31,7 @@ #include <o3tl/safeint.hxx> #include <pdf/objectcopier.hxx> +#include <vcl/pdfread.hxx> using namespace com::sun::star; @@ -1350,6 +1353,22 @@ void PDFDocument::SetIDObject(size_t nID, PDFObjectElement* pObject) m_aIDObjects[nID] = pObject; } +bool PDFDocument::ReadWithPossibleFixup(SvStream& rStream) +{ +#if HAVE_FEATURE_PDFIUM + if (Read(rStream)) + return true; + + // Read failed, try a roundtrip through pdfium and then retry. + rStream.Seek(0); + SvMemoryStream aStandardizedStream; + vcl::pdf::convertToHighestSupported(rStream, aStandardizedStream); + return Read(aStandardizedStream); +#else + return Read(rStream); +#endif +} + bool PDFDocument::Read(SvStream& rStream) { // Check file magic. 
diff --git a/vcl/source/filter/ipdf/pdfread.cxx b/vcl/source/filter/ipdf/pdfread.cxx index cda9f9d2304b..05b9a966fd02 100644 --- a/vcl/source/filter/ipdf/pdfread.cxx +++ b/vcl/source/filter/ipdf/pdfread.cxx @@ -8,6 +8,7 @@ */ #include <vcl/pdfread.hxx> +#include <pdf/pdfcompat.hxx> #include <config_features.h> @@ -28,109 +29,6 @@ using namespace com::sun::star; -namespace -{ -#if HAVE_FEATURE_PDFIUM - -/// Convert to inch, then assume 96 DPI. -inline double pointToPixel(const double fPoint, const double fResolutionDPI) -{ - return fPoint * fResolutionDPI / 72.; -} - -/// Decide if PDF data is old enough to be compatible. -bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize) -{ - if (nSize < 8) - return false; - - // %PDF-x.y - sal_uInt8 aFirstBytes[8]; - rInStream.Seek(nPos); - sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8); - if (nRead < 8) - return false; - - if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D' - || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-') - return false; - - sal_Int32 nMajor = OString(aFirstBytes[5]).toInt32(); - sal_Int32 nMinor = OString(aFirstBytes[7]).toInt32(); - return !(nMajor > 1 || (nMajor == 1 && nMinor > 6)); -} - -/// Takes care of transparently downgrading the version of the PDF stream in -/// case it's too new for our PDF export. -bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream) -{ - sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN; - sal_uInt64 nSize = STREAM_SEEK_TO_END; - bool bCompatible = isCompatible(rInStream, nPos, nSize); - rInStream.Seek(nPos); - if (bCompatible) - // Not converting. - rOutStream.WriteStream(rInStream, nSize); - else - { - // Downconvert to PDF-1.6. - auto pPdfium = vcl::pdf::PDFiumLibrary::get(); - if (!pPdfium) - return false; - - // Read input into a buffer. - SvMemoryStream aInBuffer; - aInBuffer.WriteStream(rInStream, nSize); - - SvMemoryStream aSaved; - { - // Load the buffer using pdfium. 
- std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument - = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize()); - if (!pPdfDocument) - return false; - - // 16 means PDF-1.6. - if (!pPdfDocument->saveWithVersion(aSaved, 16)) - return false; - } - - aSaved.Seek(STREAM_SEEK_TO_BEGIN); - rOutStream.WriteStream(aSaved); - } - - return rOutStream.good(); -} -#else -bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream) -{ - rInStream.Seek(STREAM_SEEK_TO_BEGIN); - rOutStream.WriteStream(rInStream, STREAM_SEEK_TO_END); - return rOutStream.good(); -} -#endif // HAVE_FEATURE_PDFIUM - -VectorGraphicDataArray createVectorGraphicDataArray(SvStream& rStream) -{ - // Save the original PDF stream for later use. - SvMemoryStream aMemoryStream; - if (!getCompatibleStream(rStream, aMemoryStream)) - return VectorGraphicDataArray(); - - const sal_uInt32 nStreamLength = aMemoryStream.TellEnd(); - - VectorGraphicDataArray aPdfData(nStreamLength); - - aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN); - aMemoryStream.ReadBytes(aPdfData.begin(), nStreamLength); - if (aMemoryStream.GetError()) - return VectorGraphicDataArray(); - - return aPdfData; -} - -} // end anonymous namespace - namespace vcl { size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& rBitmaps, @@ -168,8 +66,8 @@ size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& r } // Returned unit is points, convert that to pixel. 
- const size_t nPageWidth = pointToPixel(nPageWidthPoints, fResolutionDPI); - const size_t nPageHeight = pointToPixel(nPageHeightPoints, fResolutionDPI); + const size_t nPageWidth = vcl::pdf::pointToPixel(nPageWidthPoints, fResolutionDPI); + const size_t nPageHeight = vcl::pdf::pointToPixel(nPageHeightPoints, fResolutionDPI); std::unique_ptr<vcl::pdf::PDFiumBitmap> pPdfBitmap = pPdfium->createBitmap(nPageWidth, nPageHeight, /*alpha=*/1); if (!pPdfBitmap) @@ -236,7 +134,7 @@ size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& r bool ImportPDF(SvStream& rStream, Graphic& rGraphic) { - VectorGraphicDataArray aPdfDataArray = createVectorGraphicDataArray(rStream); + VectorGraphicDataArray aPdfDataArray = vcl::pdf::createVectorGraphicDataArray(rStream); if (!aPdfDataArray.hasElements()) { SAL_WARN("vcl.filter", "ImportPDF: empty PDF data array"); @@ -437,7 +335,7 @@ size_t ImportPDFUnloaded(const OUString& rURL, std::vector<PDFGraphicResult>& rG ::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | StreamMode::SHARE_DENYNONE)); // Save the original PDF stream for later use. - VectorGraphicDataArray aPdfDataArray = createVectorGraphicDataArray(*xStream); + VectorGraphicDataArray aPdfDataArray = vcl::pdf::createVectorGraphicDataArray(*xStream); if (!aPdfDataArray.hasElements()) return 0; |