tdf#123234: Fix subsetting CFF deprecated endchar

Type 2 “endchar” operator can take 4 extra arguments and act like Type 1 “seac” operator. Although this is documented as deprecated, the fonts in the bug report use it. Detect the presence of the extra arguments and output a “seac” operator for them. Change-Id: Ic24ec8fda586c82612f0cc13f2f31585d40ad9c4 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/139908 Tested-by: Jenkins Reviewed-by: خالد حسني <khaled@aliftype.com> (cherry picked from commit 03ff7ee47c6b4e0dbf38a040825aaca53ce2ed28) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141396 Reviewed-by: Caolán McNamara <caolanm@redhat.com>
author: Khaled Hosny <khaled@aliftype.com> 2022-10-15 17:16:36 +0200
committer: Caolán McNamara <caolanm@redhat.com> 2022-10-15 22:39:31 +0200
commit: 45318605384fac327a8b487e0132c4559d25a29c (patch)
tree: d3ef0f4faebf62345fe77151855f46818dfe86df
parent: tdf#126242: Handle fractional values when subsetting CFF (diff)
download: core-45318605384fac327a8b487e0132c4559d25a29c.tar.gz
core-45318605384fac327a8b487e0132c4559d25a29c.zip
1 files changed, 171 insertions, 21 deletions
diff --git a/vcl/source/fontsubset/cff.cxx b/vcl/source/fontsubset/cff.cxx
index d417c7fe1a42..ef756e2fa57f 100644
--- a/vcl/source/fontsubset/cff.cxx
+++ b/vcl/source/fontsubset/cff.cxx
@@ -162,6 +162,55 @@ static const char* pDictEscs[] = {
     "nFDArray",             "nFDSelect",        "sFontName"
 };
 
+static const char* pStandardEncoding[] = {
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", "space", "exclam", "quotedbl",
+    "numbersign", "dollar", "percent", "ampersand",
+    "quoteright", "parenleft", "parenright", "asterisk", "plus",
+    "comma", "hyphen", "period", "slash", "zero", "one", "two",
+    "three", "four", "five", "six", "seven", "eight", "nine",
+    "colon", "semicolon", "less", "equal", "greater",
+    "question", "at", "A", "B", "C", "D", "E", "F", "G", "H",
+    "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T",
+    "U", "V", "W", "X", "Y", "Z", "bracketleft", "backslash",
+    "bracketright", "asciicircum", "underscore", "quoteleft",
+    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l",
+    "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x",
+    "y", "z", "braceleft", "bar", "braceright", "asciitilde",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", "exclamdown",
+    "cent", "sterling", "fraction", "yen", "florin", "section",
+    "currency", "quotesingle", "quotedblleft", "guillemotleft",
+    "guilsinglleft", "guilsinglright", "fi", "fl", ".notdef",
+    "endash", "dagger", "daggerdbl", "periodcentered",
+    ".notdef", "paragraph", "bullet", "quotesinglbase",
+    "quotedblbase", "quotedblright", "guillemotright",
+    "ellipsis", "perthousand", ".notdef", "questiondown",
+    ".notdef", "grave", "acute", "circumflex", "tilde",
+    "macron", "breve", "dotaccent", "dieresis", ".notdef",
+    "ring", "cedilla", ".notdef", "hungarumlaut", "ogonek",
+    "caron", "emdash", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", ".notdef",
+    ".notdef", ".notdef", ".notdef", "AE", ".notdef",
+    "ordfeminine", ".notdef", ".notdef", ".notdef", ".notdef",
+    "Lslash", "Oslash", "OE", "ordmasculine", ".notdef",
+    ".notdef", ".notdef", ".notdef", ".notdef", "ae", ".notdef",
+    ".notdef", ".notdef", "dotlessi", ".notdef", ".notdef",
+    "lslash", "oslash", "oe", "germandbls", ".notdef",
+    ".notdef", ".notdef", ".notdef"
+};
+
 namespace {
 
 namespace TYPE1OP
@@ -261,6 +310,17 @@ struct CffLocal
     bool        mbForceBold;
 };
 
+const int MAX_T1OPS_SIZE = 81920; // TODO: use dynamic value
+
+struct CharString
+{
+    int nLen;
+    U8 aOps[MAX_T1OPS_SIZE];
+    int nCffGlyphId;
+    ValType aCharWidth;
+};
+
+
 class CffSubsetterContext
 :   private CffGlobal
 {
@@ -277,6 +337,8 @@ public:
                 sal_Int32* pGlyphWidths, int nGlyphCount, FontSubsetInfo& );
 
 private:
+    void    convertCharStrings(const sal_GlyphId* pGlyphIds, int nGlyphCount,
+                std::vector<CharString>& rCharStrings);
     int     convert2Type1Ops( CffLocal*, const U8* pType2Ops, int nType2Len, U8* pType1Ops);
     void    convertOneTypeOp();
     void    convertOneTypeEsc();
@@ -306,6 +368,7 @@ private:
     int         getFDSelect( int nGlyphIndex) const;
     int         getGlyphSID( int nGlyphIndex) const;
     const char* getGlyphName( int nGlyphIndex);
+    bool        getBaseAccent(ValType aBase, ValType aAccent, int* nBase, int* nAccent);
 
     void    read2push();
     void    writeType1Val( ValType);
@@ -342,6 +405,9 @@ private:
     ValType mnHintStack[ NMAXHINTS];
 
     ValType maCharWidth;
+
+    bool    mbDoSeac;
+    std::vector<sal_GlyphId> maExtraGlyphIds;
 };
 
 }
@@ -362,6 +428,7 @@ CffSubsetterContext::CffSubsetterContext( const U8* pBasePtr, int nBaseLen)
     , mnHorzHintSize(0)
     , mnHintStack{}
     , maCharWidth(-1)
+    , mbDoSeac(true)
 {
 //  setCharStringType( 1);
     // TODO: new CffLocal[ mnFDAryCount];
@@ -888,6 +955,27 @@ void CffSubsetterContext::convertOneTypeOp()
         clear();
         break;
     case TYPE2OP::ENDCHAR:
+        if (size() >= 4 && mbDoSeac)
+        {
+            // Deprecated seac-like use of endchar (Adobe Technical Note #5177,
+            // Appendix C).
+            auto achar = popVal();
+            auto bchar = popVal();
+            auto ady = popVal();
+            auto adx = popVal();
+            int nBase, nAccent;
+            if (getBaseAccent(bchar, achar, &nBase, &nAccent))
+            {
+                maExtraGlyphIds.push_back(nBase);
+                maExtraGlyphIds.push_back(nAccent);
+                writeType1Val(0); // TODO accent sb
+                writeType1Val(adx);
+                writeType1Val(ady);
+                writeType1Val(bchar);
+                writeType1Val(achar);
+                writeTypeEsc(TYPE1OP::SEAC);
+            }
+        }
         if( mbNeedClose)
             writeTypeOp( TYPE1OP::CLOSEPATH);
         else
@@ -1106,8 +1194,6 @@ void CffSubsetterContext::callType2Subr( bool bGlobal, int nSubrNumber)
     mpReadEnd = pOldReadEnd;
 }
 
-const int MAX_T1OPS_SIZE = 81920; // TODO: use dynamic value
-
 int CffSubsetterContext::convert2Type1Ops( CffLocal* pCffLocal, const U8* const pT2Ops, int nT2Len, U8* const pT1Ops)
 {
     mpCffLocal = pCffLocal;
@@ -1605,6 +1691,28 @@ const char* CffSubsetterContext::getGlyphName( int nGlyphIndex)
     return pGlyphName;
 }
 
+bool CffSubsetterContext::getBaseAccent(ValType aBase, ValType aAccent, int* nBase, int* nAccent)
+{
+    bool bBase = false, bAccent = false;
+    for (int i = 0; i < mnCharStrCount; i++)
+    {
+        const char* pGlyphName = getGlyphName(i);
+        if (pGlyphName == pStandardEncoding[int(aBase)])
+        {
+            *nBase = i;
+            bBase = true;
+        }
+        if (pGlyphName == pStandardEncoding[int(aAccent)])
+        {
+            *nAccent = i;
+            bAccent = true;
+        }
+        if (bBase && bAccent)
+            return true;
+    }
+    return false;
+}
+
 namespace {
 
 class Type1Emitter
@@ -1772,6 +1880,47 @@ void Type1Emitter::emitValVector( const char* pLineHead, const char* pLineTail,
     mpPtr += sprintf( mpPtr, "%s", pLineTail);
 }
 
+void CffSubsetterContext::convertCharStrings(const sal_GlyphId* pGlyphIds, int nGlyphCount,
+                                             std::vector<CharString>& rCharStrings)
+{
+    // If we are doing extra glyphs used for seac operator, check for already
+    // converted glyphs.
+    bool bCheckDuplicates = !rCharStrings.empty();
+    rCharStrings.reserve(rCharStrings.size() + nGlyphCount);
+    for (int i = 0; i < nGlyphCount; ++i)
+    {
+        const int nCffGlyphId = pGlyphIds[i];
+        assert((nCffGlyphId >= 0) && (nCffGlyphId < mnCharStrCount));
+
+        if (!bCheckDuplicates)
+        {
+            const auto& it
+                = std::find_if(rCharStrings.begin(), rCharStrings.end(),
+                               [&](const CharString& c) { return c.nCffGlyphId == nCffGlyphId; });
+            if (it != rCharStrings.end())
+                continue;
+        }
+
+        // get privdict context matching to the glyph
+        const int nFDSelect = getFDSelect(nCffGlyphId);
+        if (nFDSelect < 0)
+            continue;
+        mpCffLocal = &maCffLocal[nFDSelect];
+
+        // convert the Type2op charstring to its Type1op counterpart
+        const int nT2Len = seekIndexData(mnCharStrBase, nCffGlyphId);
+        assert(nT2Len > 0);
+
+        CharString aCharString;
+        const int nT1Len = convert2Type1Ops(mpCffLocal, mpReadPtr, nT2Len, aCharString.aOps);
+        aCharString.nLen = nT1Len;
+        aCharString.nCffGlyphId = nCffGlyphId;
+        aCharString.aCharWidth = maCharWidth;
+
+        rCharStrings.push_back(aCharString);
+    }
+}
+
 void CffSubsetterContext::emitAsType1( Type1Emitter& rEmitter,
     const sal_GlyphId* pReqGlyphIds, const U8* pReqEncoding,
     sal_Int32* pGlyphWidths, int nGlyphCount, FontSubsetInfo& rFSInfo)
@@ -1986,33 +2135,34 @@ void CffSubsetterContext::emitAsType1( Type1Emitter& rEmitter,
     // TODO: emit used LocalSubr charstrings?
 
     // emit the CharStrings for the requested glyphs
+    std::vector<CharString> aCharStrings;
+    mbDoSeac = true;
+    convertCharStrings(pReqGlyphIds, nGlyphCount, aCharStrings);
+
+    // The previous convertCharStrings might collect extra glyphs used in seac
+    // operator, convert them as well
+    if (!maExtraGlyphIds.empty())
+    {
+        mbDoSeac = false;
+        convertCharStrings(maExtraGlyphIds.data(), maExtraGlyphIds.size(), aCharStrings);
+    }
     pOut += sprintf( pOut,
-        "2 index /CharStrings %d dict dup begin\n", nGlyphCount);
+        "2 index /CharStrings %zu dict dup begin\n", aCharStrings.size());
     rEmitter.emitAllCrypted();
-    for( int i = 0; i < nGlyphCount; ++i) {
-        const int nCffGlyphId = pReqGlyphIds[i];
-        assert( (nCffGlyphId >= 0) && (nCffGlyphId < mnCharStrCount));
-        // get privdict context matching to the glyph
-        const int nFDSelect = getFDSelect( nCffGlyphId);
-        if( nFDSelect < 0)
-            continue;
-        mpCffLocal = &maCffLocal[ nFDSelect];
-        // convert the Type2op charstring to its Type1op counterpart
-        const int nT2Len = seekIndexData( mnCharStrBase, nCffGlyphId);
-        assert( nT2Len > 0);
-        U8 aType1Ops[ MAX_T1OPS_SIZE]; // TODO: dynamic allocation
-        const int nT1Len = convert2Type1Ops( mpCffLocal, mpReadPtr, nT2Len, aType1Ops);
+    for (size_t i = 0; i < aCharStrings.size(); i++)
+    {
+        const auto& rCharString = aCharStrings[i];
         // get the glyph name
-        const char* pGlyphName = getGlyphName( nCffGlyphId);
+        const char* pGlyphName = getGlyphName(rCharString.nCffGlyphId);
         // emit the encrypted Type1op charstring
-        pOut += sprintf( pOut, "/%s %d RD ", pGlyphName, nT1Len);
-        memcpy( pOut, aType1Ops, nT1Len);
-        pOut += nT1Len;
+        pOut += sprintf( pOut, "/%s %d RD ", pGlyphName, rCharString.nLen);
+        memcpy( pOut, rCharString.aOps, rCharString.nLen);
+        pOut += rCharString.nLen;
         pOut += sprintf( pOut, " ND\n");
         rEmitter.emitAllCrypted();
         // provide individual glyphwidths if requested
         if( pGlyphWidths ) {
-            ValType aCharWidth = maCharWidth;
+            ValType aCharWidth = rCharString.aCharWidth;
             if( maFontMatrix.size() >= 4)
                 aCharWidth *= 1000.0F * maFontMatrix[0];
             pGlyphWidths[i] = static_cast<sal_Int32>(aCharWidth);
author	Khaled Hosny <khaled@aliftype.com>	2022-10-15 17:16:36 +0200
committer	Caolán McNamara <caolanm@redhat.com>	2022-10-15 22:39:31 +0200
commit	45318605384fac327a8b487e0132c4559d25a29c (patch)
tree	d3ef0f4faebf62345fe77151855f46818dfe86df
parent	tdf#126242: Handle fractional values when subsetting CFF (diff)
download	core-45318605384fac327a8b487e0132c4559d25a29c.tar.gz core-45318605384fac327a8b487e0132c4559d25a29c.zip