diff options
author | Miklos Vajna <vmiklos@frugalware.org> | 2011-11-29 00:48:55 +0100 |
---|---|---|
committer | Miklos Vajna <vmiklos@frugalware.org> | 2011-11-29 00:52:11 +0100 |
commit | 4f6d80fbb8a83ef98dd3c0d746fa7fe650d71f02 (patch) | |
tree | 9afc449edbc5c6aa12bfa583f6f2474c5bc1c748 | |
parent | mark these as const (diff) | |
download | core-4f6d80fbb8a83ef98dd3c0d746fa7fe650d71f02.tar.gz core-4f6d80fbb8a83ef98dd3c0d746fa7fe650d71f02.zip |
RTF: Avoid importing unicode characters one by one
This improves the speed of importing Unicode text further. Results for a
5-page sample of Chinese text:
- before: 6.692s
- after: 1.388s
-rw-r--r-- | writerfilter/source/rtftok/rtfdocumentimpl.cxx | 34 | ||||
-rw-r--r-- | writerfilter/source/rtftok/rtfdocumentimpl.hxx | 4 |
2 files changed, 35 insertions, 3 deletions
diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.cxx b/writerfilter/source/rtftok/rtfdocumentimpl.cxx index 0e81b4aebdd8..1a138c0e454c 100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.cxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.cxx @@ -291,7 +291,8 @@ RTFDocumentImpl::RTFDocumentImpl(uno::Reference<uno::XComponentContext> const& x m_nCurrentStyleIndex(0), m_bEq(false), m_bWasInFrame(false), - m_bIsInFrame(false) + m_bIsInFrame(false), + m_aUnicodeBuffer() { OSL_ASSERT(xInputStream.is()); m_pInStream.reset(utl::UcbStreamHelper::CreateStream(xInputStream, sal_True)); @@ -339,6 +340,7 @@ bool RTFDocumentImpl::isSubstream() const void RTFDocumentImpl::finishSubstream() { + checkUnicode(); // At the end of a footnote stream, we need to emit a run break when importing from Word. // We can't do so unconditionally, as Writer already writes a \par at the end of the footnote. if (m_bNeedCr) @@ -697,12 +699,20 @@ int RTFDocumentImpl::resolveChars(char ch) { OStringBuffer aBuf; + bool bUnicodeChecked = false; while(!Strm().IsEof() && ch != '{' && ch != '}' && ch != '\\') { if (ch != 0x0d && ch != 0x0a) { if (m_aStates.top().nCharsToSkip == 0) + { + if (!bUnicodeChecked) + { + checkUnicode(); + bUnicodeChecked = true; + } aBuf.append(ch); + } else m_aStates.top().nCharsToSkip--; } @@ -989,6 +999,7 @@ void RTFDocumentImpl::replayBuffer(RTFBuffer_t& rBuffer) int RTFDocumentImpl::dispatchDestination(RTFKeyword nKeyword) { + checkUnicode(); RTFSkipDestination aSkip(*this); switch (nKeyword) { @@ -1277,6 +1288,8 @@ int RTFDocumentImpl::dispatchDestination(RTFKeyword nKeyword) int RTFDocumentImpl::dispatchSymbol(RTFKeyword nKeyword) { + if (nKeyword != RTF_HEXCHAR) + checkUnicode(); RTFSkipDestination aSkip(*this); sal_uInt8 cCh = 0; @@ -1450,6 +1463,7 @@ int RTFDocumentImpl::dispatchSymbol(RTFKeyword nKeyword) int RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword) { + checkUnicode(); RTFSkipDestination aSkip(*this); int nParam = -1; @@ -1905,6 +1919,8 @@ int 
RTFDocumentImpl::dispatchFlag(RTFKeyword nKeyword) int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) { + if (nKeyword != RTF_U) + checkUnicode(); RTFSkipDestination aSkip(*this); int nSprm = 0; RTFValue::Pointer_t pIntValue(new RTFValue(nParam)); @@ -2231,8 +2247,7 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) case RTF_U: if ((SAL_MIN_INT16 <= nParam) && (nParam <= SAL_MAX_INT16)) { - OUString aStr(static_cast<sal_Unicode>(nParam)); - text(aStr); + m_aUnicodeBuffer.append(static_cast<sal_Unicode>(nParam)); m_aStates.top().nCharsToSkip = m_aStates.top().nUc; } break; @@ -2550,6 +2565,7 @@ int RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam) int RTFDocumentImpl::dispatchToggle(RTFKeyword nKeyword, bool bParam, int nParam) { + checkUnicode(); RTFSkipDestination aSkip(*this); int nSprm = -1; RTFValue::Pointer_t pBoolValue(new RTFValue(!bParam || nParam != 0)); @@ -2650,6 +2666,7 @@ int RTFDocumentImpl::pushState() { //OSL_TRACE("%s before push: %d", OSL_THIS_FUNC, m_nGroup); + checkUnicode(); m_nGroupStartPos = Strm().Tell(); RTFParserState aState; if (m_aStates.empty()) @@ -2728,6 +2745,7 @@ int RTFDocumentImpl::popState() { //OSL_TRACE("%s before pop: m_nGroup %d, dest state: %d", OSL_THIS_FUNC, m_nGroup, m_aStates.top().nDestinationState); + checkUnicode(); RTFSprms aSprms; RTFSprms aAttributes; OUStringBuffer aDestinationText; @@ -3184,6 +3202,16 @@ void RTFDocumentImpl::setSkipUnknown(bool bSkipUnknown) m_bSkipUnknown = bSkipUnknown; } +void RTFDocumentImpl::checkUnicode() +{ + if (m_aUnicodeBuffer.getLength() > 0) + { + OSL_TRACE("debug, sending collected unicode chars"); + OUString aString = m_aUnicodeBuffer.makeStringAndClear(); + text(aString); + } +} + RTFParserState::RTFParserState() : nInternalState(INTERNAL_NORMAL), nDestinationState(DESTINATION_NORMAL), diff --git a/writerfilter/source/rtftok/rtfdocumentimpl.hxx b/writerfilter/source/rtftok/rtfdocumentimpl.hxx index 2e636041822e..863a6d5563b3 
100644 --- a/writerfilter/source/rtftok/rtfdocumentimpl.hxx +++ b/writerfilter/source/rtftok/rtfdocumentimpl.hxx @@ -341,6 +341,8 @@ namespace writerfilter { /// If we got tokens indicating we're in a frame. bool inFrame(); void checkChangedFrame(); + /// If we have some unicode characters to send. + void checkUnicode(); uno::Reference<uno::XComponentContext> const& m_xContext; uno::Reference<io::XInputStream> const& m_xInputStream; @@ -425,6 +427,8 @@ namespace writerfilter { bool m_bWasInFrame; /// If a frame start token is already sent to dmapper (nesting them is not OK). bool m_bIsInFrame; + // Unicode characters are collected here so we don't have to send them one by one. + rtl::OUStringBuffer m_aUnicodeBuffer; }; } // namespace rtftok |