summaryrefslogtreecommitdiffstats
path: root/sal/rtl/source/ustring.c
diff options
context:
space:
mode:
Diffstat (limited to 'sal/rtl/source/ustring.c')
-rw-r--r--sal/rtl/source/ustring.c51
1 files changed, 50 insertions, 1 deletions
diff --git a/sal/rtl/source/ustring.c b/sal/rtl/source/ustring.c
index b0bdd2d98e73..feb597547ae1 100644
--- a/sal/rtl/source/ustring.c
+++ b/sal/rtl/source/ustring.c
@@ -1,3 +1,4 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*************************************************************************
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -551,7 +552,7 @@ static void rtl_string2UString_status( rtl_uString** ppThis,
sal_uInt32 nCvtFlags,
sal_uInt32 *pInfo )
{
- OSL_ENSURE(rtl_isOctetTextEncoding(eTextEncoding),
+ OSL_ENSURE(nLen == 0 || rtl_isOctetTextEncoding(eTextEncoding),
"rtl_string2UString_status() - Wrong TextEncoding" );
if ( !nLen )
@@ -800,6 +801,29 @@ void SAL_CALL rtl_uString_intern( rtl_uString ** newStr,
}
}
+static int rtl_canGuessUOutputLength( int len, rtl_TextEncoding eTextEncoding )
+{
+ // FIXME: Maybe we should use a bit flag in the higher bits of the
+ // eTextEncoding value itself to determine the encoding type. But if we
+ // do, be sure to mask the value in certain places that expect the values
+ // to be numbered serially from 0 and up. One such place is
+ // Impl_getTextEncodingData().
+
+ switch ( eTextEncoding )
+ {
+ // 1 to 1 (with no zero elements)
+ case RTL_TEXTENCODING_IBM_437:
+ case RTL_TEXTENCODING_IBM_850:
+ case RTL_TEXTENCODING_IBM_860:
+ case RTL_TEXTENCODING_IBM_861:
+ case RTL_TEXTENCODING_IBM_863:
+ case RTL_TEXTENCODING_IBM_865:
+ return len;
+ break;
+ }
+ return 0;
+}
+
void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
const sal_Char * str,
sal_Int32 len,
@@ -817,6 +841,7 @@ void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
if ( len < 256 )
{ // try various optimisations
+ sal_Int32 ulen;
if ( len < 0 )
len = strlen( str );
if ( eTextEncoding == RTL_TEXTENCODING_ASCII_US )
@@ -836,6 +861,28 @@ void SAL_CALL rtl_uString_internConvert( rtl_uString ** newStr,
rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
return;
}
+ else if ( (ulen = rtl_canGuessUOutputLength(len, eTextEncoding)) != 0 )
+ {
+ rtl_uString *pScratch;
+ rtl_TextToUnicodeConverter hConverter;
+ sal_Size nSrcBytes;
+ sal_uInt32 nInfo;
+
+ pScratch = alloca( sizeof(rtl_uString) + ulen * sizeof (IMPL_RTL_STRCODE) );
+
+ hConverter = rtl_createTextToUnicodeConverter( eTextEncoding );
+ rtl_convertTextToUnicode(
+ hConverter, 0, str, len, pScratch->buffer, ulen, convertFlags, &nInfo, &nSrcBytes );
+ rtl_destroyTextToUnicodeConverter( hConverter );
+
+ if (pInfo)
+ *pInfo = nInfo;
+
+ pScratch->length = ulen;
+ rtl_ustring_intern_internal( newStr, pScratch, CANNOT_RETURN );
+ return;
+ }
+
/* FIXME: we want a nice UTF-8 / alloca shortcut here */
}
@@ -932,3 +979,5 @@ sal_Bool rtl_convertStringToUString(
rtl_string2UString_status(target, source, length, encoding, flags, &info);
return (sal_Bool) ((info & RTL_TEXTTOUNICODE_INFO_ERROR) == 0);
}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */