From ce3ffbaec7a23d4e04048def3fd3da53a911043c Mon Sep 17 00:00:00 2001 From: Stephan Bergmann Date: Thu, 5 Jan 2012 14:19:08 +0100 Subject: Properly extract from tcvt*.tab files what goes into textenc.cxx. --- sal/textenc/tables.cxx | 16 +++- sal/textenc/tcvtlat1.tab | 220 ------------------------------------------ sal/textenc/tcvtuni1.tab | 22 ----- sal/textenc/textenc.cxx | 245 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 254 insertions(+), 249 deletions(-) (limited to 'sal/textenc') diff --git a/sal/textenc/tables.cxx b/sal/textenc/tables.cxx index 7fa8e7045cba..c8d9c19cd11a 100644 --- a/sal/textenc/tables.cxx +++ b/sal/textenc/tables.cxx @@ -28,6 +28,8 @@ #include "sal/config.h" +#include <cassert> + #include "sal/types.h" #ifndef INCLUDED_RTL_TEXTENC_TENCHELP_H @@ -128,9 +130,15 @@ static sal_uInt16 const aImplDoubleByteIdentifierTab[1] = { 0 }; extern "C" SAL_DLLPUBLIC_EXPORT ImplTextEncodingData const * sal_getFullTextEncodingData( rtl_TextEncoding nEncoding ) { + assert( + nEncoding != RTL_TEXTENCODING_ASCII_US && + nEncoding != RTL_TEXTENCODING_ISO_8859_1 && + nEncoding != RTL_TEXTENCODING_MS_1252 && + nEncoding != RTL_TEXTENCODING_UTF8); + // handled by Impl_getTextEncodingData static ImplTextEncodingData const * const aData[] = { NULL, /* DONTKNOW */ - &aImplMS1252TextEncodingData, /* MS_1252 */ + NULL, /* MS_1252, see above */ &aImplAPPLEROMANTextEncodingData, /* APPLE_ROMAN */ &aImplIBM437TextEncodingData, /* IBM_437 */ &aImplIBM850TextEncodingData, /* IBM_850 */ @@ -140,8 +148,8 @@ extern "C" SAL_DLLPUBLIC_EXPORT ImplTextEncodingData const * &aImplIBM865TextEncodingData, /* IBM_865 */ NULL, /* reserved (SYSTEM) */ &aImplSYMBOLTextEncodingData, /* SYMBOL */ - &aImplUSASCIITextEncodingData, /* ASCII_US */ - &aImplISO88591TextEncodingData, /* ISO_8859_1 */ + NULL, /* ASCII_US, see above */ + NULL, /* ISO_8859_1, see above */ &aImplISO88592TextEncodingData, /* ISO_8859_2 */ &aImplISO88593TextEncodingData, /* ISO_8859_3 */ 
&aImplISO88594TextEncodingData, /* ISO_8859_4 */ @@ -205,7 +213,7 @@ extern "C" SAL_DLLPUBLIC_EXPORT ImplTextEncodingData const * &aImplIso2022CnTextEncodingData, /* ISO_2022_CN */ &aImplKOI8RTextEncodingData, /* KOI8_R */ &aImplUTF7TextEncodingData, /* UTF7 */ - &aImplUTF8TextEncodingData, /* UTF8 */ + NULL, /* UTF8, see above */ &aImplISO885910TextEncodingData, /* ISO_8859_10 */ &aImplISO885913TextEncodingData, /* ISO_8859_13 */ &aImplEUCKRTextEncodingData, /* EUC_KR */ diff --git a/sal/textenc/tcvtlat1.tab b/sal/textenc/tcvtlat1.tab index 60c3218b64b3..4526063b3b6c 100644 --- a/sal/textenc/tcvtlat1.tab +++ b/sal/textenc/tcvtlat1.tab @@ -1039,176 +1039,6 @@ static ImplTextEncodingData const aImplIBM865TextEncodingData /* ======================================================================= */ -/* MS-1252 */ -/* Windows Standard CharSet (ANSI) for Western Script */ -/* 1-Byte, 0x00-0x7F ASCII ohne Ausnahme */ -/* Convert-Tables: mappings/vendors/micsft/windows/cp1252.txt from 04/15/98 Version 2.01 */ -/* Last-Changes from us: */ - -/* ----------------------------------------------------------------------- */ - -#define MS1252UNI_START 0x80 -#define MS1252UNI_END 0xFF -static sal_uInt16 const aImplMS1252ToUniTab[MS1252UNI_END - MS1252UNI_START + 1] = -{ -/* 0 1 2 3 4 5 6 7 */ -/* 8 9 A B C D E F */ - 0x20AC, 0, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, /* 0x80 */ - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0, 0x017D, 0, /* 0x80 */ - 0, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, /* 0x90 */ - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0, 0x017E, 0x0178, /* 0x90 */ - 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, /* 0xA0 */ - 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, /* 0xA0 */ - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, /* 0xB0 */ - 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, /* 0xB0 */ - 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, /* 0xC0 */ - 0x00C8, 
0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, /* 0xC0 */ - 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, /* 0xD0 */ - 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, /* 0xD0 */ - 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, /* 0xE0 */ - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, /* 0xE0 */ - 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, /* 0xF0 */ - 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF /* 0xF0 */ -}; - -/* ----------------------------------------------------------------------- */ - -#define MS1252TOCHARTABEX_COUNT 27 -static ImplUniCharTabData const aImplMS1252ToCharTabEx[MS1252TOCHARTABEX_COUNT] = -{ - { 0x0152, 0x8C, 0 }, - { 0x0153, 0x9C, 0 }, - { 0x0160, 0x8A, 0 }, - { 0x0161, 0x9A, 0 }, - { 0x0178, 0x9F, 0 }, - { 0x017D, 0x8E, 0 }, - { 0x017E, 0x9E, 0 }, - { 0x0192, 0x83, 0 }, - { 0x02C6, 0x88, 0 }, - { 0x02DC, 0x98, 0 }, - { 0x2013, 0x96, 0 }, - { 0x2014, 0x97, 0 }, - { 0x2018, 0x91, 0 }, - { 0x2019, 0x92, 0 }, - { 0x201A, 0x82, 0 }, - { 0x201C, 0x93, 0 }, - { 0x201D, 0x94, 0 }, - { 0x201E, 0x84, 0 }, - { 0x2020, 0x86, 0 }, - { 0x2021, 0x87, 0 }, - { 0x2022, 0x95, 0 }, - { 0x2026, 0x85, 0 }, - { 0x2030, 0x89, 0 }, - { 0x2039, 0x8B, 0 }, - { 0x203A, 0x9B, 0 }, - { 0x20AC, 0x80, 0 }, - { 0x2122, 0x99, 0 }, -}; - -/* ----------------------------------------------------------------------- */ - -static ImplByteConvertData const aImplMS1252ByteCvtData = -{ - aImplMS1252ToUniTab, - NULL, - MS1252UNI_START, MS1252UNI_END, - NOTABUNI_START, NOTABUNI_END, - aImplA0FFSameToCharTab, - NULL, - aImplMS1252ToCharTabEx, - SAMEA0FFCHAR_START, SAMEA0FFCHAR_END, - NOTABCHAR_START, NOTABCHAR_END, - MS1252TOCHARTABEX_COUNT -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplMS1252TextEncodingData - = { { &aImplMS1252ByteCvtData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - 
NULL, - NULL, - NULL, - NULL, - NULL }, - 1, - 1, - 1, - 0, - "iso8859-1", - "windows-1252", - RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; - /* WIN, SCRIPT_LATIN, pc code page 850 */ - -/* ======================================================================= */ - -/* ISO-8859-1 */ -/* Unix Standard CharSet (Latin1) for Western Script */ -/* 1-Byte, 0x00-0x7F ASCII ohne Ausnahme, 0x80-0x9F Control-Caracter wie in Unicode */ -/* Convert-Tables: mappings/iso8859/8859-1.txt from 07/27/99 Version 1.0 (based on Unicode 3.0) */ -/* Last-Changes from us: */ - -#define ISO88591UNI_START 0xA0 -#define ISO88591UNI_END 0xFF -static sal_uInt16 const aImplISO88591ToUniTab[ISO88591UNI_END - ISO88591UNI_START + 1] = -{ -/* 0 1 2 3 4 5 6 7 */ -/* 8 9 A B C D E F */ - 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, /* 0xA0 */ - 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, /* 0xA0 */ - 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, /* 0xB0 */ - 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, /* 0xB0 */ - 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, /* 0xC0 */ - 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, /* 0xC0 */ - 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, /* 0xD0 */ - 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, /* 0xD0 */ - 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, /* 0xE0 */ - 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, /* 0xE0 */ - 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, /* 0xF0 */ - 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF /* 0xF0 */ -}; - -/* ----------------------------------------------------------------------- */ - -static ImplByteConvertData const aImplISO88591ByteCvtData = -{ - aImplISO88591ToUniTab, - aImpl8090SameToUniTab, - ISO88591UNI_START, ISO88591UNI_END, - SAME8090UNI_START, SAME8090UNI_END, - 
aImplA0FFSameToCharTab, - aImpl8090SameToCharTab, - NULL, - SAMEA0FFCHAR_START, SAMEA0FFCHAR_END, - SAME8090CHAR_START, SAME8090CHAR_END, - 0 -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplISO88591TextEncodingData - = { { &aImplISO88591ByteCvtData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL }, - 1, - 1, - 1, - 0, - "iso8859-1", - "iso-8859-1", - RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; - /* SCRIPT_LATIN, pc code page 850 */ - -/* ======================================================================= */ - /* ISO-8859-14 */ /* ungefaehr wie Latin1 */ /* 1-Byte, 0x00-0x7F ASCII ohne Ausnahme, 0x80-0x9F Control-Caracter wie in Unicode */ @@ -1723,53 +1553,3 @@ static ImplTextEncodingData const aImplAPPLEROMANTextEncodingData "macintosh", RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; /* MAC, SCRIPT_LATIN, pc code page 850 */ - -/* ======================================================================= */ - -/* US-ASCII */ -/* 7-Bit ASCII */ -/* 1-Byte, 0x00-0x7F ASCII ohne Ausnahme */ -/* For the import we use ISO-8859-1 with MS extension (MS-1252), because */ -/* when the 8-Bit is set, the chance, that this is a ISO-8859-1 character */ -/* is the greatest. For the export all chars greater than 127 are not */ -/* converted and are replaced by the replacement character. 
*/ -/* Last-Changes from us: */ - -/* ----------------------------------------------------------------------- */ - -static ImplByteConvertData const aImplUSASCIIByteCvtData = -{ - aImplMS1252ToUniTab, - NULL, - MS1252UNI_START, MS1252UNI_END, - NOTABUNI_START, NOTABUNI_END, - NULL, - NULL, - NULL, - NOTABCHAR_START, NOTABCHAR_END, - NOTABCHAR_START, NOTABCHAR_END, - 0 -}; - -/* ----------------------------------------------------------------------- */ - -static ImplTextEncodingData const aImplUSASCIITextEncodingData - = { { &aImplUSASCIIByteCvtData, - ImplCharToUnicode, - ImplUnicodeToChar, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL }, - 1, - 1, - 1, - 0, - "iso8859-1", - "us-ascii", - RTL_TEXTENCODING_INFO_ASCII - | RTL_TEXTENCODING_INFO_7BIT - | RTL_TEXTENCODING_INFO_MIME }; - /* SCRIPT_LATIN, pc code page 437 */ diff --git a/sal/textenc/tcvtuni1.tab b/sal/textenc/tcvtuni1.tab index 4efaa1ad51ad..83f0a5edc39d 100644 --- a/sal/textenc/tcvtuni1.tab +++ b/sal/textenc/tcvtuni1.tab @@ -54,28 +54,6 @@ static ImplTextEncodingData const aImplUTF7TextEncodingData | RTL_TEXTENCODING_INFO_MIME }; /* SCRIPT_UNICODE, pc code page 850 */ -static ImplTextEncodingData const aImplUTF8TextEncodingData - = { { NULL, - &ImplConvertUtf8ToUnicode, - &ImplConvertUnicodeToUtf8, - &ImplCreateUtf8ToUnicodeContext, - &ImplDestroyContext, - &ImplResetUtf8ToUnicodeContext, - &ImplCreateUnicodeToUtf8Context, - &ImplDestroyContext, - &ImplResetUnicodeToUtf8Context }, - 1, - 6, - 1, - 0, - "iso8859-1", - "utf-8", - RTL_TEXTENCODING_INFO_ASCII - | RTL_TEXTENCODING_INFO_UNICODE - | RTL_TEXTENCODING_INFO_MULTIBYTE - | RTL_TEXTENCODING_INFO_MIME }; - /* SCRIPT_UNICODE, pc code page 850 */ - static char aImplJavaUtf8TextConverterTag; /* The value of this tag is irrelevant. 
Only its address != NULL is used to distinguish between RTL_TEXTENCODING_UTF8 and diff --git a/sal/textenc/textenc.cxx b/sal/textenc/textenc.cxx index da758f7c12ec..9b3e25623073 100644 --- a/sal/textenc/textenc.cxx +++ b/sal/textenc/textenc.cxx @@ -33,6 +33,7 @@ /* TODO! This file should not be called textenc.c, because it is not the implementation of rtl/textenc.h. Rather, it should be called gettextencodingdata.c. */ +#include "context.h" #include "gettextencodingdata.h" #include "tenchelp.h" #include "rtl/textenc.h" @@ -96,9 +97,247 @@ static sal_uChar const aImplA0FFSameToCharTab[SAMEA0FFCHAR_END static sal_uInt16 const aImplDoubleByteIdentifierTab[1] = { 0 }; -#include "tcvtest1.tab" -#include "tcvtlat1.tab" -#include "tcvtuni1.tab" +/* ======================================================================= */ + +/* MS-1252 */ +/* Windows Standard CharSet (ANSI) for Western Script */ +/* 1-Byte, 0x00-0x7F ASCII ohne Ausnahme */ +/* Convert-Tables: mappings/vendors/micsft/windows/cp1252.txt from 04/15/98 Version 2.01 */ +/* Last-Changes from us: */ + +/* ----------------------------------------------------------------------- */ + +#define MS1252UNI_START 0x80 +#define MS1252UNI_END 0xFF +static sal_uInt16 const aImplMS1252ToUniTab[MS1252UNI_END - MS1252UNI_START + 1] = +{ +/* 0 1 2 3 4 5 6 7 */ +/* 8 9 A B C D E F */ + 0x20AC, 0, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, /* 0x80 */ + 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0, 0x017D, 0, /* 0x80 */ + 0, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, /* 0x90 */ + 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0, 0x017E, 0x0178, /* 0x90 */ + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, /* 0xA0 */ + 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, /* 0xA0 */ + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, /* 0xB0 */ + 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, /* 0xB0 */ + 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 
/* 0xC0 */ + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, /* 0xC0 */ + 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, /* 0xD0 */ + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, /* 0xD0 */ + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, /* 0xE0 */ + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, /* 0xE0 */ + 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, /* 0xF0 */ + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF /* 0xF0 */ +}; + +/* ----------------------------------------------------------------------- */ + +#define MS1252TOCHARTABEX_COUNT 27 +static ImplUniCharTabData const aImplMS1252ToCharTabEx[MS1252TOCHARTABEX_COUNT] = +{ + { 0x0152, 0x8C, 0 }, + { 0x0153, 0x9C, 0 }, + { 0x0160, 0x8A, 0 }, + { 0x0161, 0x9A, 0 }, + { 0x0178, 0x9F, 0 }, + { 0x017D, 0x8E, 0 }, + { 0x017E, 0x9E, 0 }, + { 0x0192, 0x83, 0 }, + { 0x02C6, 0x88, 0 }, + { 0x02DC, 0x98, 0 }, + { 0x2013, 0x96, 0 }, + { 0x2014, 0x97, 0 }, + { 0x2018, 0x91, 0 }, + { 0x2019, 0x92, 0 }, + { 0x201A, 0x82, 0 }, + { 0x201C, 0x93, 0 }, + { 0x201D, 0x94, 0 }, + { 0x201E, 0x84, 0 }, + { 0x2020, 0x86, 0 }, + { 0x2021, 0x87, 0 }, + { 0x2022, 0x95, 0 }, + { 0x2026, 0x85, 0 }, + { 0x2030, 0x89, 0 }, + { 0x2039, 0x8B, 0 }, + { 0x203A, 0x9B, 0 }, + { 0x20AC, 0x80, 0 }, + { 0x2122, 0x99, 0 }, +}; + +/* ----------------------------------------------------------------------- */ + +static ImplByteConvertData const aImplMS1252ByteCvtData = +{ + aImplMS1252ToUniTab, + NULL, + MS1252UNI_START, MS1252UNI_END, + NOTABUNI_START, NOTABUNI_END, + aImplA0FFSameToCharTab, + NULL, + aImplMS1252ToCharTabEx, + SAMEA0FFCHAR_START, SAMEA0FFCHAR_END, + NOTABCHAR_START, NOTABCHAR_END, + MS1252TOCHARTABEX_COUNT +}; + +/* ----------------------------------------------------------------------- */ + +static ImplTextEncodingData const aImplMS1252TextEncodingData + = { { &aImplMS1252ByteCvtData, + ImplCharToUnicode, + 
ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + 1, + 1, + 1, + 0, + "iso8859-1", + "windows-1252", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* WIN, SCRIPT_LATIN, pc code page 850 */ + +/* ======================================================================= */ + +/* ISO-8859-1 */ +/* Unix Standard CharSet (Latin1) for Western Script */ +/* 1-Byte, 0x00-0x7F ASCII ohne Ausnahme, 0x80-0x9F Control-Caracter wie in Unicode */ +/* Convert-Tables: mappings/iso8859/8859-1.txt from 07/27/99 Version 1.0 (based on Unicode 3.0) */ +/* Last-Changes from us: */ + +#define ISO88591UNI_START 0xA0 +#define ISO88591UNI_END 0xFF +static sal_uInt16 const aImplISO88591ToUniTab[ISO88591UNI_END - ISO88591UNI_START + 1] = +{ +/* 0 1 2 3 4 5 6 7 */ +/* 8 9 A B C D E F */ + 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, /* 0xA0 */ + 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF, /* 0xA0 */ + 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, /* 0xB0 */ + 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF, /* 0xB0 */ + 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, /* 0xC0 */ + 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF, /* 0xC0 */ + 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, /* 0xD0 */ + 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF, /* 0xD0 */ + 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, /* 0xE0 */ + 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF, /* 0xE0 */ + 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, /* 0xF0 */ + 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF /* 0xF0 */ +}; + +/* ----------------------------------------------------------------------- */ + +static ImplByteConvertData const aImplISO88591ByteCvtData = +{ + aImplISO88591ToUniTab, + aImpl8090SameToUniTab, + ISO88591UNI_START, ISO88591UNI_END, + SAME8090UNI_START, 
SAME8090UNI_END, + aImplA0FFSameToCharTab, + aImpl8090SameToCharTab, + NULL, + SAMEA0FFCHAR_START, SAMEA0FFCHAR_END, + SAME8090CHAR_START, SAME8090CHAR_END, + 0 +}; + +/* ----------------------------------------------------------------------- */ + +static ImplTextEncodingData const aImplISO88591TextEncodingData + = { { &aImplISO88591ByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + 1, + 1, + 1, + 0, + "iso8859-1", + "iso-8859-1", + RTL_TEXTENCODING_INFO_ASCII | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_LATIN, pc code page 850 */ + +/* ======================================================================= */ + +/* US-ASCII */ +/* 7-Bit ASCII */ +/* 1-Byte, 0x00-0x7F ASCII ohne Ausnahme */ +/* For the import we use ISO-8859-1 with MS extension (MS-1252), because */ +/* when the 8-Bit is set, the chance, that this is a ISO-8859-1 character */ +/* is the greatest. For the export all chars greater than 127 are not */ +/* converted and are replaced by the replacement character. 
*/ +/* Last-Changes from us: */ + +/* ----------------------------------------------------------------------- */ + +static ImplByteConvertData const aImplUSASCIIByteCvtData = +{ + aImplMS1252ToUniTab, + NULL, + MS1252UNI_START, MS1252UNI_END, + NOTABUNI_START, NOTABUNI_END, + NULL, + NULL, + NULL, + NOTABCHAR_START, NOTABCHAR_END, + NOTABCHAR_START, NOTABCHAR_END, + 0 +}; + +/* ----------------------------------------------------------------------- */ + +static ImplTextEncodingData const aImplUSASCIITextEncodingData + = { { &aImplUSASCIIByteCvtData, + ImplCharToUnicode, + ImplUnicodeToChar, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL }, + 1, + 1, + 1, + 0, + "iso8859-1", + "us-ascii", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_7BIT + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_LATIN, pc code page 437 */ + +static ImplTextEncodingData const aImplUTF8TextEncodingData + = { { NULL, + &ImplConvertUtf8ToUnicode, + &ImplConvertUnicodeToUtf8, + &ImplCreateUtf8ToUnicodeContext, + &ImplDestroyContext, + &ImplResetUtf8ToUnicodeContext, + &ImplCreateUnicodeToUtf8Context, + &ImplDestroyContext, + &ImplResetUnicodeToUtf8Context }, + 1, + 6, + 1, + 0, + "iso8859-1", + "utf-8", + RTL_TEXTENCODING_INFO_ASCII + | RTL_TEXTENCODING_INFO_UNICODE + | RTL_TEXTENCODING_INFO_MULTIBYTE + | RTL_TEXTENCODING_INFO_MIME }; + /* SCRIPT_UNICODE, pc code page 850 */ namespace { -- cgit