From 209fc9fd7fa433947af0bf86e210d73fa7f5a045 Mon Sep 17 00:00:00 2001 From: Alex Henrie Date: Fri, 20 Dec 2019 09:23:06 -0700 Subject: Add case table for Deseret and Osage For characters like these that are in Unicode's Supplementary Multilingual Plane, only the low surrogate changes when changing case. Change-Id: I2c4e9880b4c41a6ecfc333bb2710cf1db3f80da7 Reviewed-on: https://gerrit.libreoffice.org/85621 Tested-by: Jenkins Reviewed-by: Eike Rathke --- i18nutil/source/utility/casefolding.cxx | 17 +++++++++++---- i18nutil/source/utility/casefolding_data.h | 33 ++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 4 deletions(-) (limited to 'i18nutil') diff --git a/i18nutil/source/utility/casefolding.cxx b/i18nutil/source/utility/casefolding.cxx index 0b01565a4466..cf3a716701e3 100644 --- a/i18nutil/source/utility/casefolding.cxx +++ b/i18nutil/source/utility/casefolding.cxx @@ -24,6 +24,7 @@ #include #include #include +#include <rtl/character.hxx> using namespace com::sun::star::lang; using namespace com::sun::star::uno; @@ -91,13 +92,21 @@ const Mapping& casefolding::getConditionalValue(const sal_Unicode* str, sal_Int3 Mapping casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale const & aLocale, MappingType nMappingType) { - Mapping dummy = { 0, 1, { 0, 0, 0 } }; - sal_Int16 address = CaseMappingIndex[str[pos] >> 8]; + Mapping dummy = { 0, 1, { str[pos], 0, 0 } }; + + sal_uInt32 c; + if (pos > 0 && rtl::isHighSurrogate(str[pos-1]) && rtl::isLowSurrogate(str[pos])) { + c = rtl::combineSurrogates(str[pos-1], str[pos]); + if (c >= SAL_N_ELEMENTS(CaseMappingIndex) * 256) + return dummy; + } else { + c = str[pos]; + } - dummy.map[0] = str[pos]; + sal_Int16 address = CaseMappingIndex[c >> 8]; if (address >= 0) { - address = (address << 8) + (str[pos] & 0xFF); + address = (address << 8) + (c & 0xFF); if (static_cast<MappingType>(CaseMappingValue[address].type) & nMappingType) { MappingType type = static_cast<MappingType>(CaseMappingValue[address].type); if (type & 
MappingType::NotValue) { diff --git a/i18nutil/source/utility/casefolding_data.h b/i18nutil/source/utility/casefolding_data.h index 1cd912b2200d..20c5cc714103 100644 --- a/i18nutil/source/utility/casefolding_data.h +++ b/i18nutil/source/utility/casefolding_data.h @@ -56,6 +56,7 @@ static const sal_Int8 CaseMappingIndex[] = { -1, -1, -1, -1, -1, -1, -1, -1, // e800 - efff -1, -1, -1, -1, -1, -1, -1, -1, // f000 - f7ff -1, -1, -1, 0x0b, -1, -1, -1, 0x0c, // f800 - ffff + -1, -1, -1, -1, 0x0d, -1, -1, -1, // 10000 - 107ff }; @@ -489,6 +490,38 @@ static const Value CaseMappingValue[] = { {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // fff0 - fff7 {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // fff8 - ffff + {0x6a, 0xDC28}, {0x6a, 0xDC29}, {0x6a, 0xDC2A}, {0x6a, 0xDC2B}, {0x6a, 0xDC2C}, {0x6a, 0xDC2D}, {0x6a, 0xDC2E}, {0x6a, 0xDC2F}, // 10400 - 10407 + {0x6a, 0xDC30}, {0x6a, 0xDC31}, {0x6a, 0xDC32}, {0x6a, 0xDC33}, {0x6a, 0xDC34}, {0x6a, 0xDC35}, {0x6a, 0xDC36}, {0x6a, 0xDC37}, // 10408 - 1040f + {0x6a, 0xDC38}, {0x6a, 0xDC39}, {0x6a, 0xDC3A}, {0x6a, 0xDC3B}, {0x6a, 0xDC3C}, {0x6a, 0xDC3D}, {0x6a, 0xDC3E}, {0x6a, 0xDC3F}, // 10410 - 10417 + {0x6a, 0xDC40}, {0x6a, 0xDC41}, {0x6a, 0xDC42}, {0x6a, 0xDC43}, {0x6a, 0xDC44}, {0x6a, 0xDC45}, {0x6a, 0xDC46}, {0x6a, 0xDC47}, // 10418 - 1041f + {0x6a, 0xDC48}, {0x6a, 0xDC49}, {0x6a, 0xDC4A}, {0x6a, 0xDC4B}, {0x6a, 0xDC4C}, {0x6a, 0xDC4D}, {0x6a, 0xDC4E}, {0x6a, 0xDC4F}, // 10420 - 10427 + {0x15, 0xDC00}, {0x15, 0xDC01}, {0x15, 0xDC02}, {0x15, 0xDC03}, {0x15, 0xDC04}, {0x15, 0xDC05}, {0x15, 0xDC06}, {0x15, 0xDC07}, // 10428 - 1042f + {0x15, 0xDC08}, {0x15, 0xDC09}, {0x15, 0xDC0A}, {0x15, 0xDC0B}, {0x15, 0xDC0C}, {0x15, 0xDC0D}, {0x15, 0xDC0E}, {0x15, 0xDC0F}, // 10430 - 10437 + {0x15, 0xDC10}, {0x15, 0xDC11}, {0x15, 0xDC12}, {0x15, 0xDC13}, {0x15, 0xDC14}, {0x15, 
0xDC15}, {0x15, 0xDC16}, {0x15, 0xDC17}, // 10438 - 1043f + {0x15, 0xDC18}, {0x15, 0xDC19}, {0x15, 0xDC1A}, {0x15, 0xDC1B}, {0x15, 0xDC1C}, {0x15, 0xDC1D}, {0x15, 0xDC1E}, {0x15, 0xDC1F}, // 10440 - 10447 + {0x15, 0xDC20}, {0x15, 0xDC21}, {0x15, 0xDC22}, {0x15, 0xDC23}, {0x15, 0xDC24}, {0x15, 0xDC25}, {0x15, 0xDC26}, {0x15, 0xDC27}, // 10448 - 1044f + {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 10450 - 10457 + {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 10458 - 1045f + {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 10460 - 10467 + {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 10468 - 1046f + {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 10470 - 10477 + {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 10478 - 1047f + {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 10480 - 10487 + {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 10488 - 1048f + {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 10490 - 10497 + {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 10498 - 1049f + {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 104a0 - 104a7 + {0x00, 0x0000}, {0x00, 
0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 104a8 - 104af + {0x6a, 0xDCD8}, {0x6a, 0xDCD9}, {0x6a, 0xDCDA}, {0x6a, 0xDCDB}, {0x6a, 0xDCDC}, {0x6a, 0xDCDD}, {0x6a, 0xDCDE}, {0x6a, 0xDCDF}, // 104b0 - 104b7 + {0x6a, 0xDCE0}, {0x6a, 0xDCE1}, {0x6a, 0xDCE2}, {0x6a, 0xDCE3}, {0x6a, 0xDCE4}, {0x6a, 0xDCE5}, {0x6a, 0xDCE6}, {0x6a, 0xDCE7}, // 104b8 - 104bf + {0x6a, 0xDCE8}, {0x6a, 0xDCE9}, {0x6a, 0xDCEA}, {0x6a, 0xDCEB}, {0x6a, 0xDCEC}, {0x6a, 0xDCED}, {0x6a, 0xDCEE}, {0x6a, 0xDCEF}, // 104c0 - 104c7 + {0x6a, 0xDCF0}, {0x6a, 0xDCF1}, {0x6a, 0xDCF2}, {0x6a, 0xDCF3}, {0x6a, 0xDCF4}, {0x6a, 0xDCF5}, {0x6a, 0xDCF6}, {0x6a, 0xDCF7}, // 104c8 - 104cf + {0x6a, 0xDCF8}, {0x6a, 0xDCF9}, {0x6a, 0xDCFA}, {0x6a, 0xDCFB}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 104d0 - 104d7 + {0x15, 0xDCB0}, {0x15, 0xDCB1}, {0x15, 0xDCB2}, {0x15, 0xDCB3}, {0x15, 0xDCB4}, {0x15, 0xDCB5}, {0x15, 0xDCB6}, {0x15, 0xDCB7}, // 104d8 - 104df + {0x15, 0xDCB8}, {0x15, 0xDCB9}, {0x15, 0xDCBA}, {0x15, 0xDCBB}, {0x15, 0xDCBC}, {0x15, 0xDCBD}, {0x15, 0xDCBE}, {0x15, 0xDCBF}, // 104e0 - 104e7 + {0x15, 0xDCC0}, {0x15, 0xDCC1}, {0x15, 0xDCC2}, {0x15, 0xDCC3}, {0x15, 0xDCC4}, {0x15, 0xDCC5}, {0x15, 0xDCC6}, {0x15, 0xDCC7}, // 104e8 - 104ef + {0x15, 0xDCC8}, {0x15, 0xDCC9}, {0x15, 0xDCCA}, {0x15, 0xDCCB}, {0x15, 0xDCCC}, {0x15, 0xDCCD}, {0x15, 0xDCCE}, {0x15, 0xDCCF}, // 104f0 - 104f7 + {0x15, 0xDCD0}, {0x15, 0xDCD1}, {0x15, 0xDCD2}, {0x15, 0xDCD3}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, {0x00, 0x0000}, // 104f8 - 104ff }; -- cgit