From dcb28419b09940c55905b152fb1d9631607c7c05 Mon Sep 17 00:00:00 2001 From: Caolán McNamara Date: Tue, 24 Jul 2012 11:54:28 +0100 Subject: Related: #i29548# Thai word breakiterator regression test Change-Id: Ie47dfe6ab5e308c0353d557fe7530a983622db96 --- i18npool/qa/cppunit/test_breakiterator.cxx | 53 ++++++++++++++++++++++++++---- i18npool/source/breakiterator/data/README | 17 +--------- 2 files changed, 47 insertions(+), 23 deletions(-) (limited to 'i18npool') diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx index b944540be83d..0c913bcf0599 100644 --- a/i18npool/qa/cppunit/test_breakiterator.cxx +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -45,6 +45,8 @@ #include +#include <stack> + using namespace ::com::sun::star; class TestBreakIterator : public test::BootstrapFixtureBase @@ -558,19 +560,56 @@ void TestBreakIterator::testAsian() } //A test to ensure that our thai word boundary detection is useful -//http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html void TestBreakIterator::testThai() { lang::Locale aLocale; aLocale.Language = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("th")); aLocale.Country = ::rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("TH")); - const sal_Unicode THAI1[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A }; - ::rtl::OUString aTest(THAI1, SAL_N_ELEMENTS(THAI1)); - i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, - i18n::WordType::DICTIONARY_WORD, true); - CPPUNIT_ASSERT_MESSAGE("Should skip full word", - aBounds.startPos == 0 && aBounds.endPos == aTest.getLength()); + //See http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html + { + const sal_Unicode THAI[] = { 0x0E01, 0x0E38, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A }; + ::rtl::OUString aTest(THAI, SAL_N_ELEMENTS(THAI)); + i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + CPPUNIT_ASSERT_MESSAGE("Should skip full word", 
aBounds.startPos == 0 && aBounds.endPos == aTest.getLength()); + } + + //See https://issues.apache.org/ooo/show_bug.cgi?id=29548 + //make sure forwards and back are consistent + { + const sal_Unicode THAI[] = + { + 0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41, + 0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34, + 0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27, + 0x0E2D, 0x0E38, 0x0E17, 0x0E22, 0x0E32, 0x0E19, 0x0E41, + 0x0E2B, 0x0E48, 0x0E07, 0x0E0A, 0x0E32, 0x0E15, 0x0E34, + 0x0E19, 0x0E49, 0x0E33, 0x0E2B, 0x0E19, 0x0E32, 0x0E27 + }; + ::rtl::OUString aTest(THAI, SAL_N_ELEMENTS(THAI)); + + std::stack<sal_Int32> aPositions; + sal_Int32 nPos = -1; + do + { + nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; + aPositions.push(nPos); + } + while (nPos < aTest.getLength()); + nPos = aTest.getLength(); + CPPUNIT_ASSERT(!aPositions.empty()); + aPositions.pop(); + do + { + CPPUNIT_ASSERT(!aPositions.empty()); + nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; + CPPUNIT_ASSERT(nPos == aPositions.top()); + aPositions.pop(); + } + while (nPos > 0); + } } #if TODO diff --git a/i18npool/source/breakiterator/data/README b/i18npool/source/breakiterator/data/README index 2005560479a2..cd6ba4ae43b1 100644 --- a/i18npool/source/breakiterator/data/README +++ b/i18npool/source/breakiterator/data/README @@ -590,22 +590,6 @@ Date: Fri Jul 30 13:38:57 2004 +0000 2004/06/14 23:24:16 khong 1.8.92.2: #112772# Japanese word breakiterator is not correct 2004/06/11 19:23:04 khong 1.8.92.1: #112772# Japanese word breakiterator is not correct -commit 6138176b324d55efb1970ad4506c3aee41061d6f -Author: Kurt Zenker -Date: Fri Jul 30 13:38:43 2004 +0000 - - INTEGRATION: CWS i18n13 (1.15.2); FILE MERGED - 2004/06/17 20:29:38 khong 1.15.2.3: # - 2004/06/11 18:59:43 khong 1.15.2.2: #i29548# Fix Thai word breakiterator problem - 2004/06/08 21:11:34 khong 1.15.2.1: #i29548# Fix Thai 
word breakiterator problem - -commit 25e78ca2315f780c2708dd51824aba36c29bc7b7 -Author: Kurt Zenker -Date: Fri Jul 30 13:38:30 2004 +0000 - - INTEGRATION: CWS i18n13 (1.4.162); FILE MERGED - 2004/06/08 21:11:33 khong 1.4.162.1: #i29548# Fix Thai word breakiterator problem - commit d6b8dabc3dc4811e1152d411a8428ccb334d16ab Author: Kurt Zenker Date: Fri Jul 30 13:38:17 2004 +0000 @@ -731,4 +715,5 @@ Date: Fri Nov 7 14:14:35 2003 +0000 done, regression tests added: #i13494# fix word breakiterator rule to handle punctuations and signs correctly +#i29548# Fix Thai word breakiterator problem #i11993# #i14904# fix word breakiterator issues -- cgit