summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEike Rathke <erack@redhat.com>2022-05-16 14:58:31 +0200
committerEike Rathke <erack@redhat.com>2022-05-16 18:36:19 +0200
commitf6e7b9a9921cc08ce603bb005deb187a7fdafb55 (patch)
treeb535749958ea1ba8c37a1ccbe34b99791d18e8f9
parentstore: add unit tests (diff)
downloadcore-f6e7b9a9921cc08ce603bb005deb187a7fdafb55.tar.gz
core-f6e7b9a9921cc08ce603bb005deb187a7fdafb55.zip
Update to ICU 71.1
No major changes. See https://icu.unicode.org/download/71 Change-Id: I7929d175962ff13e4369005633a4135f17f97e8c Reviewed-on: https://gerrit.libreoffice.org/c/core/+/134404 Reviewed-by: Eike Rathke <erack@redhat.com> Tested-by: Jenkins
-rw-r--r--configure.ac2
-rw-r--r--download.lst8
-rw-r--r--external/icu/UnpackedTarball_icu.mk4
-rw-r--r--external/icu/icu4c-khmerbreakengine.patch.162
4 files changed, 38 insertions, 38 deletions
diff --git a/configure.ac b/configure.ac
index 2f0f5a574ed6..79c8b20a5043 100644
--- a/configure.ac
+++ b/configure.ac
@@ -10715,7 +10715,7 @@ fi
dnl ===================================================================
dnl Check for system icu
dnl ===================================================================
-ICU_MAJOR=70
+ICU_MAJOR=71
ICU_MINOR=1
ICU_RECLASSIFIED_PREPEND_SET_EMPTY="TRUE"
ICU_RECLASSIFIED_CONDITIONAL_JAPANESE_STARTER="TRUE"
diff --git a/download.lst b/download.lst
index bef96b45a1ce..567c5705d2c6 100644
--- a/download.lst
+++ b/download.lst
@@ -108,10 +108,10 @@ export HUNSPELL_SHA256SUM := 57be4e03ae9dd62c3471f667a0d81a14513e314d4d92081292b
export HUNSPELL_TARBALL := hunspell-1.7.0.tar.gz
export HYPHEN_SHA256SUM := 304636d4eccd81a14b6914d07b84c79ebb815288c76fe027b9ebff6ff24d5705
export HYPHEN_TARBALL := 5ade6ae2a99bc1e9e57031ca88d36dad-hyphen-2.8.8.tar.gz
-export ICU_SHA256SUM := 8d205428c17bf13bb535300669ed28b338a157b1c01ae66d31d0d3e2d47c3fd5
-export ICU_TARBALL := icu4c-70_1-src.tgz
-export ICU_DATA_SHA256SUM := c72723ddba3300ffb231d6b09e2a728ea6e89de10ed5927f74bacbd77042336e
-export ICU_DATA_TARBALL := icu4c-70_1-data.zip
+export ICU_SHA256SUM := 67a7e6e51f61faf1306b6935333e13b2c48abd8da6d2f46ce6adca24b1e21ebf
+export ICU_TARBALL := icu4c-71_1-src.tgz
+export ICU_DATA_SHA256SUM := e3882b4fece6e5e039f22c3189b7ba224180fd26fdbfa9db284617455b93e804
+export ICU_DATA_TARBALL := icu4c-71_1-data.zip
export JFREEREPORT_FLOW_ENGINE_SHA256SUM := 233f66e8d25c5dd971716d4200203a612a407649686ef3b52075d04b4c9df0dd
export JFREEREPORT_FLOW_ENGINE_TARBALL := ba2930200c9f019c2d93a8c88c651a0f-flow-engine-0.9.4.zip
export JFREEREPORT_FLUTE_SHA256SUM := 1b5b24f7bc543c0362b667692f78db8bab4ed6dafc6172f104d0bd3757d8a133
diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
index 25b4f1defa26..ccb1a1a8b9b2 100644
--- a/external/icu/UnpackedTarball_icu.mk
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -39,11 +39,11 @@ $(eval $(call gb_UnpackedTarball_add_patches,icu,\
external/icu/strict_ansi.patch \
external/icu/icu4c-windows-cygwin-cross.patch.1 \
external/icu/icu4c-emscripten-cross.patch.1 \
- external/icu/icu4c-khmerbreakengine.patch.1 \
external/icu/icu4c-use-pkgdata-single-ccode-file-mode.patch.1 \
+ external/icu/do-not-reset-useful-cache-to-empty-in-populateNear.patch.2 \
+ external/icu/icu4c-khmerbreakengine.patch.1 \
external/icu/icu4c-$(if $(filter ANDROID,$(OS)),android,rpath).patch.1 \
$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.patch.1) \
- external/icu/do-not-reset-useful-cache-to-empty-in-populateNear.patch.2 \
))
$(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
diff --git a/external/icu/icu4c-khmerbreakengine.patch.1 b/external/icu/icu4c-khmerbreakengine.patch.1
index 78cce146c2bf..ea8f20f443ff 100644
--- a/external/icu/icu4c-khmerbreakengine.patch.1
+++ b/external/icu/icu4c-khmerbreakengine.patch.1
@@ -1,7 +1,7 @@
diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
---- icu.org/source/common/dictbe.cpp 2021-10-28 18:04:57.000000000 +0200
-+++ icu/source/common/dictbe.cpp 2021-11-15 20:39:03.710870385 +0100
-@@ -32,7 +32,19 @@
+--- icu.org/source/common/dictbe.cpp 2022-04-08 00:41:55.000000000 +0200
++++ icu/source/common/dictbe.cpp 2022-05-16 13:56:43.426870900 +0200
+@@ -35,7 +35,19 @@
******************************************************************
*/
@@ -13,16 +13,16 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
+DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes)
+ : fTypes(breakTypes), clusterLimit(3) {
+ UErrorCode status = U_ZERO_ERROR;
-+ fViramaSet.applyPattern(UNICODE_STRING_SIMPLE("[[:ccc=VR:]]"), status);
++ fViramaSet.applyPattern(UnicodeString(u"[[:ccc=VR:]]"), status);
+
+ // note Skip Sets contain fIgnoreSet characters too.
-+ fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]\\u200C\\u200D\\u2060]"), status);
-+ fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]\\u200C\\u200D\\u2060]"), status);
-+ fNBeforeSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CR:][:lb=LF:][:lb=NL:][:lb=SP:][:lb=ZW:][:lb=IS:][:lb=BA:][:lb=NS:]]"), status);
++ fSkipStartSet.applyPattern(UnicodeString(u"[[:lb=OP:][:lb=QU:]\\u200C\\u200D\\u2060]"), status);
++ fSkipEndSet.applyPattern(UnicodeString(u"[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]\\u200C\\u200D\\u2060]"), status);
++ fNBeforeSet.applyPattern(UnicodeString(u"[[:lb=CR:][:lb=LF:][:lb=NL:][:lb=SP:][:lb=ZW:][:lb=IS:][:lb=BA:][:lb=NS:]]"), status);
}
DictionaryBreakEngine::~DictionaryBreakEngine() {
-@@ -81,6 +93,169 @@
+@@ -85,6 +97,169 @@
fSet.compact();
}
@@ -192,7 +192,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
/*
******************************************************************
* PossibleWord
-@@ -110,7 +285,7 @@
+@@ -114,7 +289,7 @@
~PossibleWord() {}
// Fill the list of candidates if needed, select the longest, and return the number found
@@ -201,7 +201,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
// Select the currently marked candidate, point after it in the text, and invalidate self
int32_t acceptMarked( UText *text );
-@@ -131,12 +306,12 @@
+@@ -135,12 +310,12 @@
};
@@ -216,7 +216,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
// Dictionary leaves text after longest prefix, not longest word. Back up.
if (count <= 0) {
utext_setNativeIndex(text, start);
-@@ -808,53 +983,30 @@
+@@ -814,53 +989,30 @@
* KhmerBreakEngine
*/
@@ -243,17 +243,17 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
{
UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
-- fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
+- UnicodeSet khmerWordSet(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]]"), status);
+
+ clusterLimit = 3;
+
-+ fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]\\u2060\\u200C\\u200D]"), status);
++ UnicodeSet khmerWordSet(UnicodeString(u"[[:Khmr:]\\u2060\\u200C\\u200D]"), status);
if (U_SUCCESS(status)) {
- setCharacters(fKhmerWordSet);
+ setCharacters(khmerWordSet);
}
- fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
+ fMarkSet.applyPattern(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
- fMarkSet.add(0x0020);
-- fEndWordSet = fKhmerWordSet;
+- fEndWordSet = khmerWordSet;
- fBeginWordSet.add(0x1780, 0x17B3);
- //fBeginWordSet.add(0x17A3, 0x17A4); // deprecated vowels
- //fEndWordSet.remove(0x17A5, 0x17A9); // Khmer independent vowels that can't end a word
@@ -268,8 +268,8 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
-// fSuffixSet.add(THAI_MAIYAMOK);
+ fIgnoreSet.add(0x2060); // WJ
+ fIgnoreSet.add(0x200C, 0x200D); // ZWJ, ZWNJ
-+ fBaseSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:lb=SA:]&[:^M:]]"), status);
-+ fPuncSet.applyPattern(UNICODE_STRING_SIMPLE("[\\u17D4\\u17D5\\u17D6\\u17D7\\u17D9:]"), status);
++ fBaseSet.applyPattern(UnicodeString(u"[[:Khmr:]&[:lb=SA:]&[:^M:]]"), status);
++ fPuncSet.applyPattern(UnicodeString(u"[\\u17D4\\u17D5\\u17D6\\u17D7\\u17D9:]"), status);
// Compact for caching.
fMarkSet.compact();
@@ -282,8 +282,8 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
UTRACE_EXIT_STATUS(status);
}
-@@ -869,175 +1021,204 @@
- UVector32 &foundBreaks,
+@@ -876,175 +1028,205 @@
+ UBool /* isPhraseBreaking */,
UErrorCode& status ) const {
if (U_FAILURE(status)) return 0;
- if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
@@ -304,7 +304,7 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
+ --scanStart;
+ startZwsp = scanBeforeStart(text, scanStart, breakStart);
}
--
+
- uint32_t wordsFound = 0;
- int32_t cpWordLength = 0;
- int32_t cuWordLength = 0;
@@ -633,9 +633,9 @@ diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
#if !UCONFIG_NO_NORMALIZATION
diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
---- icu.org/source/common/dictbe.h 2021-10-28 18:04:57.000000000 +0200
-+++ icu/source/common/dictbe.h 2021-11-15 20:41:53.052317579 +0100
-@@ -34,7 +34,8 @@
+--- icu.org/source/common/dictbe.h 2022-04-08 00:41:55.000000000 +0200
++++ icu/source/common/dictbe.h 2022-05-16 13:49:33.820459894 +0200
+@@ -35,7 +35,8 @@
* threads without synchronization.</p>
*/
class DictionaryBreakEngine : public LanguageBreakEngine {
@@ -645,7 +645,7 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
/**
* The set of characters handled by this engine
* @internal
-@@ -42,14 +43,84 @@
+@@ -43,14 +44,84 @@
UnicodeSet fSet;
@@ -731,10 +731,10 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
* <p>Virtual destructor.</p>
*/
virtual ~DictionaryBreakEngine();
-@@ -303,10 +374,12 @@
+@@ -305,10 +376,12 @@
+ * @internal
*/
- UnicodeSet fKhmerWordSet;
- UnicodeSet fEndWordSet;
UnicodeSet fBeginWordSet;
- UnicodeSet fMarkSet;
@@ -748,8 +748,8 @@ diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
public:
diff -ur icu.org/source/common/dictionarydata.cpp icu/source/common/dictionarydata.cpp
---- icu.org/source/common/dictionarydata.cpp 2021-10-28 18:04:57.000000000 +0200
-+++ icu/source/common/dictionarydata.cpp 2021-11-15 19:25:00.583694898 +0100
+--- icu.org/source/common/dictionarydata.cpp 2022-04-08 00:41:55.000000000 +0200
++++ icu/source/common/dictionarydata.cpp 2022-05-16 13:49:33.821459892 +0200
@@ -44,7 +44,7 @@
int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
@@ -797,8 +797,8 @@ diff -ur icu.org/source/common/dictionarydata.cpp icu/source/common/dictionaryda
if (values != NULL) {
values[wordCount] = bt.getValue();
diff -ur icu.org/source/common/dictionarydata.h icu/source/common/dictionarydata.h
---- icu.org/source/common/dictionarydata.h 2021-10-28 18:04:57.000000000 +0200
-+++ icu/source/common/dictionarydata.h 2021-11-15 20:44:34.484790590 +0100
+--- icu.org/source/common/dictionarydata.h 2022-04-08 00:41:55.000000000 +0200
++++ icu/source/common/dictionarydata.h 2022-05-16 13:49:33.822459891 +0200
@@ -21,6 +21,7 @@
#include "unicode/utext.h"
#include "unicode/udata.h"